1/*
2**********************************************************************
3*   Copyright (C) 1998-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   11/11/98    stephen     Changed per 11/9 code review.
14*   04/20/99    stephen     Overhauled per 4/16 code review.
15*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16*                           handleReplaceBetween(); other methods unchanged.
17*   06/25/01    grhoten     Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29#include "unicode/utypes.h"
30#include "unicode/rep.h"
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
34#include "unicode/ucasemap.h"
35
36struct UConverter;          // unicode/ucnv.h
37class  StringThreadTest;
38
39#ifndef U_COMPARE_CODE_POINT_ORDER
40/* see also ustring.h and unorm.h */
41/**
42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
43 * Compare strings in code point order instead of code unit order.
44 * @stable ICU 2.2
45 */
46#define U_COMPARE_CODE_POINT_ORDER  0x8000
47#endif
48
49#ifndef USTRING_H  // presumably the include guard of unicode/ustring.h; u_strlen() is only declared here while it is undefined
50/**
51 * \ingroup ustring_ustrlen
52 */
53U_STABLE int32_t U_EXPORT2
54u_strlen(const UChar *s);
55#endif  // USTRING_H
56
57/**
58 * \def U_STRING_CASE_MAPPER_DEFINED
59 * @internal
60 */
61#ifndef U_STRING_CASE_MAPPER_DEFINED
62#define U_STRING_CASE_MAPPER_DEFINED
63
64/**
65 * Internal string case mapping function type.
 * Note that this typedef names a function type, not a pointer-to-function type;
 * a pointer to such a function is declared as <code>UStringCaseMapper *</code>.
66 * @internal
67 */
68typedef int32_t U_CALLCONV
69UStringCaseMapper(const UCaseMap *csm,
70                  UChar *dest, int32_t destCapacity,
71                  const UChar *src, int32_t srcLength,
72                  UErrorCode *pErrorCode);
73
74#endif
75
76U_NAMESPACE_BEGIN
77
78class BreakIterator;        // unicode/brkiter.h
79class Locale;               // unicode/locid.h
80class StringCharacterIterator;
81class UnicodeStringAppendable;  // unicode/appendable.h
82
83/* The <iostream> include has been moved to unicode/ustream.h */
84
85/**
86 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
87 * which constructs a Unicode string from an invariant-character char * string.
88 * About invariant characters see utypes.h.
89 * This constructor has no runtime dependency on conversion code and is
90 * therefore recommended over ones taking a charset name string
91 * (where the empty string "" indicates invariant-character conversion).
92 *
93 * @stable ICU 3.2
94 */
95#define US_INV icu::UnicodeString::kInvariant
96
97/**
98 * Unicode String literals in C++.
99 * Dependent on the platform properties, different UnicodeString
100 * constructors should be used to create a UnicodeString object from
101 * a string literal.
102 * The macros are defined for maximum performance.
103 * They work only for strings that contain "invariant characters", i.e.,
104 * only Latin letters, digits, and some punctuation.
105 * See utypes.h for details.
106 *
107 * The string parameter must be a C string literal.
108 * The length of the string, not including the terminating
109 * <code>NUL</code>, must be specified as a constant.
110 * The U_STRING_DECL macro should be invoked exactly once for one
111 * such string variable before it is used.
 *
 * The expansion is selected by the preprocessor; the first matching case applies:
 * - U_DECLARE_UTF16 is defined:
 *   <code>U_DECLARE_UTF16(cs)</code> is cast to <code>const UChar *</code>.
 * - U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR and either U_CHARSET_FAMILY==U_ASCII_FAMILY
 *   or (U_SIZEOF_UCHAR==2 and U_WCHAR_IS_UTF16 is defined):
 *   the wide string literal formed by <code>L ## cs</code> is cast to <code>const UChar *</code>.
 * - U_SIZEOF_UCHAR==1 and U_CHARSET_FAMILY==U_ASCII_FAMILY:
 *   <code>cs</code> itself is cast to <code>const UChar *</code>.
 * - Otherwise: <code>icu::UnicodeString(cs, _length, US_INV)</code> is used.
 *
 * In the first three cases the cast pointer is passed to
 * <code>icu::UnicodeString(TRUE, const UChar *, _length)</code>.
112 * @stable ICU 2.0
113 */
114#if defined(U_DECLARE_UTF16)
115#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
116#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
117#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
118#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
119#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
120#else
121#   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
122#endif
123
124/**
125 * Unicode String literals in C++.
126 * Dependent on the platform properties, different UnicodeString
127 * constructors should be used to create a UnicodeString object from
128 * a string literal.
129 * The macros are defined for improved performance.
130 * They work only for strings that contain "invariant characters", i.e.,
131 * only Latin letters, digits, and some punctuation.
132 * See utypes.h for details.
133 *
134 * The string parameter must be a C string literal.
 * No length is specified by the caller: the macro expands to
 * <code>UNICODE_STRING(cs, -1)</code>.
135 * @stable ICU 2.0
136 */
137#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
138
139/**
140 * \def UNISTR_FROM_CHAR_EXPLICIT
141 * This can be defined to be empty or "explicit".
142 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
143 * constructors are marked as explicit, preventing their inadvertent use.
144 * @stable ICU 49
145 */
146#ifndef UNISTR_FROM_CHAR_EXPLICIT
147# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
148    // Auto-"explicit" in ICU library code.
149#   define UNISTR_FROM_CHAR_EXPLICIT explicit
150# else
151    // Empty by default for source code compatibility.
152#   define UNISTR_FROM_CHAR_EXPLICIT
153# endif
154#endif
155
156/**
157 * \def UNISTR_FROM_STRING_EXPLICIT
158 * This can be defined to be empty or "explicit".
159 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
160 * constructors are marked as explicit, preventing their inadvertent use.
161 *
162 * In particular, this helps prevent accidentally depending on ICU conversion code
163 * by passing a string literal into an API with a const UnicodeString & parameter.
164 * @stable ICU 49
165 */
166#ifndef UNISTR_FROM_STRING_EXPLICIT
167# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
168    // Auto-"explicit" in ICU library code.
169#   define UNISTR_FROM_STRING_EXPLICIT explicit
170# else
171    // Empty by default for source code compatibility.
172#   define UNISTR_FROM_STRING_EXPLICIT
173# endif
174#endif
175
176/**
177 * UnicodeString is a string class that stores Unicode characters directly and provides
178 * functionality similar to that of the Java String and StringBuffer classes.
179 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
180 *
181 * The UnicodeString class is not suitable for subclassing.
182 *
183 * <p>For an overview of Unicode strings in C and C++ see the
184 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
185 *
186 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
187 * A Unicode character may be stored with either one code unit
188 * (the most common case) or with a matched pair of special code units
189 * ("surrogates"). The data type for code units is UChar.
190 * For single-character handling, a Unicode character code <em>point</em> is a value
191 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
192 *
193 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
194 * This is the same as with multi-byte char* strings in traditional string handling.
195 * Operations on partial strings typically do not test for code point boundaries.
196 * If necessary, the user needs to take care of such boundaries by testing for the code unit
197 * values or by using functions like
198 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
199 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
200 *
201 * UnicodeString methods are more lenient with regard to input parameter values
202 * than other ICU APIs. In particular:
203 * - If indexes are out of bounds for a UnicodeString object
204 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
205 * - If primitive string pointer values (e.g., const UChar * or char *)
206 *   for input strings are NULL, then those input string parameters are treated
207 *   as if they pointed to an empty string.
208 *   However, this is <em>not</em> the case for char * parameters for charset names
209 *   or other IDs.
210 * - Most UnicodeString methods do not take a UErrorCode parameter because
211 *   there are usually very few opportunities for failure other than a shortage
212 *   of memory, error codes in low-level C++ string methods would be inconvenient,
213 *   and the error code as the last parameter (ICU convention) would prevent
214 *   the use of default parameter values.
215 *   Instead, such methods set the UnicodeString into a "bogus" state
216 *   (see isBogus()) if an error occurs.
217 *
218 * In string comparisons, two UnicodeString objects that are both "bogus"
219 * compare equal (to be transitive and prevent endless loops in sorting),
220 * and a "bogus" string compares less than any non-"bogus" one.
221 *
222 * Const UnicodeString methods are thread-safe. Multiple threads can use
223 * const methods on the same UnicodeString object simultaneously,
224 * but non-const methods must not be called concurrently (in multiple threads)
225 * with any other (const or non-const) methods.
226 *
227 * Similarly, const UnicodeString & parameters are thread-safe.
228 * One object may be passed in as such a parameter concurrently in multiple threads.
229 * This includes the const UnicodeString & parameters for
230 * copy construction, assignment, and cloning.
231 *
232 * <p>UnicodeString uses several storage methods.
233 * String contents can be stored inside the UnicodeString object itself,
234 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
235 * Most of this is done transparently, but careful aliasing in particular provides
236 * significant performance improvements.
237 * Also, the internal buffer is accessible via special functions.
238 * For details see the
239 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
240 *
241 * @see utf.h
242 * @see CharacterIterator
243 * @stable ICU 2.0
244 */
245class U_COMMON_API UnicodeString : public Replaceable
246{
247public:
248
249  /**
250   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
251   * which constructs a Unicode string from an invariant-character char * string.
252   * Use the macro US_INV instead of the full qualification for this value.
253   *
254   * @see US_INV
255   * @stable ICU 3.2
256   */
257  enum EInvariant {
258    /**
259     * @see EInvariant
260     * @stable ICU 3.2
261     */
262    kInvariant
263  };
264
265  //========================================
266  // Read-only operations
267  //========================================
268
269  /* Comparison - bitwise only - for international comparison use collation */
270
271  /**
272   * Equality operator. Performs only bitwise comparison.
273   * @param text The UnicodeString to compare to this one.
274   * @return TRUE if <TT>text</TT> contains the same characters as this one,
275   * FALSE otherwise.
276   * @stable ICU 2.0
277   */
278  inline UBool operator== (const UnicodeString& text) const;
279
280  /**
281   * Inequality operator. Performs only bitwise comparison.
282   * @param text The UnicodeString to compare to this one.
283   * @return FALSE if <TT>text</TT> contains the same characters as this one,
284   * TRUE otherwise.
285   * @stable ICU 2.0
286   */
287  inline UBool operator!= (const UnicodeString& text) const;
288
289  /**
290   * Greater than operator. Performs only bitwise comparison.
291   * @param text The UnicodeString to compare to this one.
292   * @return TRUE if the characters in this are bitwise
293   * greater than the characters in <code>text</code>, FALSE otherwise
294   * @stable ICU 2.0
295   */
296  inline UBool operator> (const UnicodeString& text) const;
297
298  /**
299   * Less than operator. Performs only bitwise comparison.
300   * @param text The UnicodeString to compare to this one.
301   * @return TRUE if the characters in this are bitwise
302   * less than the characters in <code>text</code>, FALSE otherwise
303   * @stable ICU 2.0
304   */
305  inline UBool operator< (const UnicodeString& text) const;
306
307  /**
308   * Greater than or equal operator. Performs only bitwise comparison.
309   * @param text The UnicodeString to compare to this one.
310   * @return TRUE if the characters in this are bitwise
311   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
312   * @stable ICU 2.0
313   */
314  inline UBool operator>= (const UnicodeString& text) const;
315
316  /**
317   * Less than or equal operator. Performs only bitwise comparison.
318   * @param text The UnicodeString to compare to this one.
319   * @return TRUE if the characters in this are bitwise
320   * less than or equal to the characters in <code>text</code>, FALSE otherwise
321   * @stable ICU 2.0
322   */
323  inline UBool operator<= (const UnicodeString& text) const;
324
325  /**
326   * Compare the characters bitwise in this UnicodeString to
327   * the characters in <code>text</code>.
328   * @param text The UnicodeString to compare to this one.
329   * @return The result of bitwise character comparison: 0 if this
330   * contains the same characters as <code>text</code>, -1 if the characters in
331   * this are bitwise less than the characters in <code>text</code>, +1 if the
332   * characters in this are bitwise greater than the characters
333   * in <code>text</code>.
334   * @stable ICU 2.0
335   */
336  inline int8_t compare(const UnicodeString& text) const;
337
338  /**
339   * Compare the characters bitwise in the range
340   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
341   * in the <b>entire string</b> <TT>text</TT>.
342   * (The parameters "start" and "length" are not applied to the other text "text".)
343   * @param start the offset at which the compare operation begins
344   * @param length the number of characters of text to compare.
345   * @param text the other text to be compared against this string.
346   * @return The result of bitwise character comparison: 0 if this
347   * contains the same characters as <code>text</code>, -1 if the characters in
348   * this are bitwise less than the characters in <code>text</code>, +1 if the
349   * characters in this are bitwise greater than the characters
350   * in <code>text</code>.
351   * @stable ICU 2.0
352   */
353  inline int8_t compare(int32_t start,
354         int32_t length,
355         const UnicodeString& text) const;
356
357  /**
358   * Compare the characters bitwise in the range
359   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
360   * in <TT>srcText</TT> in the range
361   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
362   * @param start the offset at which the compare operation begins
363   * @param length the number of characters in this to compare.
364   * @param srcText the text to be compared
365   * @param srcStart the offset into <TT>srcText</TT> to start comparison
366   * @param srcLength the number of characters in <TT>srcText</TT> to compare
367   * @return The result of bitwise character comparison: 0 if this
368   * contains the same characters as <code>srcText</code>, -1 if the characters in
369   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
370   * characters in this are bitwise greater than the characters
371   * in <code>srcText</code>.
372   * @stable ICU 2.0
373   */
374   inline int8_t compare(int32_t start,
375         int32_t length,
376         const UnicodeString& srcText,
377         int32_t srcStart,
378         int32_t srcLength) const;
379
380  /**
381   * Compare the characters bitwise in this UnicodeString with the first
382   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
383   * @param srcChars The characters to compare to this UnicodeString.
384   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
385   * @return The result of bitwise character comparison: 0 if this
386   * contains the same characters as <code>srcChars</code>, -1 if the characters in
387   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
388   * characters in this are bitwise greater than the characters
389   * in <code>srcChars</code>.
390   * @stable ICU 2.0
391   */
392  inline int8_t compare(const UChar *srcChars,
393         int32_t srcLength) const;
394
395  /**
396   * Compare the characters bitwise in the range
397   * [<TT>start</TT>, <TT>start + length</TT>) with the first
398   * <TT>length</TT> characters in <TT>srcChars</TT>.
399   * @param start the offset at which the compare operation begins
400   * @param length the number of characters to compare.
401   * @param srcChars the characters to be compared
402   * @return The result of bitwise character comparison: 0 if this
403   * contains the same characters as <code>srcChars</code>, -1 if the characters in
404   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
405   * characters in this are bitwise greater than the characters
406   * in <code>srcChars</code>.
407   * @stable ICU 2.0
408   */
409  inline int8_t compare(int32_t start,
410         int32_t length,
411         const UChar *srcChars) const;
412
413  /**
414   * Compare the characters bitwise in the range
415   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
416   * in <TT>srcChars</TT> in the range
417   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
418   * @param start the offset at which the compare operation begins
419   * @param length the number of characters in this to compare
420   * @param srcChars the characters to be compared
421   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
422   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
423   * @return The result of bitwise character comparison: 0 if this
424   * contains the same characters as <code>srcChars</code>, -1 if the characters in
425   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
426   * characters in this are bitwise greater than the characters
427   * in <code>srcChars</code>.
428   * @stable ICU 2.0
429   */
430  inline int8_t compare(int32_t start,
431         int32_t length,
432         const UChar *srcChars,
433         int32_t srcStart,
434         int32_t srcLength) const;
435
436  /**
437   * Compare the characters bitwise in the range
438   * [<TT>start</TT>, <TT>limit</TT>) with the characters
439   * in <TT>srcText</TT> in the range
440   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
441   * @param start the offset at which the compare operation begins
442   * @param limit the offset immediately following the compare operation
443   * @param srcText the text to be compared
444   * @param srcStart the offset into <TT>srcText</TT> to start comparison
445   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
446   * @return The result of bitwise character comparison: 0 if this
447   * contains the same characters as <code>srcText</code>, -1 if the characters in
448   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
449   * characters in this are bitwise greater than the characters
450   * in <code>srcText</code>.
451   * @stable ICU 2.0
452   */
453  inline int8_t compareBetween(int32_t start,
454            int32_t limit,
455            const UnicodeString& srcText,
456            int32_t srcStart,
457            int32_t srcLimit) const;
458
459  /**
460   * Compare two Unicode strings in code point order.
461   * The result may be different from the results of compare(), operator<, etc.
462   * if supplementary characters are present:
463   *
464   * In UTF-16, supplementary characters (with code points U+10000 and above) are
465   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
466   * which means that they compare as less than some other BMP characters like U+feff.
467   * This function compares Unicode strings in code point order.
468   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
469   *
470   * @param text Another string to compare this one to.
471   * @return a negative/zero/positive integer corresponding to whether
472   * this string is less than/equal to/greater than the second one
473   * in code point order
474   * @stable ICU 2.0
475   */
476  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
477
478  /**
479   * Compare two Unicode strings in code point order.
480   * The result may be different from the results of compare(), operator<, etc.
481   * if supplementary characters are present:
482   *
483   * In UTF-16, supplementary characters (with code points U+10000 and above) are
484   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
485   * which means that they compare as less than some other BMP characters like U+feff.
486   * This function compares Unicode strings in code point order.
487   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
488   *
489   * @param start The start offset in this string at which the compare operation begins.
490   * @param length The number of code units from this string to compare.
491   * @param srcText Another string to compare this one to.
492   * @return a negative/zero/positive integer corresponding to whether
493   * this string is less than/equal to/greater than the second one
494   * in code point order
495   * @stable ICU 2.0
496   */
497  inline int8_t compareCodePointOrder(int32_t start,
498                                      int32_t length,
499                                      const UnicodeString& srcText) const;
500
501  /**
502   * Compare two Unicode strings in code point order.
503   * The result may be different from the results of compare(), operator<, etc.
504   * if supplementary characters are present:
505   *
506   * In UTF-16, supplementary characters (with code points U+10000 and above) are
507   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
508   * which means that they compare as less than some other BMP characters like U+feff.
509   * This function compares Unicode strings in code point order.
510   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
511   *
512   * @param start The start offset in this string at which the compare operation begins.
513   * @param length The number of code units from this string to compare.
514   * @param srcText Another string to compare this one to.
515   * @param srcStart The start offset in that string at which the compare operation begins.
516   * @param srcLength The number of code units from that string to compare.
517   * @return a negative/zero/positive integer corresponding to whether
518   * this string is less than/equal to/greater than the second one
519   * in code point order
520   * @stable ICU 2.0
521   */
522   inline int8_t compareCodePointOrder(int32_t start,
523                                       int32_t length,
524                                       const UnicodeString& srcText,
525                                       int32_t srcStart,
526                                       int32_t srcLength) const;
527
528  /**
529   * Compare two Unicode strings in code point order.
530   * The result may be different from the results of compare(), operator<, etc.
531   * if supplementary characters are present:
532   *
533   * In UTF-16, supplementary characters (with code points U+10000 and above) are
534   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
535   * which means that they compare as less than some other BMP characters like U+feff.
536   * This function compares Unicode strings in code point order.
537   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
538   *
539   * @param srcChars A pointer to another string to compare this one to.
540   * @param srcLength The number of code units from that string to compare.
541   * @return a negative/zero/positive integer corresponding to whether
542   * this string is less than/equal to/greater than the second one
543   * in code point order
544   * @stable ICU 2.0
545   */
546  inline int8_t compareCodePointOrder(const UChar *srcChars,
547                                      int32_t srcLength) const;
548
549  /**
550   * Compare two Unicode strings in code point order.
551   * The result may be different from the results of compare(), operator<, etc.
552   * if supplementary characters are present:
553   *
554   * In UTF-16, supplementary characters (with code points U+10000 and above) are
555   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
556   * which means that they compare as less than some other BMP characters like U+feff.
557   * This function compares Unicode strings in code point order.
558   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
559   *
560   * @param start The start offset in this string at which the compare operation begins.
561   * @param length The number of code units from this string to compare.
562   * @param srcChars A pointer to another string to compare this one to.
563   * @return a negative/zero/positive integer corresponding to whether
564   * this string is less than/equal to/greater than the second one
565   * in code point order
566   * @stable ICU 2.0
567   */
568  inline int8_t compareCodePointOrder(int32_t start,
569                                      int32_t length,
570                                      const UChar *srcChars) const;
571
572  /**
573   * Compare two Unicode strings in code point order.
574   * The result may be different from the results of compare(), operator<, etc.
575   * if supplementary characters are present:
576   *
577   * In UTF-16, supplementary characters (with code points U+10000 and above) are
578   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
579   * which means that they compare as less than some other BMP characters like U+feff.
580   * This function compares Unicode strings in code point order.
581   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
582   *
583   * @param start The start offset in this string at which the compare operation begins.
584   * @param length The number of code units from this string to compare.
585   * @param srcChars A pointer to another string to compare this one to.
586   * @param srcStart The start offset in that string at which the compare operation begins.
587   * @param srcLength The number of code units from that string to compare.
588   * @return a negative/zero/positive integer corresponding to whether
589   * this string is less than/equal to/greater than the second one
590   * in code point order
591   * @stable ICU 2.0
592   */
593  inline int8_t compareCodePointOrder(int32_t start,
594                                      int32_t length,
595                                      const UChar *srcChars,
596                                      int32_t srcStart,
597                                      int32_t srcLength) const;
598
599  /**
600   * Compare two Unicode strings in code point order.
601   * The result may be different from the results of compare(), operator<, etc.
602   * if supplementary characters are present:
603   *
604   * In UTF-16, supplementary characters (with code points U+10000 and above) are
605   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
606   * which means that they compare as less than some other BMP characters like U+feff.
607   * This function compares Unicode strings in code point order.
608   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
609   *
610   * @param start The start offset in this string at which the compare operation begins.
611   * @param limit The offset after the last code unit from this string to compare.
612   * @param srcText Another string to compare this one to.
613   * @param srcStart The start offset in that string at which the compare operation begins.
614   * @param srcLimit The offset after the last code unit from that string to compare.
615   * @return a negative/zero/positive integer corresponding to whether
616   * this string is less than/equal to/greater than the second one
617   * in code point order
618   * @stable ICU 2.0
619   */
620  inline int8_t compareCodePointOrderBetween(int32_t start,
621                                             int32_t limit,
622                                             const UnicodeString& srcText,
623                                             int32_t srcStart,
624                                             int32_t srcLimit) const;
625
626  /**
627   * Compare two strings case-insensitively using full case folding.
628   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
629   *
630   * @param text Another string to compare this one to.
631   * @param options A bit set of options:
632   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
633   *     Comparison in code unit order with default case folding.
634   *
635   *   - U_COMPARE_CODE_POINT_ORDER
636   *     Set to choose code point order instead of code unit order
637   *     (see u_strCompare for details).
638   *
639   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
640   *
641   * @return A negative, zero, or positive integer indicating the comparison result.
642   * @stable ICU 2.0
643   */
644  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
645
646  /**
647   * Compare a range of this string with another string case-insensitively using full case folding.
648   * Applied to the range [start, start + length) of this string, this is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
649   *
650   * @param start The start offset in this string at which the compare operation begins.
651   * @param length The number of code units from this string to compare.
652   * @param srcText Another string to compare this one to.
653   * @param options A bit set of options:
654   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
655   *     Comparison in code unit order with default case folding.
656   *
657   *   - U_COMPARE_CODE_POINT_ORDER
658   *     Set to choose code point order instead of code unit order
659   *     (see u_strCompare for details).
660   *
661   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
662   *
663   * @return A negative, zero, or positive integer indicating the comparison result.
664   * @stable ICU 2.0
665   */
666  inline int8_t caseCompare(int32_t start,
667         int32_t length,
668         const UnicodeString& srcText,
669         uint32_t options) const;
670
671  /**
672   * Compare a range of this string with a range of another string case-insensitively using full case folding.
673   * Applied to the range [start, start + length) of this string and the range [srcStart, srcStart + srcLength) of srcText, this is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
674   *
675   * @param start The start offset in this string at which the compare operation begins.
676   * @param length The number of code units from this string to compare.
677   * @param srcText Another string to compare this one to.
678   * @param srcStart The start offset in that string at which the compare operation begins.
679   * @param srcLength The number of code units from that string to compare.
680   * @param options A bit set of options:
681   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
682   *     Comparison in code unit order with default case folding.
683   *
684   *   - U_COMPARE_CODE_POINT_ORDER
685   *     Set to choose code point order instead of code unit order
686   *     (see u_strCompare for details).
687   *
688   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
689   *
690   * @return A negative, zero, or positive integer indicating the comparison result.
691   * @stable ICU 2.0
692   */
693  inline int8_t caseCompare(int32_t start,
694         int32_t length,
695         const UnicodeString& srcText,
696         int32_t srcStart,
697         int32_t srcLength,
698         uint32_t options) const;
699
700  /**
701   * Compare two strings case-insensitively using full case folding.
702   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
703   *
704   * @param srcChars A pointer to another string to compare this one to.
705   * @param srcLength The number of code units from that string to compare.
706   * @param options A bit set of options:
707   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
708   *     Comparison in code unit order with default case folding.
709   *
710   *   - U_COMPARE_CODE_POINT_ORDER
711   *     Set to choose code point order instead of code unit order
712   *     (see u_strCompare for details).
713   *
714   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
715   *
716   * @return A negative, zero, or positive integer indicating the comparison result.
717   * @stable ICU 2.0
718   */
719  inline int8_t caseCompare(const UChar *srcChars,
720         int32_t srcLength,
721         uint32_t options) const;
722
723  /**
724   * Compare two strings case-insensitively using full case folding.
725   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
726   *
727   * @param start The start offset in this string at which the compare operation begins.
728   * @param length The number of code units from this string to compare.
729   * @param srcChars A pointer to another string to compare this one to.
730   * @param options A bit set of options:
731   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
732   *     Comparison in code unit order with default case folding.
733   *
734   *   - U_COMPARE_CODE_POINT_ORDER
735   *     Set to choose code point order instead of code unit order
736   *     (see u_strCompare for details).
737   *
738   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
739   *
740   * @return A negative, zero, or positive integer indicating the comparison result.
741   * @stable ICU 2.0
742   */
743  inline int8_t caseCompare(int32_t start,
744         int32_t length,
745         const UChar *srcChars,
746         uint32_t options) const;
747
748  /**
749   * Compare two strings case-insensitively using full case folding.
750   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
751   *
752   * @param start The start offset in this string at which the compare operation begins.
753   * @param length The number of code units from this string to compare.
754   * @param srcChars A pointer to another string to compare this one to.
755   * @param srcStart The start offset in that string at which the compare operation begins.
756   * @param srcLength The number of code units from that string to compare.
757   * @param options A bit set of options:
758   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
759   *     Comparison in code unit order with default case folding.
760   *
761   *   - U_COMPARE_CODE_POINT_ORDER
762   *     Set to choose code point order instead of code unit order
763   *     (see u_strCompare for details).
764   *
765   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
766   *
767   * @return A negative, zero, or positive integer indicating the comparison result.
768   * @stable ICU 2.0
769   */
770  inline int8_t caseCompare(int32_t start,
771         int32_t length,
772         const UChar *srcChars,
773         int32_t srcStart,
774         int32_t srcLength,
775         uint32_t options) const;
776
777  /**
778   * Compare two strings case-insensitively using full case folding.
779   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
780   *
781   * @param start The start offset in this string at which the compare operation begins.
782   * @param limit The offset after the last code unit from this string to compare.
783   * @param srcText Another string to compare this one to.
784   * @param srcStart The start offset in that string at which the compare operation begins.
785   * @param srcLimit The offset after the last code unit from that string to compare.
786   * @param options A bit set of options:
787   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
788   *     Comparison in code unit order with default case folding.
789   *
790   *   - U_COMPARE_CODE_POINT_ORDER
791   *     Set to choose code point order instead of code unit order
792   *     (see u_strCompare for details).
793   *
794   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
795   *
796   * @return A negative, zero, or positive integer indicating the comparison result.
797   * @stable ICU 2.0
798   */
799  inline int8_t caseCompareBetween(int32_t start,
800            int32_t limit,
801            const UnicodeString& srcText,
802            int32_t srcStart,
803            int32_t srcLimit,
804            uint32_t options) const;
805
806  /**
807   * Determine if this starts with the characters in <TT>text</TT>
808   * @param text The text to match.
809   * @return TRUE if this starts with the characters in <TT>text</TT>,
810   * FALSE otherwise
811   * @stable ICU 2.0
812   */
813  inline UBool startsWith(const UnicodeString& text) const;
814
815  /**
816   * Determine if this starts with the characters in <TT>srcText</TT>
817   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
818   * @param srcText The text to match.
819   * @param srcStart the offset into <TT>srcText</TT> to start matching
820   * @param srcLength the number of characters in <TT>srcText</TT> to match
821   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
822   * FALSE otherwise
823   * @stable ICU 2.0
824   */
825  inline UBool startsWith(const UnicodeString& srcText,
826            int32_t srcStart,
827            int32_t srcLength) const;
828
829  /**
830   * Determine if this starts with the characters in <TT>srcChars</TT>
831   * @param srcChars The characters to match.
832   * @param srcLength the number of characters in <TT>srcChars</TT>
833   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
834   * FALSE otherwise
835   * @stable ICU 2.0
836   */
837  inline UBool startsWith(const UChar *srcChars,
838            int32_t srcLength) const;
839
840  /**
841   * Determine if this starts with the characters in <TT>srcChars</TT>
842   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
843   * @param srcChars The characters to match.
844   * @param srcStart the offset into <TT>srcChars</TT> to start matching
845   * @param srcLength the number of characters in <TT>srcChars</TT> to match
846   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
847   * @stable ICU 2.0
848   */
849  inline UBool startsWith(const UChar *srcChars,
850            int32_t srcStart,
851            int32_t srcLength) const;
852
853  /**
854   * Determine if this ends with the characters in <TT>text</TT>
855   * @param text The text to match.
856   * @return TRUE if this ends with the characters in <TT>text</TT>,
857   * FALSE otherwise
858   * @stable ICU 2.0
859   */
860  inline UBool endsWith(const UnicodeString& text) const;
861
862  /**
863   * Determine if this ends with the characters in <TT>srcText</TT>
864   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
865   * @param srcText The text to match.
866   * @param srcStart the offset into <TT>srcText</TT> to start matching
867   * @param srcLength the number of characters in <TT>srcText</TT> to match
868   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
869   * FALSE otherwise
870   * @stable ICU 2.0
871   */
872  inline UBool endsWith(const UnicodeString& srcText,
873          int32_t srcStart,
874          int32_t srcLength) const;
875
876  /**
877   * Determine if this ends with the characters in <TT>srcChars</TT>
878   * @param srcChars The characters to match.
879   * @param srcLength the number of characters in <TT>srcChars</TT>
880   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
881   * FALSE otherwise
882   * @stable ICU 2.0
883   */
884  inline UBool endsWith(const UChar *srcChars,
885          int32_t srcLength) const;
886
887  /**
888   * Determine if this ends with the characters in <TT>srcChars</TT>
889   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
890   * @param srcChars The characters to match.
891   * @param srcStart the offset into <TT>srcChars</TT> to start matching
892   * @param srcLength the number of characters in <TT>srcChars</TT> to match
893   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
894   * FALSE otherwise
895   * @stable ICU 2.0
896   */
897  inline UBool endsWith(const UChar *srcChars,
898          int32_t srcStart,
899          int32_t srcLength) const;
900
901
902  /* Searching - bitwise only */
903
904  /**
905   * Locate in this the first occurrence of the characters in <TT>text</TT>,
906   * using bitwise comparison.
907   * @param text The text to search for.
908   * @return The offset into this of the start of <TT>text</TT>,
909   * or -1 if not found.
910   * @stable ICU 2.0
911   */
912  inline int32_t indexOf(const UnicodeString& text) const;
913
914  /**
915   * Locate in this the first occurrence of the characters in <TT>text</TT>
916   * starting at offset <TT>start</TT>, using bitwise comparison.
917   * @param text The text to search for.
918   * @param start The offset at which searching will start.
919   * @return The offset into this of the start of <TT>text</TT>,
920   * or -1 if not found.
921   * @stable ICU 2.0
922   */
923  inline int32_t indexOf(const UnicodeString& text,
924              int32_t start) const;
925
926  /**
927   * Locate in this the first occurrence in the range
928   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
929   * in <TT>text</TT>, using bitwise comparison.
930   * @param text The text to search for.
931   * @param start The offset at which searching will start.
932   * @param length The number of characters to search
933   * @return The offset into this of the start of <TT>text</TT>,
934   * or -1 if not found.
935   * @stable ICU 2.0
936   */
937  inline int32_t indexOf(const UnicodeString& text,
938              int32_t start,
939              int32_t length) const;
940
941  /**
942   * Locate in this the first occurrence in the range
943   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
944   *  in <TT>srcText</TT> in the range
945   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
946   * using bitwise comparison.
947   * @param srcText The text to search for.
948   * @param srcStart the offset into <TT>srcText</TT> at which
949   * to start matching
950   * @param srcLength the number of characters in <TT>srcText</TT> to match
951   * @param start the offset into this at which to start matching
952   * @param length the number of characters in this to search
953   * @return The offset into this of the start of <TT>srcText</TT>,
954   * or -1 if not found.
955   * @stable ICU 2.0
956   */
957  inline int32_t indexOf(const UnicodeString& srcText,
958              int32_t srcStart,
959              int32_t srcLength,
960              int32_t start,
961              int32_t length) const;
962
963  /**
964   * Locate in this the first occurrence of the characters in
965   * <TT>srcChars</TT>
966   * starting at offset <TT>start</TT>, using bitwise comparison.
967   * @param srcChars The text to search for.
968   * @param srcLength the number of characters in <TT>srcChars</TT> to match
969   * @param start the offset into this at which to start matching
970   * @return The offset into this of the start of <TT>srcChars</TT>,
971   * or -1 if not found.
972   * @stable ICU 2.0
973   */
974  inline int32_t indexOf(const UChar *srcChars,
975              int32_t srcLength,
976              int32_t start) const;
977
978  /**
979   * Locate in this the first occurrence in the range
980   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
981   * in <TT>srcChars</TT>, using bitwise comparison.
982   * @param srcChars The text to search for.
983   * @param srcLength the number of characters in <TT>srcChars</TT>
984   * @param start The offset at which searching will start.
985   * @param length The number of characters to search
986   * @return The offset into this of the start of <TT>srcChars</TT>,
987   * or -1 if not found.
988   * @stable ICU 2.0
989   */
990  inline int32_t indexOf(const UChar *srcChars,
991              int32_t srcLength,
992              int32_t start,
993              int32_t length) const;
994
995  /**
996   * Locate in this the first occurrence in the range
997   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
998   * in <TT>srcChars</TT> in the range
999   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1000   * using bitwise comparison.
1001   * @param srcChars The text to search for.
1002   * @param srcStart the offset into <TT>srcChars</TT> at which
1003   * to start matching
1004   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1005   * @param start the offset into this at which to start matching
1006   * @param length the number of characters in this to search
1007   * @return The offset into this of the start of <TT>srcChars</TT>,
1008   * or -1 if not found.
1009   * @stable ICU 2.0
1010   */
1011  int32_t indexOf(const UChar *srcChars,
1012              int32_t srcStart,
1013              int32_t srcLength,
1014              int32_t start,
1015              int32_t length) const;
1016
1017  /**
1018   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1019   * using bitwise comparison.
1020   * @param c The code unit to search for.
1021   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1022   * @stable ICU 2.0
1023   */
1024  inline int32_t indexOf(UChar c) const;
1025
1026  /**
1027   * Locate in this the first occurrence of the code point <TT>c</TT>,
1028   * using bitwise comparison.
1029   *
1030   * @param c The code point to search for.
1031   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1032   * @stable ICU 2.0
1033   */
1034  inline int32_t indexOf(UChar32 c) const;
1035
1036  /**
1037   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1038   * starting at offset <TT>start</TT>, using bitwise comparison.
1039   * @param c The code unit to search for.
1040   * @param start The offset at which searching will start.
1041   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1042   * @stable ICU 2.0
1043   */
1044  inline int32_t indexOf(UChar c,
1045              int32_t start) const;
1046
1047  /**
1048   * Locate in this the first occurrence of the code point <TT>c</TT>
1049   * starting at offset <TT>start</TT>, using bitwise comparison.
1050   *
1051   * @param c The code point to search for.
1052   * @param start The offset at which searching will start.
1053   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1054   * @stable ICU 2.0
1055   */
1056  inline int32_t indexOf(UChar32 c,
1057              int32_t start) const;
1058
1059  /**
1060   * Locate in this the first occurrence of the BMP code point <code>c</code>
1061   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1062   * using bitwise comparison.
1063   * @param c The code unit to search for.
1064   * @param start the offset into this at which to start matching
1065   * @param length the number of characters in this to search
1066   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1067   * @stable ICU 2.0
1068   */
1069  inline int32_t indexOf(UChar c,
1070              int32_t start,
1071              int32_t length) const;
1072
1073  /**
1074   * Locate in this the first occurrence of the code point <TT>c</TT>
1075   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1076   * using bitwise comparison.
1077   *
1078   * @param c The code point to search for.
1079   * @param start the offset into this at which to start matching
1080   * @param length the number of characters in this to search
1081   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1082   * @stable ICU 2.0
1083   */
1084  inline int32_t indexOf(UChar32 c,
1085              int32_t start,
1086              int32_t length) const;
1087
1088  /**
1089   * Locate in this the last occurrence of the characters in <TT>text</TT>,
1090   * using bitwise comparison.
1091   * @param text The text to search for.
1092   * @return The offset into this of the start of <TT>text</TT>,
1093   * or -1 if not found.
1094   * @stable ICU 2.0
1095   */
1096  inline int32_t lastIndexOf(const UnicodeString& text) const;
1097
1098  /**
1099   * Locate in this the last occurrence of the characters in <TT>text</TT>
1100   * starting at offset <TT>start</TT>, using bitwise comparison.
1101   * @param text The text to search for.
1102   * @param start The offset at which searching will start.
1103   * @return The offset into this of the start of <TT>text</TT>,
1104   * or -1 if not found.
1105   * @stable ICU 2.0
1106   */
1107  inline int32_t lastIndexOf(const UnicodeString& text,
1108              int32_t start) const;
1109
1110  /**
1111   * Locate in this the last occurrence in the range
1112   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1113   * in <TT>text</TT>, using bitwise comparison.
1114   * @param text The text to search for.
1115   * @param start The offset at which searching will start.
1116   * @param length The number of characters to search
1117   * @return The offset into this of the start of <TT>text</TT>,
1118   * or -1 if not found.
1119   * @stable ICU 2.0
1120   */
1121  inline int32_t lastIndexOf(const UnicodeString& text,
1122              int32_t start,
1123              int32_t length) const;
1124
1125  /**
1126   * Locate in this the last occurrence in the range
1127   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1128   * in <TT>srcText</TT> in the range
1129   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1130   * using bitwise comparison.
1131   * @param srcText The text to search for.
1132   * @param srcStart the offset into <TT>srcText</TT> at which
1133   * to start matching
1134   * @param srcLength the number of characters in <TT>srcText</TT> to match
1135   * @param start the offset into this at which to start matching
1136   * @param length the number of characters in this to search
1137   * @return The offset into this of the start of <TT>srcText</TT>,
1138   * or -1 if not found.
1139   * @stable ICU 2.0
1140   */
1141  inline int32_t lastIndexOf(const UnicodeString& srcText,
1142              int32_t srcStart,
1143              int32_t srcLength,
1144              int32_t start,
1145              int32_t length) const;
1146
1147  /**
1148   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1149   * starting at offset <TT>start</TT>, using bitwise comparison.
1150   * @param srcChars The text to search for.
1151   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1152   * @param start the offset into this at which to start matching
1153   * @return The offset into this of the start of <TT>srcChars</TT>,
1154   * or -1 if not found.
1155   * @stable ICU 2.0
1156   */
1157  inline int32_t lastIndexOf(const UChar *srcChars,
1158              int32_t srcLength,
1159              int32_t start) const;
1160
1161  /**
1162   * Locate in this the last occurrence in the range
1163   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1164   * in <TT>srcChars</TT>, using bitwise comparison.
1165   * @param srcChars The text to search for.
1166   * @param srcLength the number of characters in <TT>srcChars</TT>
1167   * @param start The offset at which searching will start.
1168   * @param length The number of characters to search
1169   * @return The offset into this of the start of <TT>srcChars</TT>,
1170   * or -1 if not found.
1171   * @stable ICU 2.0
1172   */
1173  inline int32_t lastIndexOf(const UChar *srcChars,
1174              int32_t srcLength,
1175              int32_t start,
1176              int32_t length) const;
1177
1178  /**
1179   * Locate in this the last occurrence in the range
1180   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1181   * in <TT>srcChars</TT> in the range
1182   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1183   * using bitwise comparison.
1184   * @param srcChars The text to search for.
1185   * @param srcStart the offset into <TT>srcChars</TT> at which
1186   * to start matching
1187   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1188   * @param start the offset into this at which to start matching
1189   * @param length the number of characters in this to search
1190   * @return The offset into this of the start of <TT>srcChars</TT>,
1191   * or -1 if not found.
1192   * @stable ICU 2.0
1193   */
1194  int32_t lastIndexOf(const UChar *srcChars,
1195              int32_t srcStart,
1196              int32_t srcLength,
1197              int32_t start,
1198              int32_t length) const;
1199
1200  /**
1201   * Locate in this the last occurrence of the BMP code point <code>c</code>,
1202   * using bitwise comparison.
1203   * @param c The code unit to search for.
1204   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1205   * @stable ICU 2.0
1206   */
1207  inline int32_t lastIndexOf(UChar c) const;
1208
1209  /**
1210   * Locate in this the last occurrence of the code point <TT>c</TT>,
1211   * using bitwise comparison.
1212   *
1213   * @param c The code point to search for.
1214   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1215   * @stable ICU 2.0
1216   */
1217  inline int32_t lastIndexOf(UChar32 c) const;
1218
1219  /**
1220   * Locate in this the last occurrence of the BMP code point <code>c</code>
1221   * starting at offset <TT>start</TT>, using bitwise comparison.
1222   * @param c The code unit to search for.
1223   * @param start The offset at which searching will start.
1224   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1225   * @stable ICU 2.0
1226   */
1227  inline int32_t lastIndexOf(UChar c,
1228              int32_t start) const;
1229
1230  /**
1231   * Locate in this the last occurrence of the code point <TT>c</TT>
1232   * starting at offset <TT>start</TT>, using bitwise comparison.
1233   *
1234   * @param c The code point to search for.
1235   * @param start The offset at which searching will start.
1236   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1237   * @stable ICU 2.0
1238   */
1239  inline int32_t lastIndexOf(UChar32 c,
1240              int32_t start) const;
1241
1242  /**
1243   * Locate in this the last occurrence of the BMP code point <code>c</code>
1244   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1245   * using bitwise comparison.
1246   * @param c The code unit to search for.
1247   * @param start the offset into this at which to start matching
1248   * @param length the number of characters in this to search
1249   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1250   * @stable ICU 2.0
1251   */
1252  inline int32_t lastIndexOf(UChar c,
1253              int32_t start,
1254              int32_t length) const;
1255
1256  /**
1257   * Locate in this the last occurrence of the code point <TT>c</TT>
1258   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1259   * using bitwise comparison.
1260   *
1261   * @param c The code point to search for.
1262   * @param start the offset into this at which to start matching
1263   * @param length the number of characters in this to search
1264   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1265   * @stable ICU 2.0
1266   */
1267  inline int32_t lastIndexOf(UChar32 c,
1268              int32_t start,
1269              int32_t length) const;
1270
1271
1272  /* Character access */
1273
1274  /**
1275   * Return the code unit at offset <tt>offset</tt>.
1276   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1277   * @param offset a valid offset into the text
1278   * @return the code unit at offset <tt>offset</tt>
1279   *         or 0xffff if the offset is not valid for this string
1280   * @stable ICU 2.0
1281   */
1282  inline UChar charAt(int32_t offset) const;
1283
1284  /**
1285   * Return the code unit at offset <tt>offset</tt>.
1286   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1287   * @param offset a valid offset into the text
1288   * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1289   * @stable ICU 2.0
1290   */
1291  inline UChar operator[] (int32_t offset) const;
1292
1293  /**
1294   * Return the code point that contains the code unit
1295   * at offset <tt>offset</tt>.
1296   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1297   * @param offset a valid offset into the text
1298   * that indicates the text offset of any of the code units
1299   * that will be assembled into a code point (21-bit value) and returned
1300   * @return the code point of text at <tt>offset</tt>
1301   *         or 0xffff if the offset is not valid for this string
1302   * @stable ICU 2.0
1303   */
1304  UChar32 char32At(int32_t offset) const;
1305
1306  /**
1307   * Adjust a random-access offset so that
1308   * it points to the beginning of a Unicode character.
1309   * The offset that is passed in points to
1310   * any code unit of a code point,
1311   * while the returned offset will point to the first code unit
1312   * of the same code point.
1313   * In UTF-16, if the input offset points to a second surrogate
1314   * of a surrogate pair, then the returned offset will point
1315   * to the first surrogate.
1316   * @param offset a valid offset into one code point of the text
1317   * @return offset of the first code unit of the same code point
1318   * @see U16_SET_CP_START
1319   * @stable ICU 2.0
1320   */
1321  int32_t getChar32Start(int32_t offset) const;
1322
1323  /**
1324   * Adjust a random-access offset so that
1325   * it points behind a Unicode character.
1326   * The offset that is passed in points behind
1327   * any code unit of a code point,
1328   * while the returned offset will point behind the last code unit
1329   * of the same code point.
1330   * In UTF-16, if the input offset points behind the first surrogate
1331   * (i.e., to the second surrogate)
1332   * of a surrogate pair, then the returned offset will point
1333   * behind the second surrogate (i.e., to the first code unit after the pair).
1334   * @param offset a valid offset after any code unit of a code point of the text
1335   * @return offset of the first code unit after the same code point
1336   * @see U16_SET_CP_LIMIT
1337   * @stable ICU 2.0
1338   */
1339  int32_t getChar32Limit(int32_t offset) const;
1340
1341  /**
1342   * Move the code unit index along the string by delta code points.
1343   * Interpret the input index as a code unit-based offset into the string,
1344   * move the index forward or backward by delta code points, and
1345   * return the resulting index.
1346   * The input index should point to the first code unit of a code point,
1347   * if there is more than one.
1348   *
1349   * Both input and output indexes are code unit-based as for all
1350   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1351   * If delta<0 then the index is moved backward (toward the start of the string).
1352   * If delta>0 then the index is moved forward (toward the end of the string).
1353   *
1354   * This behaves like CharacterIterator::move32(delta, kCurrent).
1355   *
1356   * Behavior for out-of-bounds indexes:
1357   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1358   * if the input index<0 then it is pinned to 0;
1359   * if it is index>length() then it is pinned to length().
1360   * Afterwards, the index is moved by <code>delta</code> code points
1361   * forward or backward,
1362   * but no further backward than to 0 and no further forward than to length().
1363   * The resulting index return value will be in between 0 and length(), inclusively.
1364   *
1365   * Examples:
1366   * <pre>
1367   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1368   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1369   *
1370   * // initial index: position of U+10000
1371   * int32_t index=1;
1372   *
1373   * // the following examples will all result in index==4, position of U+10ffff
1374   *
1375   * // skip 2 code points from some position in the string
1376   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1377   *
1378   * // go to the 3rd code point from the start of s (0-based)
1379   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1380   *
1381   * // go to the next-to-last code point of s
1382   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1383   * </pre>
1384   *
1385   * @param index input code unit index
1386   * @param delta (signed) code point count to move the index forward or backward
1387   *        in the string
1388   * @return the resulting code unit index
1389   * @stable ICU 2.0
1390   */
1391  int32_t moveIndex32(int32_t index, int32_t delta) const;
1392
1393  /* Substring extraction */
1394
1395  /**
1396   * Copy the characters in the range
1397   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1398   * beginning at <tt>dstStart</tt>.
1399   * If the string aliases to <code>dst</code> itself as an external buffer,
1400   * then extract() will not copy the contents.
1401   *
1402   * @param start offset of first character which will be copied into the array
1403   * @param length the number of characters to extract
1404   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1405   * must be at least (<tt>dstStart + length</tt>).
1406   * @param dstStart the offset in <TT>dst</TT> where the first character
1407   * will be extracted
1408   * @stable ICU 2.0
1409   */
1410  inline void extract(int32_t start,
1411           int32_t length,
1412           UChar *dst,
1413           int32_t dstStart = 0) const;
1414
1415  /**
1416   * Copy the contents of the string into dest.
1417   * This is a convenience function that
1418   * checks if there is enough space in dest,
1419   * extracts the entire string if possible,
1420   * and NUL-terminates dest if possible.
1421   *
1422   * If the string fits into dest but cannot be NUL-terminated
1423   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1424   * If the string itself does not fit into dest
1425   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1426   *
1427   * If the string aliases to <code>dest</code> itself as an external buffer,
1428   * then extract() will not copy the contents.
1429   *
1430   * @param dest Destination string buffer.
1431   * @param destCapacity Number of UChars available at dest.
1432   * @param errorCode ICU error code.
1433   * @return length()
1434   * @stable ICU 2.0
1435   */
1436  int32_t
1437  extract(UChar *dest, int32_t destCapacity,
1438          UErrorCode &errorCode) const;
1439
1440  /**
1441   * Copy the characters in the range
1442   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1443   * <tt>target</tt>.
1444   * @param start offset of first character which will be copied
1445   * @param length the number of characters to extract
1446   * @param target UnicodeString into which to copy characters.
1447   *               This function does not return a value.
1448   * @stable ICU 2.0
1449   */
1450  inline void extract(int32_t start,
1451           int32_t length,
1452           UnicodeString& target) const;
1453
1454  /**
1455   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1456   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1457   * @param start offset of first character which will be copied into the array
1458   * @param limit offset immediately following the last character to be copied
1459   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1460   * must be at least (<tt>dstStart + (limit - start)</tt>).
1461   * @param dstStart the offset in <TT>dst</TT> where the first character
1462   * will be extracted
1463   * @stable ICU 2.0
1464   */
1465  inline void extractBetween(int32_t start,
1466              int32_t limit,
1467              UChar *dst,
1468              int32_t dstStart = 0) const;
1469
1470  /**
1471   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1472   * into the UnicodeString <tt>target</tt>.  Replaceable API.
1473   * @param start offset of first character which will be copied
1474   * @param limit offset immediately following the last character to be copied
1475   * @param target UnicodeString into which to copy characters.
1476   *               This function does not return a value.
1477   * @stable ICU 2.0
1478   */
1479  virtual void extractBetween(int32_t start,
1480              int32_t limit,
1481              UnicodeString& target) const;
1482
1483  /**
1484   * Copy the characters in the range
1485   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1486   * All characters must be invariant (see utypes.h).
1487   * Use US_INV as the last, signature-distinguishing parameter.
1488   *
1489   * This function does not write any more than <code>targetCapacity</code>
1490   * characters but returns the length of the entire output string
1491   * so that one can allocate a larger buffer and call the function again
1492   * if necessary.
1493   * The output string is NUL-terminated if possible.
1494   *
1495   * @param start offset of first character which will be copied
1496   * @param startLength the number of characters to extract
1497   * @param target the target buffer for extraction, can be NULL
1498   *               if targetCapacity is 0
1499   * @param targetCapacity the length of the target buffer
1500   * @param inv Signature-distinguishing parameter, use US_INV.
1501   * @return the output string length, not including the terminating NUL
1502   * @stable ICU 3.2
1503   */
1504  int32_t extract(int32_t start,
1505           int32_t startLength,
1506           char *target,
1507           int32_t targetCapacity,
1508           enum EInvariant inv) const;
1509
1510#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1511
1512  /**
1513   * Copy the characters in the range
1514   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1515   * in the platform's default codepage.
1516   * This function does not write any more than <code>targetLength</code>
1517   * characters but returns the length of the entire output string
1518   * so that one can allocate a larger buffer and call the function again
1519   * if necessary.
1520   * The output string is NUL-terminated if possible.
1521   *
1522   * @param start offset of first character which will be copied
1523   * @param startLength the number of characters to extract
1524   * @param target the target buffer for extraction
1525   * @param targetLength the length of the target buffer
1526   * If <TT>target</TT> is NULL, then the number of bytes required for
1527   * <TT>target</TT> is returned.
1528   * @return the output string length, not including the terminating NUL
1529   * @stable ICU 2.0
1530   */
1531  int32_t extract(int32_t start,
1532           int32_t startLength,
1533           char *target,
1534           uint32_t targetLength) const;
1535
1536#endif
1537
1538#if !UCONFIG_NO_CONVERSION
1539
1540  /**
1541   * Copy the characters in the range
1542   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1543   * in a specified codepage.
1544   * The output string is NUL-terminated.
1545   *
1546   * Recommendation: For invariant-character strings use
1547   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1548   * because it avoids object code dependencies of UnicodeString on
1549   * the conversion code.
1550   *
1551   * @param start offset of first character which will be copied
1552   * @param startLength the number of characters to extract
1553   * @param target the target buffer for extraction
1554   * @param codepage the desired codepage for the characters.  0 has
1555   * the special meaning of the default codepage
1556   * If <code>codepage</code> is an empty string (<code>""</code>),
1557   * then a simple conversion is performed on the codepage-invariant
1558   * subset ("invariant characters") of the platform encoding. See utypes.h.
1559   * If <TT>target</TT> is NULL, then the number of bytes required for
1560   * <TT>target</TT> is returned. It is assumed that the target is big enough
1561   * to fit all of the characters.
1562   * @return the output string length, not including the terminating NUL
1563   * @stable ICU 2.0
1564   */
1565  inline int32_t extract(int32_t start,
1566                 int32_t startLength,
1567                 char *target,
1568                 const char *codepage = 0) const;
1569
1570  /**
1571   * Copy the characters in the range
1572   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1573   * in a specified codepage.
1574   * This function does not write any more than <code>targetLength</code>
1575   * characters but returns the length of the entire output string
1576   * so that one can allocate a larger buffer and call the function again
1577   * if necessary.
1578   * The output string is NUL-terminated if possible.
1579   *
1580   * Recommendation: For invariant-character strings use
1581   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1582   * because it avoids object code dependencies of UnicodeString on
1583   * the conversion code.
1584   *
1585   * @param start offset of first character which will be copied
1586   * @param startLength the number of characters to extract
1587   * @param target the target buffer for extraction
1588   * @param targetLength the length of the target buffer
1589   * @param codepage the desired codepage for the characters.  0 has
1590   * the special meaning of the default codepage
1591   * If <code>codepage</code> is an empty string (<code>""</code>),
1592   * then a simple conversion is performed on the codepage-invariant
1593   * subset ("invariant characters") of the platform encoding. See utypes.h.
1594   * If <TT>target</TT> is NULL, then the number of bytes required for
1595   * <TT>target</TT> is returned.
1596   * @return the output string length, not including the terminating NUL
1597   * @stable ICU 2.0
1598   */
1599  int32_t extract(int32_t start,
1600           int32_t startLength,
1601           char *target,
1602           uint32_t targetLength,
1603           const char *codepage) const;
1604
1605  /**
1606   * Convert the UnicodeString into a codepage string using an existing UConverter.
1607   * The output string is NUL-terminated if possible.
1608   *
1609   * This function avoids the overhead of opening and closing a converter if
1610   * multiple strings are extracted.
1611   *
1612   * @param dest destination string buffer, can be NULL if destCapacity==0
1613   * @param destCapacity the number of chars available at dest
1614   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1615   *        or NULL for the default converter
1616   * @param errorCode normal ICU error code
1617   * @return the length of the output string, not counting the terminating NUL;
1618   *         if the length is greater than destCapacity, then the string will not fit
1619   *         and a buffer of the indicated length would need to be passed in
1620   * @stable ICU 2.0
1621   */
1622  int32_t extract(char *dest, int32_t destCapacity,
1623                  UConverter *cnv,
1624                  UErrorCode &errorCode) const;
1625
1626#endif
1627
1628  /**
1629   * Create a temporary substring for the specified range.
1630   * Unlike the substring constructor and setTo() functions,
1631   * the object returned here will be a read-only alias (using getBuffer())
1632   * rather than copying the text.
1633   * As a result, this substring operation is much faster but requires
1634   * that the original string not be modified or deleted during the lifetime
1635   * of the returned substring object.
1636   * @param start offset of the first character visible in the substring
1637   * @param length length of the substring
1638   * @return a read-only alias UnicodeString object for the substring
1639   * @stable ICU 4.4
1640   */
1641  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1642
1643  /**
1644   * Create a temporary substring for the specified range.
1645   * Same as tempSubString(start, length) except that the substring range
1646   * is specified as a (start, limit) pair (with an exclusive limit index)
1647   * rather than a (start, length) pair.
1648   * @param start offset of the first character visible in the substring
1649   * @param limit offset immediately following the last character visible in the substring
1650   * @return a read-only alias UnicodeString object for the substring
1651   * @stable ICU 4.4
1652   */
1653  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1654
1655  /**
1656   * Convert the UnicodeString to UTF-8 and write the result
1657   * to a ByteSink. This is called by toUTF8String().
1658   * Unpaired surrogates are replaced with U+FFFD.
1659   * Calls u_strToUTF8WithSub().
1660   *
1661   * @param sink A ByteSink to which the UTF-8 version of the string is written.
1662   *             sink.Flush() is called at the end.
1663   * @stable ICU 4.2
1664   * @see toUTF8String
1665   */
1666  void toUTF8(ByteSink &sink) const;
1667
1668#if U_HAVE_STD_STRING
1669
1670  /**
1671   * Convert the UnicodeString to UTF-8 and append the result
1672   * to a standard string.
1673   * Unpaired surrogates are replaced with U+FFFD.
1674   * Calls toUTF8().
1675   *
1676   * @param result A standard string (or a compatible object)
1677   *        to which the UTF-8 version of the string is appended.
1678   * @return The string object.
1679   * @stable ICU 4.2
1680   * @see toUTF8
1681   */
1682  template<typename StringClass>
1683  StringClass &toUTF8String(StringClass &result) const {
1684    StringByteSink<StringClass> sink(&result);  // byte sink constructed over the caller's string
1685    this->toUTF8(sink);                         // emit this string's UTF-8 form into the sink
1686    return result;                              // hand back the same object to allow chaining
1687  }
1688
1689#endif
1690
1691  /**
1692   * Convert the UnicodeString to UTF-32.
1693   * Unpaired surrogates are replaced with U+FFFD.
1694   * Calls u_strToUTF32WithSub().
1695   *
1696   * @param utf32 destination string buffer, can be NULL if capacity==0
1697   * @param capacity the number of UChar32s available at utf32
1698   * @param errorCode Standard ICU error code. Its input value must
1699   *                  pass the U_SUCCESS() test, or else the function returns
1700   *                  immediately. Check for U_FAILURE() on output or use with
1701   *                  function chaining. (See User Guide for details.)
1702   * @return The length of the UTF-32 string.
1703   * @see fromUTF32
1704   * @stable ICU 4.2
1705   */
1706  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1707
1708  /* Length operations */
1709
1710  /**
1711   * Return the length of the UnicodeString object.
1712   * The length is the number of UChar code units in the UnicodeString.
1713   * If you want the number of code points, please use countChar32().
1714   * @return the length of the UnicodeString object
1715   * @see countChar32
1716   * @stable ICU 2.0
1717   */
1718  inline int32_t length(void) const;
1719
1720  /**
1721   * Count Unicode code points in the length UChar code units of the string.
1722   * A code point may occupy either one or two UChar code units.
1723   * Counting code points involves reading all code units.
1724   *
1725   * This function is basically the inverse of moveIndex32().
1726   *
1727   * @param start the index of the first code unit to check
1728   * @param length the number of UChar code units to check
1729   * @return the number of code points in the specified code units
1730   * @see length
1731   * @stable ICU 2.0
1732   */
1733  int32_t
1734  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1735
1736  /**
1737   * Check if the length UChar code units of the string
1738   * contain more Unicode code points than a certain number.
1739   * This is more efficient than counting all code points in this part of the string
1740   * and comparing that number with a threshold.
1741   * This function may not need to scan the string at all if the length
1742   * falls within a certain range, and
1743   * never needs to count more than 'number+1' code points.
1744   * Logically equivalent to (countChar32(start, length)>number).
1745   * A Unicode code point may occupy either one or two UChar code units.
1746   *
1747   * @param start the index of the first code unit to check (0 for the entire string)
1748   * @param length the number of UChar code units to check
1749   *               (use INT32_MAX for the entire string; remember that start/length
1750   *                values are pinned)
1751   * @param number The number of code points in the (sub)string is compared against
1752   *               the 'number' parameter.
1753   * @return Boolean value for whether the string contains more Unicode code points
1754   *         than 'number'. Same as (u_countChar32(s, length)>number).
1755   * @see countChar32
1756   * @see u_strHasMoreChar32Than
1757   * @stable ICU 2.4
1758   */
1759  UBool
1760  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1761
1762  /**
1763   * Determine if this string is empty.
1764   * @return TRUE if this string contains 0 characters, FALSE otherwise.
1765   * @stable ICU 2.0
1766   */
1767  inline UBool isEmpty(void) const;
1768
1769  /**
1770   * Return the capacity of the internal buffer of the UnicodeString object.
1771   * This is useful together with the getBuffer functions.
1772   * See there for details.
1773   *
1774   * @return the number of UChars available in the internal buffer
1775   * @see getBuffer
1776   * @stable ICU 2.0
1777   */
1778  inline int32_t getCapacity(void) const;
1779
1780  /* Other operations */
1781
1782  /**
1783   * Generate a hash code for this object.
1784   * @return The hash code of this UnicodeString.
1785   * @stable ICU 2.0
1786   */
1787  inline int32_t hashCode(void) const;
1788
1789  /**
1790   * Determine if this object contains a valid string.
1791   * A bogus string has no value. It is different from an empty string,
1792   * although in both cases isEmpty() returns TRUE and length() returns 0.
1793   * setToBogus() and isBogus() can be used to indicate that no string value is available.
1794   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1795   * length() returns 0.
1796   *
1797   * @return TRUE if the string is bogus/invalid, FALSE otherwise
1798   * @see setToBogus()
1799   * @stable ICU 2.0
1800   */
1801  inline UBool isBogus(void) const;
1802
1803
1804  //========================================
1805  // Write operations
1806  //========================================
1807
1808  /* Assignment operations */
1809
1810  /**
1811   * Assignment operator.  Replace the characters in this UnicodeString
1812   * with the characters from <TT>srcText</TT>.
1813   * @param srcText The text containing the characters to replace
1814   * @return a reference to this
1815   * @stable ICU 2.0
1816   */
1817  UnicodeString &operator=(const UnicodeString &srcText);
1818
1819  /**
1820   * Almost the same as the assignment operator.
1821   * Replace the characters in this UnicodeString
1822   * with the characters from <code>srcText</code>.
1823   *
1824   * This function works the same as the assignment operator
1825   * for all strings except for ones that are readonly aliases.
1826   *
1827   * Starting with ICU 2.4, the assignment operator and the copy constructor
1828   * allocate a new buffer and copy the buffer contents even for readonly aliases.
1829   * This function implements the old, more efficient but less safe behavior
1830   * of making this string also a readonly alias to the same buffer.
1831   *
1832   * The fastCopyFrom function must be used only if it is known that the lifetime of
1833   * this UnicodeString does not exceed the lifetime of the aliased buffer
1834   * including its contents, for example for strings from resource bundles
1835   * or aliases to string constants.
1836   *
1837   * @param src The text containing the characters to replace.
1838   * @return a reference to this
1839   * @stable ICU 2.4
1840   */
1841  UnicodeString &fastCopyFrom(const UnicodeString &src);
1842
1843  /**
1844   * Assignment operator.  Replace the characters in this UnicodeString
1845   * with the code unit <TT>ch</TT>.
1846   * @param ch the code unit to replace
1847   * @return a reference to this
1848   * @stable ICU 2.0
1849   */
1850  inline UnicodeString& operator= (UChar ch);
1851
1852  /**
1853   * Assignment operator.  Replace the characters in this UnicodeString
1854   * with the code point <TT>ch</TT>.
1855   * @param ch the code point to replace
1856   * @return a reference to this
1857   * @stable ICU 2.0
1858   */
1859  inline UnicodeString& operator= (UChar32 ch);
1860
1861  /**
1862   * Set the text in the UnicodeString object to the characters
1863   * in <TT>srcText</TT> in the range
1864   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1865   * <TT>srcText</TT> is not modified.
1866   * @param srcText the source for the new characters
1867   * @param srcStart the offset into <TT>srcText</TT> where new characters
1868   * will be obtained
1869   * @return a reference to this
1870   * @stable ICU 2.2
1871   */
1872  inline UnicodeString& setTo(const UnicodeString& srcText,
1873               int32_t srcStart);
1874
1875  /**
1876   * Set the text in the UnicodeString object to the characters
1877   * in <TT>srcText</TT> in the range
1878   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1879   * <TT>srcText</TT> is not modified.
1880   * @param srcText the source for the new characters
1881   * @param srcStart the offset into <TT>srcText</TT> where new characters
1882   * will be obtained
1883   * @param srcLength the number of characters in <TT>srcText</TT> in the
1884   * replace string.
1885   * @return a reference to this
1886   * @stable ICU 2.0
1887   */
1888  inline UnicodeString& setTo(const UnicodeString& srcText,
1889               int32_t srcStart,
1890               int32_t srcLength);
1891
1892  /**
1893   * Set the text in the UnicodeString object to the characters in
1894   * <TT>srcText</TT>.
1895   * <TT>srcText</TT> is not modified.
1896   * @param srcText the source for the new characters
1897   * @return a reference to this
1898   * @stable ICU 2.0
1899   */
1900  inline UnicodeString& setTo(const UnicodeString& srcText);
1901
1902  /**
1903   * Set the characters in the UnicodeString object to the characters
1904   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1905   * @param srcChars the source for the new characters
1906   * @param srcLength the number of Unicode characters in srcChars.
1907   * @return a reference to this
1908   * @stable ICU 2.0
1909   */
1910  inline UnicodeString& setTo(const UChar *srcChars,
1911               int32_t srcLength);
1912
1913  /**
1914   * Set the characters in the UnicodeString object to the code unit
1915   * <TT>srcChar</TT>.
1916   * @param srcChar the code unit which becomes the UnicodeString's character
1917   * content
1918   * @return a reference to this
1919   * @stable ICU 2.0
1920   */
1921  UnicodeString& setTo(UChar srcChar);
1922
1923  /**
1924   * Set the characters in the UnicodeString object to the code point
1925   * <TT>srcChar</TT>.
1926   * @param srcChar the code point which becomes the UnicodeString's character
1927   * content
1928   * @return a reference to this
1929   * @stable ICU 2.0
1930   */
1931  UnicodeString& setTo(UChar32 srcChar);
1932
1933  /**
1934   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1935   * The text will be used for the UnicodeString object, but
1936   * it will not be released when the UnicodeString is destroyed.
1937   * This has copy-on-write semantics:
1938   * When the string is modified, then the buffer is first copied into
1939   * newly allocated memory.
1940   * The aliased buffer is never modified.
1941   *
1942   * In an assignment to another UnicodeString, when using the copy constructor
1943   * or the assignment operator, the text will be copied.
1944   * When using fastCopyFrom(), the text will be aliased again,
1945   * so that both strings then alias the same readonly-text.
1946   *
1947   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1948   *                     This must be true if <code>textLength==-1</code>.
1949   * @param text The characters to alias for the UnicodeString.
1950   * @param textLength The number of Unicode characters in <code>text</code> to alias.
1951   *                   If -1, then this function will determine the length
1952   *                   by calling <code>u_strlen()</code>.
1953   * @return a reference to this
1954   * @stable ICU 2.0
1955   */
1956  UnicodeString &setTo(UBool isTerminated,
1957                       const UChar *text,
1958                       int32_t textLength);
1959
1960  /**
1961   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1962   * The text will be used for the UnicodeString object, but
1963   * it will not be released when the UnicodeString is destroyed.
1964   * This has write-through semantics:
1965   * For as long as the capacity of the buffer is sufficient, write operations
1966   * will directly affect the buffer. When more capacity is necessary, then
1967   * a new buffer will be allocated and the contents copied as with regularly
1968   * constructed strings.
1969   * In an assignment to another UnicodeString, the buffer will be copied.
1970   * The extract(UChar *dst) function detects whether the dst pointer is the same
1971   * as the string buffer itself and will in this case not copy the contents.
1972   *
1973   * @param buffer The characters to alias for the UnicodeString.
1974   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1975   * @param buffCapacity The size of <code>buffer</code> in UChars.
1976   * @return a reference to this
1977   * @stable ICU 2.0
1978   */
1979  UnicodeString &setTo(UChar *buffer,
1980                       int32_t buffLength,
1981                       int32_t buffCapacity);
1982
1983  /**
1984   * Make this UnicodeString object invalid.
1985   * The string will test TRUE with isBogus().
1986   *
1987   * A bogus string has no value. It is different from an empty string.
1988   * It can be used to indicate that no string value is available.
1989   * getBuffer() and getTerminatedBuffer() return NULL, and
1990   * length() returns 0.
1991   *
1992   * This utility function is used throughout the UnicodeString
1993   * implementation to indicate that a UnicodeString operation failed,
1994   * and may be used in other functions,
1995   * especially but not exclusively when such functions do not
1996   * take a UErrorCode for simplicity.
1997   *
1998   * The following methods, and no others, will clear a string object's bogus flag:
1999   * - remove()
2000   * - remove(0, INT32_MAX)
2001   * - truncate(0)
2002   * - operator=() (assignment operator)
2003   * - setTo(...)
2004   *
2005   * The simplest way to turn a bogus string into an empty one
2006   * is to use the remove() function.
2007   * Examples for other functions that are equivalent to "set to empty string":
2008   * \code
2009   * if(s.isBogus()) {
2010   *   s.remove();           // set to an empty string (remove all), or
2011   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2012   *   s.truncate(0);        // set to an empty string (complete truncation), or
2013   *   s=UnicodeString();    // assign an empty string, or
2014   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2015   *   static const UChar nul=0;
2016   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2017   * }
2018   * \endcode
2019   *
2020   * @see isBogus()
2021   * @stable ICU 2.0
2022   */
2023  void setToBogus();
2024
2025  /**
2026   * Set the character at the specified offset to the specified character.
2027   * @param offset A valid offset into the text of the character to set
2028   * @param ch The new character
2029   * @return A reference to this
2030   * @stable ICU 2.0
2031   */
2032  UnicodeString& setCharAt(int32_t offset,
2033               UChar ch);
2034
2035
2036  /* Append operations */
2037
2038  /**
2039   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2040   * object.
2041   * @param ch the code unit to be appended
2042   * @return a reference to this
2043   * @stable ICU 2.0
2044   */
2045 inline  UnicodeString& operator+= (UChar ch);
2046
2047  /**
2048   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2049   * object.
2050   * @param ch the code point to be appended
2051   * @return a reference to this
2052   * @stable ICU 2.0
2053   */
2054 inline  UnicodeString& operator+= (UChar32 ch);
2055
2056  /**
2057   * Append operator. Append the characters in <TT>srcText</TT> to the
2058   * UnicodeString object. <TT>srcText</TT> is not modified.
2059   * @param srcText the source for the new characters
2060   * @return a reference to this
2061   * @stable ICU 2.0
2062   */
2063  inline UnicodeString& operator+= (const UnicodeString& srcText);
2064
2065  /**
2066   * Append the characters
2067   * in <TT>srcText</TT> in the range
2068   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2069   * end of the UnicodeString object. <TT>srcText</TT>
2070   * is not modified.
2071   * @param srcText the source for the new characters
2072   * @param srcStart the offset into <TT>srcText</TT> where new characters
2073   * will be obtained
2074   * @param srcLength the number of characters in <TT>srcText</TT> in
2075   * the append string
2076   * @return a reference to this
2077   * @stable ICU 2.0
2078   */
2079  inline UnicodeString& append(const UnicodeString& srcText,
2080            int32_t srcStart,
2081            int32_t srcLength);
2082
2083  /**
2084   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2085   * <TT>srcText</TT> is not modified.
2086   * @param srcText the source for the new characters
2087   * @return a reference to this
2088   * @stable ICU 2.0
2089   */
2090  inline UnicodeString& append(const UnicodeString& srcText);
2091
2092  /**
2093   * Append the characters in <TT>srcChars</TT> in the range
2094   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of
2095   * the UnicodeString object.
2096   * <TT>srcChars</TT> is not modified.
2097   * @param srcChars the source for the new characters
2098   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2099   * will be obtained
2100   * @param srcLength the number of characters in <TT>srcChars</TT> in
2101   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2102   * @return a reference to this
2103   * @stable ICU 2.0
2104   */
2105  inline UnicodeString& append(const UChar *srcChars,
2106            int32_t srcStart,
2107            int32_t srcLength);
2108
2109  /**
2110   * Append the characters in <TT>srcChars</TT> to the end of the
2111   * UnicodeString object. <TT>srcChars</TT> is not modified.
2112   * @param srcChars the source for the new characters
2113   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2114   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2115   * @return a reference to this
2116   * @stable ICU 2.0
2117   */
2118  inline UnicodeString& append(const UChar *srcChars,
2119            int32_t srcLength);
2120
2121  /**
2122   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2123   * @param srcChar the code unit to append
2124   * @return a reference to this
2125   * @stable ICU 2.0
2126   */
2127  inline UnicodeString& append(UChar srcChar);
2128
2129  /**
2130   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2131   * @param srcChar the code point to append
2132   * @return a reference to this
2133   * @stable ICU 2.0
2134   */
2135  UnicodeString& append(UChar32 srcChar);
2136
2137
2138  /* Insert operations */
2139
2140  /**
2141   * Insert the characters in <TT>srcText</TT> in the range
2142   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2143   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2144   * @param start the offset where the insertion begins
2145   * @param srcText the source for the new characters
2146   * @param srcStart the offset into <TT>srcText</TT> where new characters
2147   * will be obtained
2148   * @param srcLength the number of characters in <TT>srcText</TT> in
2149   * the insert string
2150   * @return a reference to this
2151   * @stable ICU 2.0
2152   */
2153  inline UnicodeString& insert(int32_t start,
2154            const UnicodeString& srcText,
2155            int32_t srcStart,
2156            int32_t srcLength);
2157
2158  /**
2159   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2160   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2161   * @param start the offset where the insertion begins
2162   * @param srcText the source for the new characters
2163   * @return a reference to this
2164   * @stable ICU 2.0
2165   */
2166  inline UnicodeString& insert(int32_t start,
2167            const UnicodeString& srcText);
2168
2169  /**
2170   * Insert the characters in <TT>srcChars</TT> in the range
2171   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2172   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2173   * @param start the offset at which the insertion begins
2174   * @param srcChars the source for the new characters
2175   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2176   * will be obtained
2177   * @param srcLength the number of characters in <TT>srcChars</TT>
2178   * in the insert string
2179   * @return a reference to this
2180   * @stable ICU 2.0
2181   */
2182  inline UnicodeString& insert(int32_t start,
2183            const UChar *srcChars,
2184            int32_t srcStart,
2185            int32_t srcLength);
2186
2187  /**
2188   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2189   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2190   * @param start the offset where the insertion begins
2191   * @param srcChars the source for the new characters
2192   * @param srcLength the number of Unicode characters in srcChars.
2193   * @return a reference to this
2194   * @stable ICU 2.0
2195   */
2196  inline UnicodeString& insert(int32_t start,
2197            const UChar *srcChars,
2198            int32_t srcLength);
2199
2200  /**
2201   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2202   * offset <TT>start</TT>.
2203   * @param start the offset at which the insertion occurs
2204   * @param srcChar the code unit to insert
2205   * @return a reference to this
2206   * @stable ICU 2.0
2207   */
2208  inline UnicodeString& insert(int32_t start,
2209            UChar srcChar);
2210
2211  /**
2212   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2213   * offset <TT>start</TT>.
2214   * @param start the offset at which the insertion occurs
2215   * @param srcChar the code point to insert
2216   * @return a reference to this
2217   * @stable ICU 2.0
2218   */
2219  inline UnicodeString& insert(int32_t start,
2220            UChar32 srcChar);
2221
2222
2223  /* Replace operations */
2224
2225  /**
2226   * Replace the characters in the range
2227   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2228   * <TT>srcText</TT> in the range
2229   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2230   * <TT>srcText</TT> is not modified.
2231   * @param start the offset at which the replace operation begins
2232   * @param length the number of characters to replace. The character at
2233   * <TT>start + length</TT> is not modified.
2234   * @param srcText the source for the new characters
2235   * @param srcStart the offset into <TT>srcText</TT> where new characters
2236   * will be obtained
2237   * @param srcLength the number of characters in <TT>srcText</TT> in
2238   * the replace string
2239   * @return a reference to this
2240   * @stable ICU 2.0
2241   */
2242  UnicodeString& replace(int32_t start,
2243             int32_t length,
2244             const UnicodeString& srcText,
2245             int32_t srcStart,
2246             int32_t srcLength);
2247
2248  /**
2249   * Replace the characters in the range
2250   * [<TT>start</TT>, <TT>start + length</TT>)
2251   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2252   *  not modified.
2253   * @param start the offset at which the replace operation begins
2254   * @param length the number of characters to replace. The character at
2255   * <TT>start + length</TT> is not modified.
2256   * @param srcText the source for the new characters
2257   * @return a reference to this
2258   * @stable ICU 2.0
2259   */
2260  UnicodeString& replace(int32_t start,
2261             int32_t length,
2262             const UnicodeString& srcText);
2263
2264  /**
2265   * Replace the characters in the range
2266   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2267   * <TT>srcChars</TT> in the range
2268   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2269   * is not modified.
2270   * @param start the offset at which the replace operation begins
2271   * @param length the number of characters to replace.  The character at
2272   * <TT>start + length</TT> is not modified.
2273   * @param srcChars the source for the new characters
2274   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2275   * will be obtained
2276   * @param srcLength the number of characters in <TT>srcChars</TT>
2277   * in the replace string
2278   * @return a reference to this
2279   * @stable ICU 2.0
2280   */
2281  UnicodeString& replace(int32_t start,
2282             int32_t length,
2283             const UChar *srcChars,
2284             int32_t srcStart,
2285             int32_t srcLength);
2286
2287  /**
2288   * Replace the characters in the range
2289   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2290   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2291   * @param start the offset at which the replace operation begins
2292   * @param length number of characters to replace.  The character at
2293   * <TT>start + length</TT> is not modified.
2294   * @param srcChars the source for the new characters
2295   * @param srcLength the number of Unicode characters in srcChars
2296   * @return a reference to this
2297   * @stable ICU 2.0
2298   */
2299  inline UnicodeString& replace(int32_t start,
2300             int32_t length,
2301             const UChar *srcChars,
2302             int32_t srcLength);
2303
2304  /**
2305   * Replace the characters in the range
2306   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2307   * <TT>srcChar</TT>.
2308   * @param start the offset at which the replace operation begins
2309   * @param length the number of characters to replace.  The character at
2310   * <TT>start + length</TT> is not modified.
2311   * @param srcChar the new code unit
2312   * @return a reference to this
2313   * @stable ICU 2.0
2314   */
2315  inline UnicodeString& replace(int32_t start,
2316             int32_t length,
2317             UChar srcChar);
2318
2319  /**
2320   * Replace the characters in the range
2321   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2322   * <TT>srcChar</TT>.
2323   * @param start the offset at which the replace operation begins
2324   * @param length the number of characters to replace.  The character at
2325   * <TT>start + length</TT> is not modified.
2326   * @param srcChar the new code point
2327   * @return a reference to this
2328   * @stable ICU 2.0
2329   */
2330  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2331
2332  /**
2333   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2334   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2335   * @param start the offset at which the replace operation begins
2336   * @param limit the offset immediately following the replace range
2337   * @param srcText the source for the new characters
2338   * @return a reference to this
2339   * @stable ICU 2.0
2340   */
2341  inline UnicodeString& replaceBetween(int32_t start,
2342                int32_t limit,
2343                const UnicodeString& srcText);
2344
2345  /**
2346   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2347   * with the characters in <TT>srcText</TT> in the range
2348   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2349   * @param start the offset at which the replace operation begins
2350   * @param limit the offset immediately following the replace range
2351   * @param srcText the source for the new characters
2352   * @param srcStart the offset into <TT>srcText</TT> where new characters
2353   * will be obtained
2354   * @param srcLimit the offset immediately following the range to copy
2355   * in <TT>srcText</TT>
2356   * @return a reference to this
2357   * @stable ICU 2.0
2358   */
2359  inline UnicodeString& replaceBetween(int32_t start,
2360                int32_t limit,
2361                const UnicodeString& srcText,
2362                int32_t srcStart,
2363                int32_t srcLimit);
2364
2365  /**
2366   * Replace a substring of this object with the given text.
2367   * @param start the beginning index, inclusive; <code>0 <= start
2368   * <= limit</code>.
2369   * @param limit the ending index, exclusive; <code>start <= limit
2370   * <= length()</code>.
2371   * @param text the text to replace characters <code>start</code>
2372   * to <code>limit - 1</code>
2373   * @stable ICU 2.0
2374   */
2375  virtual void handleReplaceBetween(int32_t start,
2376                                    int32_t limit,
2377                                    const UnicodeString& text);
2378
2379  /**
2380   * Replaceable API
2381   * @return TRUE if it has MetaData
2382   * @stable ICU 2.4
2383   */
2384  virtual UBool hasMetaData() const;
2385
2386  /**
2387   * Copy a substring of this object, retaining attribute (out-of-band)
2388   * information.  This method is used to duplicate or reorder substrings.
2389   * The destination index must not overlap the source range.
2390   *
2391   * @param start the beginning index, inclusive; <code>0 <= start <=
2392   * limit</code>.
2393   * @param limit the ending index, exclusive; <code>start <= limit <=
2394   * length()</code>.
2395   * @param dest the destination index.  The characters from
2396   * <code>start..limit-1</code> will be copied to <code>dest</code>.
2397   * Implementations of this method may assume that <code>dest <= start ||
2398   * dest >= limit</code>.
2399   * @stable ICU 2.0
2400   */
2401  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2402
2403  /* Search and replace operations */
2404
2405  /**
2406   * Replace all occurrences of characters in oldText with the characters
2407   * in newText
2408   * @param oldText the text containing the search text
2409   * @param newText the text containing the replacement text
2410   * @return a reference to this
2411   * @stable ICU 2.0
2412   */
2413  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2414                const UnicodeString& newText);
2415
2416  /**
2417   * Replace all occurrences of characters in oldText with characters
2418   * in newText
2419   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2420   * @param start the start of the range in which replace will be performed
2421   * @param length the length of the range in which replace will be performed
2422   * @param oldText the text containing the search text
2423   * @param newText the text containing the replacement text
2424   * @return a reference to this
2425   * @stable ICU 2.0
2426   */
2427  inline UnicodeString& findAndReplace(int32_t start,
2428                int32_t length,
2429                const UnicodeString& oldText,
2430                const UnicodeString& newText);
2431
2432  /**
2433   * Replace all occurrences of characters in oldText in the range
2434   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2435   * in newText in the range
2436   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2437   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2438   * @param start the start of the range in which replace will be performed
2439   * @param length the length of the range in which replace will be performed
2440   * @param oldText the text containing the search text
2441   * @param oldStart the start of the search range in <TT>oldText</TT>
2442   * @param oldLength the length of the search range in <TT>oldText</TT>
2443   * @param newText the text containing the replacement text
2444   * @param newStart the start of the replacement range in <TT>newText</TT>
2445   * @param newLength the length of the replacement range in <TT>newText</TT>
2446   * @return a reference to this
2447   * @stable ICU 2.0
2448   */
2449  UnicodeString& findAndReplace(int32_t start,
2450                int32_t length,
2451                const UnicodeString& oldText,
2452                int32_t oldStart,
2453                int32_t oldLength,
2454                const UnicodeString& newText,
2455                int32_t newStart,
2456                int32_t newLength);
2457
2458
2459  /* Remove operations */
2460
2461  /**
2462   * Remove all characters from the UnicodeString object.
2463   * @return a reference to this
2464   * @stable ICU 2.0
2465   */
2466  inline UnicodeString& remove(void);
2467
2468  /**
2469   * Remove the characters in the range
2470   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2471   * @param start the offset of the first character to remove
2472   * @param length the number of characters to remove; defaults to INT32_MAX
2473   * @return a reference to this
2474   * @stable ICU 2.0
2475   */
2476  inline UnicodeString& remove(int32_t start,
2477                               int32_t length = (int32_t)INT32_MAX);
2478
2479  /**
2480   * Remove the characters in the range
2481   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2482   * @param start the offset of the first character to remove
2483   * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2484   * @return a reference to this
2485   * @stable ICU 2.0
2486   */
2487  inline UnicodeString& removeBetween(int32_t start,
2488                                      int32_t limit = (int32_t)INT32_MAX);
2489
2490  /**
2491   * Retain only the characters in the range
2492   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2493   * Removes characters before <code>start</code> and at and after <code>limit</code>.
2494   * @param start the offset of the first character to retain
2495   * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
2496   * @return a reference to this
2497   * @stable ICU 4.4
2498   */
2499  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2500
2501  /* Length operations */
2502
2503  /**
2504   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2505   * If the length of this UnicodeString is less than targetLength,
2506   * targetLength - length() copies of padChar will be added to the
2507   * beginning of this UnicodeString.
2508   * @param targetLength the desired length of the string
2509   * @param padChar the character to use for padding. Defaults to
2510   * space (U+0020)
2511   * @return TRUE if the text was padded, FALSE otherwise.
2512   * @stable ICU 2.0
2513   */
2514  UBool padLeading(int32_t targetLength,
2515                    UChar padChar = 0x0020);
2516
2517  /**
2518   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2519   * If the length of this UnicodeString is less than targetLength,
2520   * targetLength - length() copies of padChar will be added to the
2521   * end of this UnicodeString.
2522   * @param targetLength the desired length of the string
2523   * @param padChar the character to use for padding. Defaults to
2524   * space (U+0020)
2525   * @return TRUE if the text was padded, FALSE otherwise.
2526   * @stable ICU 2.0
2527   */
2528  UBool padTrailing(int32_t targetLength,
2529                     UChar padChar = 0x0020);
2530
2531  /**
2532   * Truncate this UnicodeString to the <TT>targetLength</TT>.
2533   * @param targetLength the desired length of this UnicodeString.
2534   * @return TRUE if the text was truncated, FALSE otherwise
2535   * @stable ICU 2.0
2536   */
2537  inline UBool truncate(int32_t targetLength);
2538
2539  /**
2540   * Trims leading and trailing whitespace from this UnicodeString.
2541   * @return a reference to this
2542   * @stable ICU 2.0
2543   */
2544  UnicodeString& trim(void);
2545
2546
2547  /* Miscellaneous operations */
2548
2549  /**
2550   * Reverse this UnicodeString in place.
2551   * @return a reference to this
2552   * @stable ICU 2.0
2553   */
2554  inline UnicodeString& reverse(void);
2555
2556  /**
2557   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2558   * this UnicodeString.
2559   * @param start the start of the range to reverse
2560   * @param length the number of characters to reverse
2561   * @return a reference to this
2562   * @stable ICU 2.0
2563   */
2564  inline UnicodeString& reverse(int32_t start,
2565             int32_t length);
2566
2567  /**
2568   * Convert the characters in this to UPPER CASE following the conventions of
2569   * the default locale.
2570   * @return A reference to this.
2571   * @stable ICU 2.0
2572   */
2573  UnicodeString& toUpper(void);
2574
2575  /**
2576   * Convert the characters in this to UPPER CASE following the conventions of
2577   * a specific locale.
2578   * @param locale The locale containing the conventions to use.
2579   * @return A reference to this.
2580   * @stable ICU 2.0
2581   */
2582  UnicodeString& toUpper(const Locale& locale);
2583
2584  /**
2585   * Convert the characters in this to lower case following the conventions of
2586   * the default locale.
2587   * @return A reference to this.
2588   * @stable ICU 2.0
2589   */
2590  UnicodeString& toLower(void);
2591
2592  /**
2593   * Convert the characters in this to lower case following the conventions of
2594   * a specific locale.
2595   * @param locale The locale containing the conventions to use.
2596   * @return A reference to this.
2597   * @stable ICU 2.0
2598   */
2599  UnicodeString& toLower(const Locale& locale);
2600
2601#if !UCONFIG_NO_BREAK_ITERATION
2602
2603  /**
2604   * Titlecase this string, convenience function using the default locale.
2605   *
2606   * Casing is locale-dependent and context-sensitive.
2607   * Titlecasing uses a break iterator to find the first characters of words
2608   * that are to be titlecased. It titlecases those characters and lowercases
2609   * all others.
2610   *
2611   * The titlecase break iterator can be provided to customize for arbitrary
2612   * styles, using rules and dictionaries beyond the standard iterators.
2613   * It may be more efficient to always provide an iterator to avoid
2614   * opening and closing one for each string.
2615   * The standard titlecase iterator for the root locale implements the
2616   * algorithm of Unicode TR 21.
2617   *
2618   * This function uses only the setText(), first() and next() methods of the
2619   * provided break iterator.
2620   *
2621   * @param titleIter A break iterator to find the first characters of words
2622   *                  that are to be titlecased.
2623   *                  If none is provided (0), then a standard titlecase
2624   *                  break iterator is opened.
2625   *                  Otherwise the provided iterator is set to the string's text.
2626   * @return A reference to this.
2627   * @stable ICU 2.1
2628   */
2629  UnicodeString &toTitle(BreakIterator *titleIter);
2630
2631  /**
2632   * Titlecase this string.
2633   *
2634   * Casing is locale-dependent and context-sensitive.
2635   * Titlecasing uses a break iterator to find the first characters of words
2636   * that are to be titlecased. It titlecases those characters and lowercases
2637   * all others.
2638   *
2639   * The titlecase break iterator can be provided to customize for arbitrary
2640   * styles, using rules and dictionaries beyond the standard iterators.
2641   * It may be more efficient to always provide an iterator to avoid
2642   * opening and closing one for each string.
2643   * The standard titlecase iterator for the root locale implements the
2644   * algorithm of Unicode TR 21.
2645   *
2646   * This function uses only the setText(), first() and next() methods of the
2647   * provided break iterator.
2648   *
2649   * @param titleIter A break iterator to find the first characters of words
2650   *                  that are to be titlecased.
2651   *                  If none is provided (0), then a standard titlecase
2652   *                  break iterator is opened.
2653   *                  Otherwise the provided iterator is set to the string's text.
2654   * @param locale    The locale to consider.
2655   * @return A reference to this.
2656   * @stable ICU 2.1
2657   */
2658  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2659
2660  /**
2661   * Titlecase this string, with options.
2662   *
2663   * Casing is locale-dependent and context-sensitive.
2664   * Titlecasing uses a break iterator to find the first characters of words
2665   * that are to be titlecased. It titlecases those characters and lowercases
2666   * all others. (This can be modified with options.)
2667   *
2668   * The titlecase break iterator can be provided to customize for arbitrary
2669   * styles, using rules and dictionaries beyond the standard iterators.
2670   * It may be more efficient to always provide an iterator to avoid
2671   * opening and closing one for each string.
2672   * The standard titlecase iterator for the root locale implements the
2673   * algorithm of Unicode TR 21.
2674   *
2675   * This function uses only the setText(), first() and next() methods of the
2676   * provided break iterator.
2677   *
2678   * @param titleIter A break iterator to find the first characters of words
2679   *                  that are to be titlecased.
2680   *                  If none is provided (0), then a standard titlecase
2681   *                  break iterator is opened.
2682   *                  Otherwise the provided iterator is set to the string's text.
2683   * @param locale    The locale to consider.
2684   * @param options Options bit set, see ucasemap_open().
2685   * @return A reference to this.
2686   * @see U_TITLECASE_NO_LOWERCASE
2687   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2688   * @see ucasemap_open
2689   * @stable ICU 3.8
2690   */
2691  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2692
2693#endif
2694
2695  /**
2696   * Case-folds the characters in this string.
2697   *
2698   * Case-folding is locale-independent and not context-sensitive,
2699   * but there is an option for whether to include or exclude mappings for dotted I
2700   * and dotless i that are marked with 'T' in CaseFolding.txt.
2701   *
2702   * The result may be longer or shorter than the original.
2703   *
2704   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I; defaults to 0 (U_FOLD_CASE_DEFAULT)
2705   * @return A reference to this.
2706   * @stable ICU 2.0
2707   */
2708  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2709
2710  //========================================
2711  // Access to the internal buffer
2712  //========================================
2713
2714  /**
2715   * Get a read/write pointer to the internal buffer.
2716   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2717   * writable, and is still owned by the UnicodeString object.
2718   * Calls to getBuffer(minCapacity) must not be nested, and
2719   * must be matched with calls to releaseBuffer(newLength).
2720   * If the string buffer was read-only or shared,
2721   * then it will be reallocated and copied.
2722   *
2723   * An attempted nested call will return 0, and will not further modify the
2724   * state of the UnicodeString object.
2725   * It also returns 0 if the string is bogus.
2726   *
2727   * The actual capacity of the string buffer may be larger than minCapacity.
2728   * getCapacity() returns the actual capacity.
2729   * For many operations, the full capacity should be used to avoid reallocations.
2730   *
2731   * While the buffer is "open" between getBuffer(minCapacity)
2732   * and releaseBuffer(newLength), the following applies:
2733   * - The string length is set to 0.
2734   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2735   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2736   * - You can read from and write to the returned buffer.
2737   * - The previous string contents will still be in the buffer;
2738   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
2739   *   If the length() was greater than minCapacity, then any contents after minCapacity
2740   *   may be lost.
2741   *   The buffer contents are not NUL-terminated by getBuffer().
2742   *   If length()<getCapacity() then you can terminate it by writing a NUL
2743   *   at index length().
2744   * - You must call releaseBuffer(newLength) in order to
2745   *   return to normal UnicodeString operation.
2746   *
2747   * @param minCapacity the minimum number of UChars that are to be available
2748   *        in the buffer, starting at the returned pointer;
2749   *        default to the current string capacity if minCapacity==-1
2750   * @return a writable pointer to the internal string buffer,
2751   *         or 0 if an error occurs (nested calls, bogus string, out of memory)
2752   *
2753   * @see releaseBuffer
2754   * @see getTerminatedBuffer()
2755   * @stable ICU 2.0
2756   */
2757  UChar *getBuffer(int32_t minCapacity);
2758
2759  /**
2760   * Release a read/write buffer on a UnicodeString object with an
2761   * "open" getBuffer(minCapacity).
2762   * This function must be called in a matched pair with getBuffer(minCapacity).
2763   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2764   *
2765   * It will set the string length to newLength, at most to the current capacity.
2766   * If newLength==-1 then it will set the length according to the
2767   * first NUL in the buffer, or to the capacity if there is no NUL.
2768   *
2769   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2770   *
2771   * @param newLength the new length of the UnicodeString object;
2772   *        limited to the current capacity if newLength is greater than that;
2773   *        if newLength==-1 (the default), it is set to u_strlen(buffer) but not more than
2774   *        the current capacity of the string
2775   *
2776   * @see getBuffer(int32_t minCapacity)
2777   * @stable ICU 2.0
2778   */
2779  void releaseBuffer(int32_t newLength=-1);
2780
2781  /**
2782   * Get a read-only pointer to the internal buffer.
2783   * This can be called at any time on a valid UnicodeString.
2784   *
2785   * It returns 0 if the string is bogus, or
2786   * during an "open" getBuffer(minCapacity).
2787   *
2788   * It can be called as many times as desired.
2789   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2790   * at which time the pointer is semantically invalidated and must not be used any more.
2791   *
2792   * The capacity of the buffer can be determined with getCapacity().
2793   * The part after length() may or may not be initialized and valid,
2794   * depending on the history of the UnicodeString object.
2795   *
2796   * The buffer contents are (probably) not NUL-terminated.
2797   * You can check if they are with
2798   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2799   * (See getTerminatedBuffer().)
2800   *
2801   * The buffer may reside in read-only memory. Its contents must not
2802   * be modified.
2803   *
2804   * @return a read-only pointer to the internal string buffer,
2805   *         or 0 if the string is empty or bogus (see the description above for the other cases)
2806   *
2807   * @see getBuffer(int32_t minCapacity)
2808   * @see getTerminatedBuffer()
2809   * @stable ICU 2.0
2810   */
2811  inline const UChar *getBuffer() const;
2812
2813  /**
2814   * Get a read-only pointer to the internal buffer,
2815   * making sure that it is NUL-terminated.
2816   * This can be called at any time on a valid UnicodeString.
2817   *
2818   * It returns 0 if the string is bogus, or
2819   * during an "open" getBuffer(minCapacity), or if the buffer cannot
2820   * be NUL-terminated (because memory allocation failed).
2821   *
2822   * It can be called as many times as desired.
2823   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2824   * at which time the pointer is semantically invalidated and must not be used any more.
2825   *
2826   * The capacity of the buffer can be determined with getCapacity().
2827   * The part after length()+1 may or may not be initialized and valid,
2828   * depending on the history of the UnicodeString object.
2829   *
2830   * The buffer contents are guaranteed to be NUL-terminated.
2831   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2832   * is written.
2833   * For this reason, this function is not const, unlike getBuffer().
2834   * Note that a UnicodeString may also contain NUL characters as part of its contents.
2835   *
2836   * The buffer may reside in read-only memory. Its contents must not
2837   * be modified.
2838   *
2839   * @return a read-only pointer to the internal string buffer,
2840   *         or 0 if the string is empty or bogus (see the description above for the other cases)
2841   *
2842   * @see getBuffer(int32_t minCapacity)
2843   * @see getBuffer()
2844   * @stable ICU 2.2
2845   */
2846  const UChar *getTerminatedBuffer();
2847
2848  //========================================
2849  // Constructors
2850  //========================================
2851
2852  /** Construct an empty UnicodeString.
2853   * @stable ICU 2.0
2854   */
2855  inline UnicodeString();
2856
2857  /**
2858   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2859   * @param capacity the number of UChars this UnicodeString should hold
2860   * before a resize is necessary; if count is greater than 0 and count
2861   * code points c take up more space than capacity, then capacity is adjusted
2862   * accordingly.
2863   * @param c is used to initially fill the string
2864   * @param count specifies how many code points c are to be written in the
2865   *              string
2866   * @stable ICU 2.0
2867   */
2868  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2869
2870  /**
2871   * Single UChar (code unit) constructor.
2872   *
2873   * It is recommended to mark this constructor "explicit" by
2874   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2875   * on the compiler command line or similar.
2876   * @param ch the character to place in the UnicodeString
2877   * @stable ICU 2.0
2878   */
2879  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
2880
2881  /**
2882   * Single UChar32 (code point) constructor.
2883   *
2884   * It is recommended to mark this constructor "explicit" by
2885   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2886   * on the compiler command line or similar.
2887   * @param ch the character to place in the UnicodeString
2888   * @stable ICU 2.0
2889   */
2890  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2891
2892  /**
2893   * UChar* constructor.
2894   *
2895   * It is recommended to mark this constructor "explicit" by
2896   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2897   * on the compiler command line or similar.
2898   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2899   * must be NUL (U+0000) terminated.
2900   * @stable ICU 2.0
2901   */
2902  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
2903
2904  /**
2905   * UChar* constructor.
2906   * @param text The characters to place in the UnicodeString.
2907   * @param textLength The number of Unicode characters in <TT>text</TT>
2908   * to copy.
2909   * @stable ICU 2.0
2910   */
2911  UnicodeString(const UChar *text,
2912        int32_t textLength);
2913
2914  /**
2915   * Readonly-aliasing UChar* constructor.
2916   * The text will be used for the UnicodeString object, but
2917   * it will not be released when the UnicodeString is destroyed.
2918   * This has copy-on-write semantics:
2919   * When the string is modified, then the buffer is first copied into
2920   * newly allocated memory.
2921   * The aliased buffer is never modified.
2922   *
2923   * In an assignment to another UnicodeString, when using the copy constructor
2924   * or the assignment operator, the text will be copied.
2925   * When using fastCopyFrom(), the text will be aliased again,
2926   * so that both strings then alias the same readonly-text.
2927   *
2928   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2929   *                     This must be true if <code>textLength==-1</code>.
2930   * @param text The characters to alias for the UnicodeString.
2931   * @param textLength The number of Unicode characters in <code>text</code> to alias.
2932   *                   If -1, then this constructor will determine the length
2933   *                   by calling <code>u_strlen()</code>.
2934   * @stable ICU 2.0
2935   */
2936  UnicodeString(UBool isTerminated,
2937                const UChar *text,
2938                int32_t textLength);
2939
2940  /**
2941   * Writable-aliasing UChar* constructor.
2942   * The text will be used for the UnicodeString object, but
2943   * it will not be released when the UnicodeString is destroyed.
2944   * This has write-through semantics:
2945   * For as long as the capacity of the buffer is sufficient, write operations
2946   * will directly affect the buffer. When more capacity is necessary, then
2947   * a new buffer will be allocated and the contents copied as with regularly
2948   * constructed strings.
2949   * In an assignment to another UnicodeString, the buffer will be copied.
2950   * The extract(UChar *dst) function detects whether the dst pointer is the same
2951   * as the string buffer itself and will in this case not copy the contents.
2952   *
2953   * @param buffer The characters to alias for the UnicodeString.
2954   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2955   * @param buffCapacity The size of <code>buffer</code> in UChars.
2956   * @stable ICU 2.0
2957   */
2958  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2959
2960#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2961
2962  /**
2963   * char* constructor.
2964   * Uses the default converter (and thus depends on the ICU conversion code)
2965   * unless U_CHARSET_IS_UTF8 is set to 1.
2966   *
2967   * For ASCII (really "invariant character") strings it is more efficient to use
2968   * the constructor that takes a US_INV (for its enum EInvariant).
2969   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
2970   * UNICODE_STRING_SIMPLE.
2971   *
2972   * It is recommended to mark this constructor "explicit" by
2973   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2974   * on the compiler command line or similar.
2975   * @param codepageData an array of bytes, null-terminated,
2976   *                     in the platform's default codepage.
2977   * @stable ICU 2.0
2978   * @see UNICODE_STRING
2979   * @see UNICODE_STRING_SIMPLE
2980   */
2981  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2982
2983  /**
2984   * char* constructor.
2985   * Uses the default converter (and thus depends on the ICU conversion code)
2986   * unless U_CHARSET_IS_UTF8 is set to 1.
2987   * @param codepageData an array of bytes in the platform's default codepage.
2988   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2989   * @stable ICU 2.0
2990   */
2991  UnicodeString(const char *codepageData, int32_t dataLength);
2992
2993#endif
2994
2995#if !UCONFIG_NO_CONVERSION
2996
2997  /**
2998   * char* constructor.
2999   * @param codepageData an array of bytes, null-terminated
3000   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3001   * value 0 for <TT>codepage</TT> indicates that the text is in the
3002   * platform's default codepage.
3003   *
3004   * If <code>codepage</code> is an empty string (<code>""</code>),
3005   * then a simple conversion is performed on the codepage-invariant
3006   * subset ("invariant characters") of the platform encoding. See utypes.h.
3007   * Recommendation: For invariant-character strings use the constructor
3008   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3009   * because it avoids object code dependencies of UnicodeString on
3010   * the conversion code.
3011   *
3012   * @stable ICU 2.0
3013   */
3014  UnicodeString(const char *codepageData, const char *codepage);
3015
3016  /**
3017   * char* constructor.
3018   * @param codepageData an array of bytes.
3019   * @param dataLength The number of bytes in <TT>codepageData</TT>.
3020   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3021   * value 0 for <TT>codepage</TT> indicates that the text is in the
3022   * platform's default codepage.
3023   * If <code>codepage</code> is an empty string (<code>""</code>),
3024   * then a simple conversion is performed on the codepage-invariant
3025   * subset ("invariant characters") of the platform encoding. See utypes.h.
3026   * Recommendation: For invariant-character strings use the constructor
3027   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3028   * because it avoids object code dependencies of UnicodeString on
3029   * the conversion code.
3030   *
3031   * @stable ICU 2.0
3032   */
3033  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3034
3035  /**
3036   * char * / UConverter constructor.
3037   * This constructor uses an existing UConverter object to
3038   * convert the codepage string to Unicode and construct a UnicodeString
3039   * from that.
3040   *
3041   * The converter is reset at first.
3042   * If the error code indicates a failure before this constructor is called,
3043   * or if an error occurs during conversion or construction,
3044   * then the string will be bogus.
3045   *
3046   * This function avoids the overhead of opening and closing a converter if
3047   * multiple strings are constructed.
3048   *
3049   * @param src input codepage string
3050   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3051   * @param cnv converter object (ucnv_resetToUnicode() will be called),
3052   *        can be NULL for the default converter
3053   * @param errorCode normal ICU error code
3054   * @stable ICU 2.0
3055   */
3056  UnicodeString(
3057        const char *src, int32_t srcLength,
3058        UConverter *cnv,
3059        UErrorCode &errorCode);
3060
3061#endif
3062
3063  /**
3064   * Constructs a Unicode string from an invariant-character char * string.
3065   * About invariant characters see utypes.h.
3066   * This constructor has no runtime dependency on conversion code and is
3067   * therefore recommended over ones taking a charset name string
3068   * (where the empty string "" indicates invariant-character conversion).
3069   *
3070   * Use the macro US_INV as the third, signature-distinguishing parameter.
3071   *
3072   * For example:
3073   * \code
3074   * void fn(const char *s) {
3075   *   UnicodeString ustr(s, -1, US_INV);
3076   *   // use ustr ...
3077   * }
3078   * \endcode
3079   *
3080   * @param src String using only invariant characters.
3081   * @param length Length of src, or -1 if NUL-terminated.
3082   * @param inv Signature-distinguishing parameter, use US_INV.
3083   *
3084   * @see US_INV
3085   * @stable ICU 3.2
3086   */
3087  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3088
3089
3090  /**
3091   * Copy constructor.
3092   * @param that The UnicodeString object to copy.
3093   * @stable ICU 2.0
3094   */
3095  UnicodeString(const UnicodeString& that);
3096
3097  /**
3098   * 'Substring' constructor from tail of source string.
3099   * @param src The UnicodeString object to copy.
3100   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3101   * @stable ICU 2.2
3102   */
3103  UnicodeString(const UnicodeString& src, int32_t srcStart);
3104
3105  /**
3106   * 'Substring' constructor from subrange of source string.
3107   * @param src The UnicodeString object to copy.
3108   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3109   * @param srcLength The number of characters from <tt>src</tt> to copy.
3110   * @stable ICU 2.2
3111   */
3112  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3113
3114  /**
3115   * Clone this object, an instance of a subclass of Replaceable.
3116   * Clones can be used concurrently in multiple threads.
3117   * If a subclass does not implement clone(), or if an error occurs,
3118   * then NULL is returned.
3119   * The clone functions in all subclasses return a pointer to a Replaceable
3120   * because some compilers do not support covariant (same-as-this)
3121   * return types; cast to the appropriate subclass if necessary.
3122   * The caller must delete the clone.
3123   *
3124   * @return a clone of this object
3125   *
3126   * @see Replaceable::clone
3127   * @see getDynamicClassID
3128   * @stable ICU 2.6
3129   */
3130  virtual Replaceable *clone() const;
3131
3132  /** Destructor.
3133   * @stable ICU 2.0
3134   */
3135  virtual ~UnicodeString();
3136
3137  /**
3138   * Create a UnicodeString from a UTF-8 string.
3139   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3140   * Calls u_strFromUTF8WithSub().
3141   *
3142   * @param utf8 UTF-8 input string.
3143   *             Note that a StringPiece can be implicitly constructed
3144   *             from a std::string or a NUL-terminated const char * string.
3145   * @return A UnicodeString with equivalent UTF-16 contents.
3146   * @see toUTF8
3147   * @see toUTF8String
3148   * @stable ICU 4.2
3149   */
3150  static UnicodeString fromUTF8(const StringPiece &utf8);
3151
3152  /**
3153   * Create a UnicodeString from a UTF-32 string.
3154   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3155   * Calls u_strFromUTF32WithSub().
3156   *
3157   * @param utf32 UTF-32 input string. Must not be NULL.
3158   * @param length Length of the input string, or -1 if NUL-terminated.
3159   * @return A UnicodeString with equivalent UTF-16 contents.
3160   * @see toUTF32
3161   * @stable ICU 4.2
3162   */
3163  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3164
3165  /* Miscellaneous operations */
3166
3167  /**
3168   * Unescape a string of characters and return a string containing
3169   * the result.  The following escape sequences are recognized:
3170   *
3171   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3172   * \\Uhhhhhhhh   8 hex digits
3173   * \\xhh         1-2 hex digits
3174   * \\ooo         1-3 octal digits; o in [0-7]
3175   * \\cX          control-X; X is masked with 0x1F
3176   *
3177   * as well as the standard ANSI C escapes:
3178   *
3179   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3180   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3181   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3182   *
3183   * Anything else following a backslash is generically escaped.  For
3184   * example, "[a\\-z]" returns "[a-z]".
3185   *
3186   * If an escape sequence is ill-formed, this method returns an empty
3187   * string.  An example of an ill-formed sequence is "\\u" followed by
3188   * fewer than 4 hex digits.
3189   *
3190   * This function is similar to u_unescape() but not identical to it.
3191   * The latter takes a source char*, so it does escape recognition
3192   * and also invariant conversion.
3193   *
3194   * @return a string with backslash escapes interpreted, or an
3195   * empty string on error.
3196   * @see UnicodeString#unescapeAt()
3197   * @see u_unescape()
3198   * @see u_unescapeAt()
3199   * @stable ICU 2.0
3200   */
3201  UnicodeString unescape() const;
3202
3203  /**
3204   * Unescape a single escape sequence and return the represented
3205   * character.  See unescape() for a listing of the recognized escape
3206   * sequences.  The character at offset-1 is assumed (without
3207   * checking) to be a backslash.  If the escape sequence is
3208   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3209   * returned.
3210   *
3211   * @param offset an input output parameter.  On input, it is the
3212   * offset into this string where the escape sequence is located,
3213   * after the initial backslash.  On output, it is advanced after the
3214   * last character parsed.  On error, it is not advanced at all.
3215   * @return the character represented by the escape sequence at
3216   * offset, or U_SENTINEL=-1 on error.
3217   * @see UnicodeString#unescape()
3218   * @see u_unescape()
3219   * @see u_unescapeAt()
3220   * @stable ICU 2.0
3221   */
3222  UChar32 unescapeAt(int32_t &offset) const;
3223
3224  /**
3225   * ICU "poor man's RTTI", returns a UClassID for this class.
3226   *
3227   * @stable ICU 2.2
3228   */
3229  static UClassID U_EXPORT2 getStaticClassID();
3230
3231  /**
3232   * ICU "poor man's RTTI", returns a UClassID for the actual class.
3233   *
3234   * @stable ICU 2.2
3235   */
3236  virtual UClassID getDynamicClassID() const;
3237
3238  //========================================
3239  // Implementation methods
3240  //========================================
3241
3242protected:
3243  /**
3244   * Implement Replaceable::getLength() (see jitterbug 1027).
3245   * @stable ICU 2.4
3246   */
3247  virtual int32_t getLength() const;
3248
3249  /**
3250   * The change in Replaceable to use virtual getCharAt() allows
3251   * UnicodeString::charAt() to be inline again (see jitterbug 709).
3252   * @stable ICU 2.4
3253   */
3254  virtual UChar getCharAt(int32_t offset) const;
3255
3256  /**
3257   * The change in Replaceable to use virtual getChar32At() allows
3258   * UnicodeString::char32At() to be inline again (see jitterbug 709).
3259   * @stable ICU 2.4
3260   */
3261  virtual UChar32 getChar32At(int32_t offset) const;
3262
3263private:
3264  // For char* constructors. Could be made public.
3265  UnicodeString &setToUTF8(const StringPiece &utf8);
3266  // For extract(char*).
3267  // We could make a toUTF8(target, capacity, errorCode) public but not
3268  // this version: New API will be cleaner if we make callers create substrings
3269  // rather than having start+length on every method,
3270  // and it should take a UErrorCode&.
3271  int32_t
3272  toUTF8(int32_t start, int32_t len,
3273         char *target, int32_t capacity) const;
3274
3275  /**
3276   * Internal string contents comparison, called by operator==.
3277   * Requires: this & text not bogus and have same lengths.
3278   */
3279  UBool doEquals(const UnicodeString &text, int32_t len) const;
3280
3281  inline int8_t
3282  doCompare(int32_t start,
3283           int32_t length,
3284           const UnicodeString& srcText,
3285           int32_t srcStart,
3286           int32_t srcLength) const;
3287
3288  int8_t doCompare(int32_t start,
3289           int32_t length,
3290           const UChar *srcChars,
3291           int32_t srcStart,
3292           int32_t srcLength) const;
3293
3294  inline int8_t
3295  doCompareCodePointOrder(int32_t start,
3296                          int32_t length,
3297                          const UnicodeString& srcText,
3298                          int32_t srcStart,
3299                          int32_t srcLength) const;
3300
3301  int8_t doCompareCodePointOrder(int32_t start,
3302                                 int32_t length,
3303                                 const UChar *srcChars,
3304                                 int32_t srcStart,
3305                                 int32_t srcLength) const;
3306
3307  inline int8_t
3308  doCaseCompare(int32_t start,
3309                int32_t length,
3310                const UnicodeString &srcText,
3311                int32_t srcStart,
3312                int32_t srcLength,
3313                uint32_t options) const;
3314
3315  int8_t
3316  doCaseCompare(int32_t start,
3317                int32_t length,
3318                const UChar *srcChars,
3319                int32_t srcStart,
3320                int32_t srcLength,
3321                uint32_t options) const;
3322
3323  int32_t doIndexOf(UChar c,
3324            int32_t start,
3325            int32_t length) const;
3326
3327  int32_t doIndexOf(UChar32 c,
3328                        int32_t start,
3329                        int32_t length) const;
3330
3331  int32_t doLastIndexOf(UChar c,
3332                int32_t start,
3333                int32_t length) const;
3334
3335  int32_t doLastIndexOf(UChar32 c,
3336                            int32_t start,
3337                            int32_t length) const;
3338
3339  void doExtract(int32_t start,
3340         int32_t length,
3341         UChar *dst,
3342         int32_t dstStart) const;
3343
3344  inline void doExtract(int32_t start,
3345         int32_t length,
3346         UnicodeString& target) const;
3347
3348  inline UChar doCharAt(int32_t offset)  const;
3349
3350  UnicodeString& doReplace(int32_t start,
3351               int32_t length,
3352               const UnicodeString& srcText,
3353               int32_t srcStart,
3354               int32_t srcLength);
3355
3356  UnicodeString& doReplace(int32_t start,
3357               int32_t length,
3358               const UChar *srcChars,
3359               int32_t srcStart,
3360               int32_t srcLength);
3361
3362  UnicodeString& doReverse(int32_t start,
3363               int32_t length);
3364
3365  // calculate hash code
3366  int32_t doHashCode(void) const;
3367
3368  // get pointer to start of array
3369  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3370  inline UChar* getArrayStart(void);
3371  inline const UChar* getArrayStart(void) const;
3372
3373  inline UBool hasShortLength() const;
3374  inline int32_t getShortLength() const;
3375
3376  // A UnicodeString object (not necessarily its current buffer)
3377  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3378  inline UBool isWritable() const;
3379
3380  // Is the current buffer writable?
3381  inline UBool isBufferWritable() const;
3382
3383  // None of the following does releaseArray().
3384  inline void setZeroLength();
3385  inline void setShortLength(int32_t len);
3386  inline void setLength(int32_t len);
3387  inline void setToEmpty();
3388  inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
3389
3390  // allocate the array; result may be the stack buffer
3391  // sets refCount to 1 if appropriate
3392  // sets fArray, fCapacity, and flags
3393  // sets length to 0
3394  // returns boolean for success or failure
3395  UBool allocate(int32_t capacity);
3396
3397  // release the array if owned
3398  void releaseArray(void);
3399
3400  // turn a bogus string into an empty one
3401  void unBogus();
3402
3403  // implements assignment operator, copy constructor, and fastCopyFrom()
3404  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3405
3406  // Pin start (and, for pinIndices, length) to acceptable values.
3407  inline void pinIndex(int32_t& start) const;
3408  inline void pinIndices(int32_t& start,
3409                         int32_t& length) const;
3410
3411#if !UCONFIG_NO_CONVERSION
3412
3413  /* Internal extract() using UConverter. */
3414  int32_t doExtract(int32_t start, int32_t length,
3415                    char *dest, int32_t destCapacity,
3416                    UConverter *cnv,
3417                    UErrorCode &errorCode) const;
3418
3419  /*
3420   * Real constructor for converting from codepage data.
3421   * It assumes that it is called with !fRefCounted.
3422   *
3423   * If <code>codepage==0</code>, then the default converter
3424   * is used for the platform encoding.
3425   * If <code>codepage</code> is an empty string (<code>""</code>),
3426   * then a simple conversion is performed on the codepage-invariant
3427   * subset ("invariant characters") of the platform encoding. See utypes.h.
3428   */
3429  void doCodepageCreate(const char *codepageData,
3430                        int32_t dataLength,
3431                        const char *codepage);
3432
3433  /*
3434   * Worker function for creating a UnicodeString from
3435   * a codepage string using a UConverter.
3436   */
3437  void
3438  doCodepageCreate(const char *codepageData,
3439                   int32_t dataLength,
3440                   UConverter *converter,
3441                   UErrorCode &status);
3442
3443#endif
3444
3445  /*
3446   * This function is called when write access to the array
3447   * is necessary.
3448   *
3449   * We need to make a copy of the array if
3450   * the buffer is read-only, or
3451   * the buffer is refCounted (shared), and refCount>1, or
3452   * the buffer is too small.
3453   *
3454   * Return FALSE if memory could not be allocated.
3455   */
3456  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3457                            int32_t growCapacity = -1,
3458                            UBool doCopyArray = TRUE,
3459                            int32_t **pBufferToDelete = 0,
3460                            UBool forceClone = FALSE);
3461
3462  /**
3463   * Common function for UnicodeString case mappings.
3464   * The stringCaseMapper has the same type UStringCaseMapper
3465   * as in ustr_imp.h for ustrcase_map().
3466   */
3467  UnicodeString &
3468  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3469
3470  // ref counting
3471  void addRef(void);
3472  int32_t removeRef(void);
3473  int32_t refCount(void) const;
3474
3475  // constants
3476  enum {
3477    // Set the stack buffer size so that sizeof(UnicodeString) is,
3478    // naturally (without padding), a multiple of sizeof(pointer).
3479    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size (in UChars) of the in-object buffer for short strings
3480    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3481    kGrowSize=128, // grow size for this buffer
3482    kInvalidHashCode=0, // invalid hash code
3483    kEmptyHashCode=1, // hash code for empty string
3484
3485    // bit flag values for fLengthAndFlags (the low kLengthShift bits)
3486    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
3487    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3488    kRefCounted=4,      // there is a refCount field before the characters in fArray
3489    kBufferIsReadonly=8,// do not write to this buffer
3490    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
3491                        // and releaseBuffer(newLength) must be called
3492    kAllStorageFlags=0x1f, // mask covering the five flag bits above
3493
3494    kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
3495    kLength1=1<<kLengthShift, // fLengthAndFlags value that represents a short length of 1
3496    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
3497    kLengthIsLarge=0xffe0,  // all 11 length bits set: short length < 0, real length is in fUnion.fFields.fLength
3498
3499    // combined values for convenience
3500    kShortString=kUsingStackBuffer, // flags set by the default constructor (stack buffer, length bits 0)
3501    kLongString=kRefCounted,        // refCounted fArray
3502    kReadonlyAlias=kBufferIsReadonly, // readonly buffer, not refCounted
3503    kWritableAlias=0                // no storage flags set
3504  };
3505
3506  friend class StringThreadTest;
3507  friend class UnicodeStringAppendable;
3508
3509  union StackBufferOrFields;        // forward declaration necessary before friend declaration
3510  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3511
3512  /*
3513   * The following are all the class fields that are stored
3514   * in each UnicodeString object.
3515   * Note that UnicodeString has virtual functions,
3516   * therefore there is an implicit vtable pointer
3517   * as the first real field.
3518   * The fields should be aligned such that no padding is necessary.
3519   * On 32-bit machines, the size should be 32 bytes,
3520   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3521   *
3522   * We use a hack to achieve this.
3523   *
3524   * With at least some compilers, each of the following is forced to
3525   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3526   * rounded up with additional padding if the fields do not already fit that requirement:
3527   * - sizeof(class UnicodeString)
3528   * - offsetof(UnicodeString, fUnion)
3529   * - sizeof(fUnion)
3530   * - sizeof(fStackFields)
3531   *
3532   * We optimize for the longest possible internal buffer for short strings.
3533   * fUnion.fStackFields begins with 2 bytes for storage flags
3534   * and the length of relatively short strings,
3535   * followed by the buffer for short string contents.
3536   * There is no padding inside fStackFields.
3537   *
3538   * Heap-allocated and aliased strings use fUnion.fFields.
3539   * Both fStackFields and fFields must begin with the same fields for flags and short length,
3540   * that is, those must have the same memory offsets inside the object,
3541   * because the flags must be inspected in order to decide which half of fUnion is being used.
3542   * We assume that the compiler does not reorder the fields.
3543   *
3544   * (Padding at the end of fFields is ok:
3545   * As long as it is no larger than fStackFields, it is not wasted space.)
3546   *
3547   * For some of the history of the UnicodeString class fields layout,
3548   * see ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3549   * and ticket #8322 "why is sizeof(UnicodeString)==48?".
3550   */
3551  // (implicit) *vtable;
3552  union StackBufferOrFields {
3553    // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3554    // Each struct of the union must begin with fLengthAndFlags.
3555    struct {
3556      int16_t fLengthAndFlags;          // storage flags in the low kLengthShift bits, short length above them
3557      UChar fBuffer[US_STACKBUF_SIZE];  // in-object buffer for short strings
3558    } fStackFields;
3559    struct {
3560      int16_t fLengthAndFlags;          // same layout and offset as fStackFields.fLengthAndFlags
3561      UChar   *fArray;    // the Unicode data
3562      int32_t fCapacity;  // capacity of fArray (in UChars)
3563      int32_t fLength;    // length of the text in fArray; read by length() only when fLengthAndFlags<0 (see kLengthIsLarge), else undefined
3564    } fFields;
3565  } fUnion;
3566};
3567
3568/**
3569 * Create a new UnicodeString with the concatenation of two others.
3570 *
3571 * @param s1 The first string to be copied to the new one.
3572 * @param s2 The second string to be copied to the new one, after s1.
3573 * @return UnicodeString(s1).append(s2)
3574 * @stable ICU 2.8
3575 */
3576U_COMMON_API UnicodeString U_EXPORT2
3577operator+ (const UnicodeString &s1, const UnicodeString &s2);
3578
3579//========================================
3580// Inline members
3581//========================================
3582
3583//========================================
3584// Privates
3585//========================================
3586
3587inline void
3588UnicodeString::pinIndex(int32_t& start) const
3589{
3590  // pin index
3591  if(start < 0) {
3592    start = 0;
3593  } else if(start > length()) {
3594    start = length();
3595  }
3596}
3597
3598inline void
3599UnicodeString::pinIndices(int32_t& start,
3600                          int32_t& _length) const
3601{
3602  // pin indices
3603  int32_t len = length();
3604  if(start < 0) {
3605    start = 0;
3606  } else if(start > len) {
3607    start = len;
3608  }
3609  if(_length < 0) {
3610    _length = 0;
3611  } else if(_length > (len - start)) {
3612    _length = (len - start);
3613  }
3614}
3615
3616inline UChar*
3617UnicodeString::getArrayStart() {
3618  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3619    fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3620}
3621
3622inline const UChar*
3623UnicodeString::getArrayStart() const {
3624  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3625    fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3626}
3627
3628//========================================
3629// Default constructor
3630//========================================
3631
3632inline
3633UnicodeString::UnicodeString() {
3634  fUnion.fStackFields.fLengthAndFlags=kShortString;
3635}
3636
3637//========================================
3638// Read-only implementation methods
3639//========================================
3640inline UBool
3641UnicodeString::hasShortLength() const {
3642  return fUnion.fFields.fLengthAndFlags>=0;
3643}
3644
3645inline int32_t
3646UnicodeString::getShortLength() const {
3647  // fLengthAndFlags must be non-negative -> short length >= 0
3648  // and arithmetic or logical shift does not matter.
3649  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3650}
3651
3652inline int32_t
3653UnicodeString::length() const {
3654  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3655}
3656
3657inline int32_t
3658UnicodeString::getCapacity() const {
3659  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3660    US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3661}
3662
3663inline int32_t
3664UnicodeString::hashCode() const
3665{ return doHashCode(); }
3666
3667inline UBool
3668UnicodeString::isBogus() const
3669{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3670
3671inline UBool
3672UnicodeString::isWritable() const
3673{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3674
3675inline UBool
3676UnicodeString::isBufferWritable() const
3677{
3678  return (UBool)(
3679      !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3680      (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3681}
3682
3683inline const UChar *
3684UnicodeString::getBuffer() const {
3685  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3686    return 0;
3687  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3688    return fUnion.fStackFields.fBuffer;
3689  } else {
3690    return fUnion.fFields.fArray;
3691  }
3692}
3693
3694//========================================
3695// Read-only alias methods
3696//========================================
3697inline int8_t
3698UnicodeString::doCompare(int32_t start,
3699              int32_t thisLength,
3700              const UnicodeString& srcText,
3701              int32_t srcStart,
3702              int32_t srcLength) const
3703{
3704  if(srcText.isBogus()) {
3705    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3706  } else {
3707    srcText.pinIndices(srcStart, srcLength);
3708    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3709  }
3710}
3711
3712inline UBool
3713UnicodeString::operator== (const UnicodeString& text) const
3714{
3715  if(isBogus()) {
3716    return text.isBogus();
3717  } else {
3718    int32_t len = length(), textLength = text.length();
3719    return !text.isBogus() && len == textLength && doEquals(text, len);
3720  }
3721}
3722
3723inline UBool
3724UnicodeString::operator!= (const UnicodeString& text) const
3725{ return (! operator==(text)); }
3726
3727inline UBool
3728UnicodeString::operator> (const UnicodeString& text) const
3729{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3730
3731inline UBool
3732UnicodeString::operator< (const UnicodeString& text) const
3733{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3734
3735inline UBool
3736UnicodeString::operator>= (const UnicodeString& text) const
3737{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3738
3739inline UBool
3740UnicodeString::operator<= (const UnicodeString& text) const
3741{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3742
3743inline int8_t
3744UnicodeString::compare(const UnicodeString& text) const
3745{ return doCompare(0, length(), text, 0, text.length()); }
3746
3747inline int8_t
3748UnicodeString::compare(int32_t start,
3749               int32_t _length,
3750               const UnicodeString& srcText) const
3751{ return doCompare(start, _length, srcText, 0, srcText.length()); }
3752
3753inline int8_t
3754UnicodeString::compare(const UChar *srcChars,
3755               int32_t srcLength) const
3756{ return doCompare(0, length(), srcChars, 0, srcLength); }
3757
3758inline int8_t
3759UnicodeString::compare(int32_t start,
3760               int32_t _length,
3761               const UnicodeString& srcText,
3762               int32_t srcStart,
3763               int32_t srcLength) const
3764{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3765
3766inline int8_t
3767UnicodeString::compare(int32_t start,
3768               int32_t _length,
3769               const UChar *srcChars) const
3770{ return doCompare(start, _length, srcChars, 0, _length); }
3771
3772inline int8_t
3773UnicodeString::compare(int32_t start,
3774               int32_t _length,
3775               const UChar *srcChars,
3776               int32_t srcStart,
3777               int32_t srcLength) const
3778{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3779
3780inline int8_t
3781UnicodeString::compareBetween(int32_t start,
3782                  int32_t limit,
3783                  const UnicodeString& srcText,
3784                  int32_t srcStart,
3785                  int32_t srcLimit) const
3786{ return doCompare(start, limit - start,
3787           srcText, srcStart, srcLimit - srcStart); }
3788
3789inline int8_t
3790UnicodeString::doCompareCodePointOrder(int32_t start,
3791                                       int32_t thisLength,
3792                                       const UnicodeString& srcText,
3793                                       int32_t srcStart,
3794                                       int32_t srcLength) const
3795{
3796  if(srcText.isBogus()) {
3797    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3798  } else {
3799    srcText.pinIndices(srcStart, srcLength);
3800    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3801  }
3802}
3803
3804inline int8_t
3805UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3806{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3807
3808inline int8_t
3809UnicodeString::compareCodePointOrder(int32_t start,
3810                                     int32_t _length,
3811                                     const UnicodeString& srcText) const
3812{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3813
3814inline int8_t
3815UnicodeString::compareCodePointOrder(const UChar *srcChars,
3816                                     int32_t srcLength) const
3817{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3818
3819inline int8_t
3820UnicodeString::compareCodePointOrder(int32_t start,
3821                                     int32_t _length,
3822                                     const UnicodeString& srcText,
3823                                     int32_t srcStart,
3824                                     int32_t srcLength) const
3825{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3826
3827inline int8_t
3828UnicodeString::compareCodePointOrder(int32_t start,
3829                                     int32_t _length,
3830                                     const UChar *srcChars) const
3831{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3832
3833inline int8_t
3834UnicodeString::compareCodePointOrder(int32_t start,
3835                                     int32_t _length,
3836                                     const UChar *srcChars,
3837                                     int32_t srcStart,
3838                                     int32_t srcLength) const
3839{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3840
3841inline int8_t
3842UnicodeString::compareCodePointOrderBetween(int32_t start,
3843                                            int32_t limit,
3844                                            const UnicodeString& srcText,
3845                                            int32_t srcStart,
3846                                            int32_t srcLimit) const
3847{ return doCompareCodePointOrder(start, limit - start,
3848           srcText, srcStart, srcLimit - srcStart); }
3849
3850inline int8_t
3851UnicodeString::doCaseCompare(int32_t start,
3852                             int32_t thisLength,
3853                             const UnicodeString &srcText,
3854                             int32_t srcStart,
3855                             int32_t srcLength,
3856                             uint32_t options) const
3857{
3858  if(srcText.isBogus()) {
3859    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3860  } else {
3861    srcText.pinIndices(srcStart, srcLength);
3862    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3863  }
3864}
3865
3866inline int8_t
3867UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3868  return doCaseCompare(0, length(), text, 0, text.length(), options);
3869}
3870
3871inline int8_t
3872UnicodeString::caseCompare(int32_t start,
3873                           int32_t _length,
3874                           const UnicodeString &srcText,
3875                           uint32_t options) const {
3876  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3877}
3878
3879inline int8_t
3880UnicodeString::caseCompare(const UChar *srcChars,
3881                           int32_t srcLength,
3882                           uint32_t options) const {
3883  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3884}
3885
3886inline int8_t
3887UnicodeString::caseCompare(int32_t start,
3888                           int32_t _length,
3889                           const UnicodeString &srcText,
3890                           int32_t srcStart,
3891                           int32_t srcLength,
3892                           uint32_t options) const {
3893  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3894}
3895
3896inline int8_t
3897UnicodeString::caseCompare(int32_t start,
3898                           int32_t _length,
3899                           const UChar *srcChars,
3900                           uint32_t options) const {
3901  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3902}
3903
3904inline int8_t
3905UnicodeString::caseCompare(int32_t start,
3906                           int32_t _length,
3907                           const UChar *srcChars,
3908                           int32_t srcStart,
3909                           int32_t srcLength,
3910                           uint32_t options) const {
3911  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3912}
3913
3914inline int8_t
3915UnicodeString::caseCompareBetween(int32_t start,
3916                                  int32_t limit,
3917                                  const UnicodeString &srcText,
3918                                  int32_t srcStart,
3919                                  int32_t srcLimit,
3920                                  uint32_t options) const {
3921  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3922}
3923
3924inline int32_t
3925UnicodeString::indexOf(const UnicodeString& srcText,
3926               int32_t srcStart,
3927               int32_t srcLength,
3928               int32_t start,
3929               int32_t _length) const
3930{
3931  if(!srcText.isBogus()) {
3932    srcText.pinIndices(srcStart, srcLength);
3933    if(srcLength > 0) {
3934      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3935    }
3936  }
3937  return -1;
3938}
3939
3940inline int32_t
3941UnicodeString::indexOf(const UnicodeString& text) const
3942{ return indexOf(text, 0, text.length(), 0, length()); }
3943
3944inline int32_t
3945UnicodeString::indexOf(const UnicodeString& text,
3946               int32_t start) const {
3947  pinIndex(start);
3948  return indexOf(text, 0, text.length(), start, length() - start);
3949}
3950
3951inline int32_t
3952UnicodeString::indexOf(const UnicodeString& text,
3953               int32_t start,
3954               int32_t _length) const
3955{ return indexOf(text, 0, text.length(), start, _length); }
3956
3957inline int32_t
3958UnicodeString::indexOf(const UChar *srcChars,
3959               int32_t srcLength,
3960               int32_t start) const {
3961  pinIndex(start);
3962  return indexOf(srcChars, 0, srcLength, start, length() - start);
3963}
3964
3965inline int32_t
3966UnicodeString::indexOf(const UChar *srcChars,
3967               int32_t srcLength,
3968               int32_t start,
3969               int32_t _length) const
3970{ return indexOf(srcChars, 0, srcLength, start, _length); }
3971
3972inline int32_t
3973UnicodeString::indexOf(UChar c,
3974               int32_t start,
3975               int32_t _length) const
3976{ return doIndexOf(c, start, _length); }
3977
3978inline int32_t
3979UnicodeString::indexOf(UChar32 c,
3980               int32_t start,
3981               int32_t _length) const
3982{ return doIndexOf(c, start, _length); }
3983
3984inline int32_t
3985UnicodeString::indexOf(UChar c) const
3986{ return doIndexOf(c, 0, length()); }
3987
3988inline int32_t
3989UnicodeString::indexOf(UChar32 c) const
3990{ return indexOf(c, 0, length()); }
3991
3992inline int32_t
3993UnicodeString::indexOf(UChar c,
3994               int32_t start) const {
3995  pinIndex(start);
3996  return doIndexOf(c, start, length() - start);
3997}
3998
3999inline int32_t
4000UnicodeString::indexOf(UChar32 c,
4001               int32_t start) const {
4002  pinIndex(start);
4003  return indexOf(c, start, length() - start);
4004}
4005
4006inline int32_t
4007UnicodeString::lastIndexOf(const UChar *srcChars,
4008               int32_t srcLength,
4009               int32_t start,
4010               int32_t _length) const
4011{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4012
4013inline int32_t
4014UnicodeString::lastIndexOf(const UChar *srcChars,
4015               int32_t srcLength,
4016               int32_t start) const {
4017  pinIndex(start);
4018  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4019}
4020
4021inline int32_t
4022UnicodeString::lastIndexOf(const UnicodeString& srcText,
4023               int32_t srcStart,
4024               int32_t srcLength,
4025               int32_t start,
4026               int32_t _length) const
4027{
4028  if(!srcText.isBogus()) {
4029    srcText.pinIndices(srcStart, srcLength);
4030    if(srcLength > 0) {
4031      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4032    }
4033  }
4034  return -1;
4035}
4036
4037inline int32_t
4038UnicodeString::lastIndexOf(const UnicodeString& text,
4039               int32_t start,
4040               int32_t _length) const
4041{ return lastIndexOf(text, 0, text.length(), start, _length); }
4042
4043inline int32_t
4044UnicodeString::lastIndexOf(const UnicodeString& text,
4045               int32_t start) const {
4046  pinIndex(start);
4047  return lastIndexOf(text, 0, text.length(), start, length() - start);
4048}
4049
4050inline int32_t
4051UnicodeString::lastIndexOf(const UnicodeString& text) const
4052{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4053
4054inline int32_t
4055UnicodeString::lastIndexOf(UChar c,
4056               int32_t start,
4057               int32_t _length) const
4058{ return doLastIndexOf(c, start, _length); }
4059
4060inline int32_t
4061UnicodeString::lastIndexOf(UChar32 c,
4062               int32_t start,
4063               int32_t _length) const {
4064  return doLastIndexOf(c, start, _length);
4065}
4066
4067inline int32_t
4068UnicodeString::lastIndexOf(UChar c) const
4069{ return doLastIndexOf(c, 0, length()); }
4070
4071inline int32_t
4072UnicodeString::lastIndexOf(UChar32 c) const {
4073  return lastIndexOf(c, 0, length());
4074}
4075
4076inline int32_t
4077UnicodeString::lastIndexOf(UChar c,
4078               int32_t start) const {
4079  pinIndex(start);
4080  return doLastIndexOf(c, start, length() - start);
4081}
4082
4083inline int32_t
4084UnicodeString::lastIndexOf(UChar32 c,
4085               int32_t start) const {
4086  pinIndex(start);
4087  return lastIndexOf(c, start, length() - start);
4088}
4089
4090inline UBool
4091UnicodeString::startsWith(const UnicodeString& text) const
4092{ return compare(0, text.length(), text, 0, text.length()) == 0; }
4093
4094inline UBool
4095UnicodeString::startsWith(const UnicodeString& srcText,
4096              int32_t srcStart,
4097              int32_t srcLength) const
4098{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4099
4100inline UBool
4101UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4102  if(srcLength < 0) {
4103    srcLength = u_strlen(srcChars);
4104  }
4105  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4106}
4107
4108inline UBool
4109UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4110  if(srcLength < 0) {
4111    srcLength = u_strlen(srcChars);
4112  }
4113  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4114}
4115
4116inline UBool
4117UnicodeString::endsWith(const UnicodeString& text) const
4118{ return doCompare(length() - text.length(), text.length(),
4119           text, 0, text.length()) == 0; }
4120
4121inline UBool
4122UnicodeString::endsWith(const UnicodeString& srcText,
4123            int32_t srcStart,
4124            int32_t srcLength) const {
4125  srcText.pinIndices(srcStart, srcLength);
4126  return doCompare(length() - srcLength, srcLength,
4127                   srcText, srcStart, srcLength) == 0;
4128}
4129
4130inline UBool
4131UnicodeString::endsWith(const UChar *srcChars,
4132            int32_t srcLength) const {
4133  if(srcLength < 0) {
4134    srcLength = u_strlen(srcChars);
4135  }
4136  return doCompare(length() - srcLength, srcLength,
4137                   srcChars, 0, srcLength) == 0;
4138}
4139
4140inline UBool
4141UnicodeString::endsWith(const UChar *srcChars,
4142            int32_t srcStart,
4143            int32_t srcLength) const {
4144  if(srcLength < 0) {
4145    srcLength = u_strlen(srcChars + srcStart);
4146  }
4147  return doCompare(length() - srcLength, srcLength,
4148                   srcChars, srcStart, srcLength) == 0;
4149}
4150
4151//========================================
4152// replace
4153//========================================
4154inline UnicodeString&
4155UnicodeString::replace(int32_t start,
4156               int32_t _length,
4157               const UnicodeString& srcText)
4158{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4159
4160inline UnicodeString&
4161UnicodeString::replace(int32_t start,
4162               int32_t _length,
4163               const UnicodeString& srcText,
4164               int32_t srcStart,
4165               int32_t srcLength)
4166{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4167
4168inline UnicodeString&
4169UnicodeString::replace(int32_t start,
4170               int32_t _length,
4171               const UChar *srcChars,
4172               int32_t srcLength)
4173{ return doReplace(start, _length, srcChars, 0, srcLength); }
4174
4175inline UnicodeString&
4176UnicodeString::replace(int32_t start,
4177               int32_t _length,
4178               const UChar *srcChars,
4179               int32_t srcStart,
4180               int32_t srcLength)
4181{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4182
4183inline UnicodeString&
4184UnicodeString::replace(int32_t start,
4185               int32_t _length,
4186               UChar srcChar)
4187{ return doReplace(start, _length, &srcChar, 0, 1); }
4188
4189inline UnicodeString&
4190UnicodeString::replaceBetween(int32_t start,
4191                  int32_t limit,
4192                  const UnicodeString& srcText)
4193{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4194
4195inline UnicodeString&
4196UnicodeString::replaceBetween(int32_t start,
4197                  int32_t limit,
4198                  const UnicodeString& srcText,
4199                  int32_t srcStart,
4200                  int32_t srcLimit)
4201{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4202
4203inline UnicodeString&
4204UnicodeString::findAndReplace(const UnicodeString& oldText,
4205                  const UnicodeString& newText)
4206{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4207            newText, 0, newText.length()); }
4208
4209inline UnicodeString&
4210UnicodeString::findAndReplace(int32_t start,
4211                  int32_t _length,
4212                  const UnicodeString& oldText,
4213                  const UnicodeString& newText)
4214{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4215            newText, 0, newText.length()); }
4216
4217// ============================
4218// extract
4219// ============================
4220inline void
4221UnicodeString::doExtract(int32_t start,
4222             int32_t _length,
4223             UnicodeString& target) const
4224{ target.replace(0, target.length(), *this, start, _length); }
4225
4226inline void
4227UnicodeString::extract(int32_t start,
4228               int32_t _length,
4229               UChar *target,
4230               int32_t targetStart) const
4231{ doExtract(start, _length, target, targetStart); }
4232
4233inline void
4234UnicodeString::extract(int32_t start,
4235               int32_t _length,
4236               UnicodeString& target) const
4237{ doExtract(start, _length, target); }
4238
4239#if !UCONFIG_NO_CONVERSION
4240
4241inline int32_t
4242UnicodeString::extract(int32_t start,
4243               int32_t _length,
4244               char *dst,
4245               const char *codepage) const
4246
4247{
4248  // This dstSize value will be checked explicitly
4249  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4250}
4251
4252#endif
4253
4254inline void
4255UnicodeString::extractBetween(int32_t start,
4256                  int32_t limit,
4257                  UChar *dst,
4258                  int32_t dstStart) const {
4259  pinIndex(start);
4260  pinIndex(limit);
4261  doExtract(start, limit - start, dst, dstStart);
4262}
4263
4264inline UnicodeString
4265UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4266    return tempSubString(start, limit - start);
4267}
4268
4269inline UChar
4270UnicodeString::doCharAt(int32_t offset) const
4271{
4272  if((uint32_t)offset < (uint32_t)length()) {
4273    return getArrayStart()[offset];
4274  } else {
4275    return kInvalidUChar;
4276  }
4277}
4278
4279inline UChar
4280UnicodeString::charAt(int32_t offset) const
4281{ return doCharAt(offset); }
4282
4283inline UChar
4284UnicodeString::operator[] (int32_t offset) const
4285{ return doCharAt(offset); }
4286
4287inline UBool
4288UnicodeString::isEmpty() const {
4289  // Arithmetic or logical right shift does not matter: only testing for 0.
4290  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4291}
4292
4293//========================================
4294// Write implementation methods
4295//========================================
4296inline void
4297UnicodeString::setZeroLength() {
4298  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4299}
4300
4301inline void
4302UnicodeString::setShortLength(int32_t len) {
4303  // requires 0 <= len <= kMaxShortLength
4304  fUnion.fFields.fLengthAndFlags =
4305    (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4306}
4307
4308inline void
4309UnicodeString::setLength(int32_t len) {
4310  if(len <= kMaxShortLength) {
4311    setShortLength(len);
4312  } else {
4313    fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4314    fUnion.fFields.fLength = len;
4315  }
4316}
4317
4318inline void
4319UnicodeString::setToEmpty() {
4320  fUnion.fFields.fLengthAndFlags = kShortString;
4321}
4322
4323inline void
4324UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4325  setLength(len);
4326  fUnion.fFields.fArray = array;
4327  fUnion.fFields.fCapacity = capacity;
4328}
4329
4330inline UnicodeString&
4331UnicodeString::operator= (UChar ch)
4332{ return doReplace(0, length(), &ch, 0, 1); }
4333
4334inline UnicodeString&
4335UnicodeString::operator= (UChar32 ch)
4336{ return replace(0, length(), ch); }
4337
4338inline UnicodeString&
4339UnicodeString::setTo(const UnicodeString& srcText,
4340             int32_t srcStart,
4341             int32_t srcLength)
4342{
4343  unBogus();
4344  return doReplace(0, length(), srcText, srcStart, srcLength);
4345}
4346
4347inline UnicodeString&
4348UnicodeString::setTo(const UnicodeString& srcText,
4349             int32_t srcStart)
4350{
4351  unBogus();
4352  srcText.pinIndex(srcStart);
4353  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4354}
4355
4356inline UnicodeString&
4357UnicodeString::setTo(const UnicodeString& srcText)
4358{
4359  return copyFrom(srcText);
4360}
4361
4362inline UnicodeString&
4363UnicodeString::setTo(const UChar *srcChars,
4364             int32_t srcLength)
4365{
4366  unBogus();
4367  return doReplace(0, length(), srcChars, 0, srcLength);
4368}
4369
4370inline UnicodeString&
4371UnicodeString::setTo(UChar srcChar)
4372{
4373  unBogus();
4374  return doReplace(0, length(), &srcChar, 0, 1);
4375}
4376
4377inline UnicodeString&
4378UnicodeString::setTo(UChar32 srcChar)
4379{
4380  unBogus();
4381  return replace(0, length(), srcChar);
4382}
4383
4384inline UnicodeString&
4385UnicodeString::append(const UnicodeString& srcText,
4386              int32_t srcStart,
4387              int32_t srcLength)
4388{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
4389
4390inline UnicodeString&
4391UnicodeString::append(const UnicodeString& srcText)
4392{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4393
4394inline UnicodeString&
4395UnicodeString::append(const UChar *srcChars,
4396              int32_t srcStart,
4397              int32_t srcLength)
4398{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4399
4400inline UnicodeString&
4401UnicodeString::append(const UChar *srcChars,
4402              int32_t srcLength)
4403{ return doReplace(length(), 0, srcChars, 0, srcLength); }
4404
4405inline UnicodeString&
4406UnicodeString::append(UChar srcChar)
4407{ return doReplace(length(), 0, &srcChar, 0, 1); }
4408
4409inline UnicodeString&
4410UnicodeString::operator+= (UChar ch)
4411{ return doReplace(length(), 0, &ch, 0, 1); }
4412
4413inline UnicodeString&
4414UnicodeString::operator+= (UChar32 ch) {
4415  return append(ch);
4416}
4417
4418inline UnicodeString&
4419UnicodeString::operator+= (const UnicodeString& srcText)
4420{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4421
4422inline UnicodeString&
4423UnicodeString::insert(int32_t start,
4424              const UnicodeString& srcText,
4425              int32_t srcStart,
4426              int32_t srcLength)
4427{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4428
4429inline UnicodeString&
4430UnicodeString::insert(int32_t start,
4431              const UnicodeString& srcText)
4432{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4433
4434inline UnicodeString&
4435UnicodeString::insert(int32_t start,
4436              const UChar *srcChars,
4437              int32_t srcStart,
4438              int32_t srcLength)
4439{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4440
4441inline UnicodeString&
4442UnicodeString::insert(int32_t start,
4443              const UChar *srcChars,
4444              int32_t srcLength)
4445{ return doReplace(start, 0, srcChars, 0, srcLength); }
4446
4447inline UnicodeString&
4448UnicodeString::insert(int32_t start,
4449              UChar srcChar)
4450{ return doReplace(start, 0, &srcChar, 0, 1); }
4451
4452inline UnicodeString&
4453UnicodeString::insert(int32_t start,
4454              UChar32 srcChar)
4455{ return replace(start, 0, srcChar); }
4456
4457
4458inline UnicodeString&
4459UnicodeString::remove()
4460{
4461  // remove() of a bogus string makes the string empty and non-bogus
4462  if(isBogus()) {
4463    setToEmpty();
4464  } else {
4465    setZeroLength();
4466  }
4467  return *this;
4468}
4469
4470inline UnicodeString&
4471UnicodeString::remove(int32_t start,
4472             int32_t _length)
4473{
4474    if(start <= 0 && _length == INT32_MAX) {
4475        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4476        return remove();
4477    }
4478    return doReplace(start, _length, NULL, 0, 0);
4479}
4480
4481inline UnicodeString&
4482UnicodeString::removeBetween(int32_t start,
4483                int32_t limit)
4484{ return doReplace(start, limit - start, NULL, 0, 0); }
4485
4486inline UnicodeString &
4487UnicodeString::retainBetween(int32_t start, int32_t limit) {
4488  truncate(limit);
4489  return doReplace(0, start, NULL, 0, 0);
4490}
4491
4492inline UBool
4493UnicodeString::truncate(int32_t targetLength)
4494{
4495  if(isBogus() && targetLength == 0) {
4496    // truncate(0) of a bogus string makes the string empty and non-bogus
4497    unBogus();
4498    return FALSE;
4499  } else if((uint32_t)targetLength < (uint32_t)length()) {
4500    setLength(targetLength);
4501    return TRUE;
4502  } else {
4503    return FALSE;
4504  }
4505}
4506
4507inline UnicodeString&
4508UnicodeString::reverse()
4509{ return doReverse(0, length()); }
4510
4511inline UnicodeString&
4512UnicodeString::reverse(int32_t start,
4513               int32_t _length)
4514{ return doReverse(start, _length); }
4515
4516U_NAMESPACE_END
4517
4518#endif
4519