unistr.h revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/*
2**********************************************************************
3*   Copyright (C) 1998-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   11/11/98    stephen     Changed per 11/9 code review.
14*   04/20/99    stephen     Overhauled per 4/16 code review.
15*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16*                           handleReplaceBetween(); other methods unchanged.
17*   06/25/01    grhoten     Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29#include "unicode/utypes.h"
30#include "unicode/rep.h"
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
34#include "unicode/ucasemap.h"
35
36struct UConverter;          // unicode/ucnv.h
37class  StringThreadTest;
38
39#ifndef U_COMPARE_CODE_POINT_ORDER
40/* see also ustring.h and unorm.h */
41/**
42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
43 * Compare strings in code point order instead of code unit order.
44 * @stable ICU 2.2
45 */
46#define U_COMPARE_CODE_POINT_ORDER  0x8000
47#endif
48
49#ifndef USTRING_H
50/**
51 * \ingroup ustring_ustrlen
52 */
53U_STABLE int32_t U_EXPORT2
54u_strlen(const UChar *s);
55#endif
56
57/**
58 * \def U_STRING_CASE_MAPPER_DEFINED
59 * @internal
60 */
61#ifndef U_STRING_CASE_MAPPER_DEFINED
62#define U_STRING_CASE_MAPPER_DEFINED
63
64/**
65 * Internal string case mapping function type.
66 * @internal
67 */
68typedef int32_t U_CALLCONV
69UStringCaseMapper(const UCaseMap *csm,
70                  UChar *dest, int32_t destCapacity,
71                  const UChar *src, int32_t srcLength,
72                  UErrorCode *pErrorCode);
73
74#endif
75
76U_NAMESPACE_BEGIN
77
78class BreakIterator;        // unicode/brkiter.h
79class Locale;               // unicode/locid.h
80class StringCharacterIterator;
81class UnicodeStringAppendable;  // unicode/appendable.h
82
83/* The <iostream> include has been moved to unicode/ustream.h */
84
85/**
86 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
87 * which constructs a Unicode string from an invariant-character char * string.
88 * About invariant characters see utypes.h.
89 * This constructor has no runtime dependency on conversion code and is
90 * therefore recommended over ones taking a charset name string
91 * (where the empty string "" indicates invariant-character conversion).
92 *
93 * @stable ICU 3.2
94 */
95#define US_INV icu::UnicodeString::kInvariant
96
97/**
98 * Unicode String literals in C++.
99 * Depending on the platform properties, different UnicodeString
100 * constructors should be used to create a UnicodeString object from
101 * a string literal.
102 * The macros are defined for maximum performance.
103 * They work only for strings that contain "invariant characters", i.e.,
104 * only latin letters, digits, and some punctuation.
105 * See utypes.h for details.
106 *
107 * The string parameter must be a C string literal.
108 * The length of the string, not including the terminating
109 * <code>NUL</code>, must be specified as a constant.
110 * The U_STRING_DECL macro should be invoked exactly once for one
111 * such string variable before it is used.
112 * @stable ICU 2.0
113 */
114#if defined(U_DECLARE_UTF16)
115#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
116#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
117#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
118#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
119#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
120#else
121#   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
122#endif
123
124/**
125 * Unicode String literals in C++.
126 * Depending on the platform properties, different UnicodeString
127 * constructors should be used to create a UnicodeString object from
128 * a string literal.
129 * The macros are defined for improved performance.
130 * They work only for strings that contain "invariant characters", i.e.,
131 * only latin letters, digits, and some punctuation.
132 * See utypes.h for details.
133 *
134 * The string parameter must be a C string literal.
135 * @stable ICU 2.0
136 */
137#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
138
139/**
140 * \def UNISTR_FROM_CHAR_EXPLICIT
141 * This can be defined to be empty or "explicit".
142 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
143 * constructors are marked as explicit, preventing their inadvertent use.
144 * @stable ICU 49
145 */
146#ifndef UNISTR_FROM_CHAR_EXPLICIT
147# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
148    // Auto-"explicit" in ICU library code.
149#   define UNISTR_FROM_CHAR_EXPLICIT explicit
150# else
151    // Empty by default for source code compatibility.
152#   define UNISTR_FROM_CHAR_EXPLICIT
153# endif
154#endif
155
156/**
157 * \def UNISTR_FROM_STRING_EXPLICIT
158 * This can be defined to be empty or "explicit".
159 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
160 * constructors are marked as explicit, preventing their inadvertent use.
161 *
162 * In particular, this helps prevent accidentally depending on ICU conversion code
163 * by passing a string literal into an API with a const UnicodeString & parameter.
164 * @stable ICU 49
165 */
166#ifndef UNISTR_FROM_STRING_EXPLICIT
167# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
168    // Auto-"explicit" in ICU library code.
169#   define UNISTR_FROM_STRING_EXPLICIT explicit
170# else
171    // Empty by default for source code compatibility.
172#   define UNISTR_FROM_STRING_EXPLICIT
173# endif
174#endif
175
176/**
177 * UnicodeString is a string class that stores Unicode characters directly and provides
178 * functionality similar to that of the Java String and StringBuffer classes.
179 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
180 *
181 * The UnicodeString class is not suitable for subclassing.
182 *
183 * <p>For an overview of Unicode strings in C and C++ see the
184 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
185 *
186 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
187 * A Unicode character may be stored with either one code unit
188 * (the most common case) or with a matched pair of special code units
189 * ("surrogates"). The data type for code units is UChar.
190 * For single-character handling, a Unicode character code <em>point</em> is a value
191 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
192 *
193 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
194 * This is the same as with multi-byte char* strings in traditional string handling.
195 * Operations on partial strings typically do not test for code point boundaries.
196 * If necessary, the user needs to take care of such boundaries by testing for the code unit
197 * values or by using functions like
198 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
199 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
200 *
201 * UnicodeString methods are more lenient with regard to input parameter values
202 * than other ICU APIs. In particular:
203 * - If indexes are out of bounds for a UnicodeString object
204 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
205 * - If primitive string pointer values (e.g., const UChar * or char *)
206 *   for input strings are NULL, then those input string parameters are treated
207 *   as if they pointed to an empty string.
208 *   However, this is <em>not</em> the case for char * parameters for charset names
209 *   or other IDs.
210 * - Most UnicodeString methods do not take a UErrorCode parameter because
211 *   there are usually very few opportunities for failure other than a shortage
212 *   of memory, error codes in low-level C++ string methods would be inconvenient,
213 *   and the error code as the last parameter (ICU convention) would prevent
214 *   the use of default parameter values.
215 *   Instead, such methods set the UnicodeString into a "bogus" state
216 *   (see isBogus()) if an error occurs.
217 *
218 * In string comparisons, two UnicodeString objects that are both "bogus"
219 * compare equal (to be transitive and prevent endless loops in sorting),
220 * and a "bogus" string compares less than any non-"bogus" one.
221 *
222 * Const UnicodeString methods are thread-safe. Multiple threads can use
223 * const methods on the same UnicodeString object simultaneously,
224 * but non-const methods must not be called concurrently (in multiple threads)
225 * with any other (const or non-const) methods.
226 *
227 * Similarly, const UnicodeString & parameters are thread-safe.
228 * One object may be passed in as such a parameter concurrently in multiple threads.
229 * This includes the const UnicodeString & parameters for
230 * copy construction, assignment, and cloning.
231 *
232 * <p>UnicodeString uses several storage methods.
233 * String contents can be stored inside the UnicodeString object itself,
234 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
235 * Most of this is done transparently, but careful aliasing in particular provides
236 * significant performance improvements.
237 * Also, the internal buffer is accessible via special functions.
238 * For details see the
239 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
240 *
241 * @see utf.h
242 * @see CharacterIterator
243 * @stable ICU 2.0
244 */
245class U_COMMON_API UnicodeString : public Replaceable
246{
247public:
248
249  /**
250   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
251   * which constructs a Unicode string from an invariant-character char * string.
252   * Use the macro US_INV instead of the full qualification for this value.
253   *
254   * @see US_INV
255   * @stable ICU 3.2
256   */
257  enum EInvariant {
258    /**
259     * @see EInvariant
260     * @stable ICU 3.2
261     */
262    kInvariant
263  };
264
265  //========================================
266  // Read-only operations
267  //========================================
268
269  /* Comparison - bitwise only - for international comparison use collation */
270
271  /**
272   * Equality operator. Performs only bitwise comparison.
273   * @param text The UnicodeString to compare to this one.
274   * @return TRUE if <TT>text</TT> contains the same characters as this one,
275   * FALSE otherwise.
276   * @stable ICU 2.0
277   */
278  inline UBool operator== (const UnicodeString& text) const;
279
280  /**
281   * Inequality operator. Performs only bitwise comparison.
282   * @param text The UnicodeString to compare to this one.
283   * @return FALSE if <TT>text</TT> contains the same characters as this one,
284   * TRUE otherwise.
285   * @stable ICU 2.0
286   */
287  inline UBool operator!= (const UnicodeString& text) const;
288
289  /**
290   * Greater than operator. Performs only bitwise comparison.
291   * @param text The UnicodeString to compare to this one.
292   * @return TRUE if the characters in this are bitwise
293   * greater than the characters in <code>text</code>, FALSE otherwise
294   * @stable ICU 2.0
295   */
296  inline UBool operator> (const UnicodeString& text) const;
297
298  /**
299   * Less than operator. Performs only bitwise comparison.
300   * @param text The UnicodeString to compare to this one.
301   * @return TRUE if the characters in this are bitwise
302   * less than the characters in <code>text</code>, FALSE otherwise
303   * @stable ICU 2.0
304   */
305  inline UBool operator< (const UnicodeString& text) const;
306
307  /**
308   * Greater than or equal operator. Performs only bitwise comparison.
309   * @param text The UnicodeString to compare to this one.
310   * @return TRUE if the characters in this are bitwise
311   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
312   * @stable ICU 2.0
313   */
314  inline UBool operator>= (const UnicodeString& text) const;
315
316  /**
317   * Less than or equal operator. Performs only bitwise comparison.
318   * @param text The UnicodeString to compare to this one.
319   * @return TRUE if the characters in this are bitwise
320   * less than or equal to the characters in <code>text</code>, FALSE otherwise
321   * @stable ICU 2.0
322   */
323  inline UBool operator<= (const UnicodeString& text) const;
324
325  /**
326   * Compare the characters bitwise in this UnicodeString to
327   * the characters in <code>text</code>.
328   * @param text The UnicodeString to compare to this one.
329   * @return The result of bitwise character comparison: 0 if this
330   * contains the same characters as <code>text</code>, -1 if the characters in
331   * this are bitwise less than the characters in <code>text</code>, +1 if the
332   * characters in this are bitwise greater than the characters
333   * in <code>text</code>.
334   * @stable ICU 2.0
335   */
336  inline int8_t compare(const UnicodeString& text) const;
337
338  /**
339   * Compare the characters bitwise in the range
340   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
341   * in the <b>entire string</b> <TT>text</TT>.
342   * (The parameters "start" and "length" are not applied to the other text "text".)
343   * @param start the offset in this string at which the compare operation begins
344   * @param length the number of characters in this string to compare.
345   * @param text the other text to be compared against this string.
346   * @return The result of bitwise character comparison: 0 if this
347   * contains the same characters as <code>text</code>, -1 if the characters in
348   * this are bitwise less than the characters in <code>text</code>, +1 if the
349   * characters in this are bitwise greater than the characters
350   * in <code>text</code>.
351   * @stable ICU 2.0
352   */
353  inline int8_t compare(int32_t start,
354         int32_t length,
355         const UnicodeString& text) const;
356
357  /**
358   * Compare the characters bitwise in the range
359   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
360   * in <TT>srcText</TT> in the range
361   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
362   * @param start the offset in this string at which the compare operation begins
363   * @param length the number of characters in this to compare.
364   * @param srcText the text to be compared
365   * @param srcStart the offset into <TT>srcText</TT> to start comparison
366   * @param srcLength the number of characters in <TT>srcText</TT> to compare
367   * @return The result of bitwise character comparison: 0 if this
368   * contains the same characters as <code>srcText</code>, -1 if the characters in
369   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
370   * characters in this are bitwise greater than the characters
371   * in <code>srcText</code>.
372   * @stable ICU 2.0
373   */
374   inline int8_t compare(int32_t start,
375         int32_t length,
376         const UnicodeString& srcText,
377         int32_t srcStart,
378         int32_t srcLength) const;
379
380  /**
381   * Compare the characters bitwise in this UnicodeString with the first
382   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
383   * @param srcChars The characters to compare to this UnicodeString.
384   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
385   * @return The result of bitwise character comparison: 0 if this
386   * contains the same characters as <code>srcChars</code>, -1 if the characters in
387   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
388   * characters in this are bitwise greater than the characters
389   * in <code>srcChars</code>.
390   * @stable ICU 2.0
391   */
392  inline int8_t compare(const UChar *srcChars,
393         int32_t srcLength) const;
394
395  /**
396   * Compare the characters bitwise in the range
397   * [<TT>start</TT>, <TT>start + length</TT>) with the first
398   * <TT>length</TT> characters in <TT>srcChars</TT>.
399   * @param start the offset in this string at which the compare operation begins
400   * @param length the number of characters to compare (in this string and in <TT>srcChars</TT>).
401   * @param srcChars the characters to be compared
402   * @return The result of bitwise character comparison: 0 if this
403   * contains the same characters as <code>srcChars</code>, -1 if the characters in
404   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
405   * characters in this are bitwise greater than the characters
406   * in <code>srcChars</code>.
407   * @stable ICU 2.0
408   */
409  inline int8_t compare(int32_t start,
410         int32_t length,
411         const UChar *srcChars) const;
412
413  /**
414   * Compare the characters bitwise in the range
415   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
416   * in <TT>srcChars</TT> in the range
417   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
418   * @param start the offset at which the compare operation begins
419   * @param length the number of characters in this to compare
420   * @param srcChars the characters to be compared
421   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
422   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
423   * @return The result of bitwise character comparison: 0 if this
424   * contains the same characters as <code>srcChars</code>, -1 if the characters in
425   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
426   * characters in this are bitwise greater than the characters
427   * in <code>srcChars</code>.
428   * @stable ICU 2.0
429   */
430  inline int8_t compare(int32_t start,
431         int32_t length,
432         const UChar *srcChars,
433         int32_t srcStart,
434         int32_t srcLength) const;
435
436  /**
437   * Compare the characters bitwise in the range
438   * [<TT>start</TT>, <TT>limit</TT>) with the characters
439   * in <TT>srcText</TT> in the range
440   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
441   * @param start the offset at which the compare operation begins
442   * @param limit the offset immediately following the compare operation
443   * @param srcText the text to be compared
444   * @param srcStart the offset into <TT>srcText</TT> to start comparison
445   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
446   * @return The result of bitwise character comparison: 0 if this
447   * contains the same characters as <code>srcText</code>, -1 if the characters in
448   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
449   * characters in this are bitwise greater than the characters
450   * in <code>srcText</code>.
451   * @stable ICU 2.0
452   */
453  inline int8_t compareBetween(int32_t start,
454            int32_t limit,
455            const UnicodeString& srcText,
456            int32_t srcStart,
457            int32_t srcLimit) const;
458
459  /**
460   * Compare two Unicode strings in code point order.
461   * The result may be different from the results of compare(), operator<, etc.
462   * if supplementary characters are present:
463   *
464   * In UTF-16, supplementary characters (with code points U+10000 and above) are
465   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
466   * which means that they compare as less than some other BMP characters like U+feff.
467   * This function compares Unicode strings in code point order.
468   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
469   *
470   * @param text Another string to compare this one to.
471   * @return a negative/zero/positive integer corresponding to whether
472   * this string is less than/equal to/greater than the second one
473   * in code point order
474   * @stable ICU 2.0
475   */
476  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
477
478  /**
479   * Compare two Unicode strings in code point order.
480   * The result may be different from the results of compare(), operator<, etc.
481   * if supplementary characters are present:
482   *
483   * In UTF-16, supplementary characters (with code points U+10000 and above) are
484   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
485   * which means that they compare as less than some other BMP characters like U+feff.
486   * This function compares Unicode strings in code point order.
487   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
488   *
489   * @param start The start offset in this string at which the compare operation begins.
490   * @param length The number of code units from this string to compare.
491   * @param srcText Another string to compare this one to.
492   * @return a negative/zero/positive integer corresponding to whether
493   * this string is less than/equal to/greater than the second one
494   * in code point order
495   * @stable ICU 2.0
496   */
497  inline int8_t compareCodePointOrder(int32_t start,
498                                      int32_t length,
499                                      const UnicodeString& srcText) const;
500
501  /**
502   * Compare two Unicode strings in code point order.
503   * The result may be different from the results of compare(), operator<, etc.
504   * if supplementary characters are present:
505   *
506   * In UTF-16, supplementary characters (with code points U+10000 and above) are
507   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
508   * which means that they compare as less than some other BMP characters like U+feff.
509   * This function compares Unicode strings in code point order.
510   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
511   *
512   * @param start The start offset in this string at which the compare operation begins.
513   * @param length The number of code units from this string to compare.
514   * @param srcText Another string to compare this one to.
515   * @param srcStart The start offset in that string at which the compare operation begins.
516   * @param srcLength The number of code units from that string to compare.
517   * @return a negative/zero/positive integer corresponding to whether
518   * this string is less than/equal to/greater than the second one
519   * in code point order
520   * @stable ICU 2.0
521   */
522   inline int8_t compareCodePointOrder(int32_t start,
523                                       int32_t length,
524                                       const UnicodeString& srcText,
525                                       int32_t srcStart,
526                                       int32_t srcLength) const;
527
528  /**
529   * Compare two Unicode strings in code point order.
530   * The result may be different from the results of compare(), operator<, etc.
531   * if supplementary characters are present:
532   *
533   * In UTF-16, supplementary characters (with code points U+10000 and above) are
534   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
535   * which means that they compare as less than some other BMP characters like U+feff.
536   * This function compares Unicode strings in code point order.
537   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
538   *
539   * @param srcChars A pointer to another string to compare this one to.
540   * @param srcLength The number of code units from that string to compare.
541   * @return a negative/zero/positive integer corresponding to whether
542   * this string is less than/equal to/greater than the second one
543   * in code point order
544   * @stable ICU 2.0
545   */
546  inline int8_t compareCodePointOrder(const UChar *srcChars,
547                                      int32_t srcLength) const;
548
549  /**
550   * Compare two Unicode strings in code point order.
551   * The result may be different from the results of compare(), operator<, etc.
552   * if supplementary characters are present:
553   *
554   * In UTF-16, supplementary characters (with code points U+10000 and above) are
555   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
556   * which means that they compare as less than some other BMP characters like U+feff.
557   * This function compares Unicode strings in code point order.
558   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
559   *
560   * @param start The start offset in this string at which the compare operation begins.
561   * @param length The number of code units from this string to compare.
562   * @param srcChars A pointer to another string to compare this one to.
563   * @return a negative/zero/positive integer corresponding to whether
564   * this string is less than/equal to/greater than the second one
565   * in code point order
566   * @stable ICU 2.0
567   */
568  inline int8_t compareCodePointOrder(int32_t start,
569                                      int32_t length,
570                                      const UChar *srcChars) const;
571
572  /**
573   * Compare two Unicode strings in code point order.
574   * The result may be different from the results of compare(), operator<, etc.
575   * if supplementary characters are present:
576   *
577   * In UTF-16, supplementary characters (with code points U+10000 and above) are
578   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
579   * which means that they compare as less than some other BMP characters like U+feff.
580   * This function compares Unicode strings in code point order.
581   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
582   *
583   * @param start The start offset in this string at which the compare operation begins.
584   * @param length The number of code units from this string to compare.
585   * @param srcChars A pointer to another string to compare this one to.
586   * @param srcStart The start offset in that string at which the compare operation begins.
587   * @param srcLength The number of code units from that string to compare.
588   * @return a negative/zero/positive integer corresponding to whether
589   * this string is less than/equal to/greater than the second one
590   * in code point order
591   * @stable ICU 2.0
592   */
593  inline int8_t compareCodePointOrder(int32_t start,
594                                      int32_t length,
595                                      const UChar *srcChars,
596                                      int32_t srcStart,
597                                      int32_t srcLength) const;
598
599  /**
600   * Compare two Unicode strings in code point order.
601   * The result may be different from the results of compare(), operator<, etc.
602   * if supplementary characters are present:
603   *
604   * In UTF-16, supplementary characters (with code points U+10000 and above) are
605   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
606   * which means that they compare as less than some other BMP characters like U+feff.
607   * This function compares Unicode strings in code point order.
608   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
609   *
610   * @param start The start offset in this string at which the compare operation begins.
611   * @param limit The offset after the last code unit from this string to compare.
612   * @param srcText Another string to compare this one to.
613   * @param srcStart The start offset in that string at which the compare operation begins.
614   * @param srcLimit The offset after the last code unit from that string to compare.
615   * @return a negative/zero/positive integer corresponding to whether
616   * this string is less than/equal to/greater than the second one
617   * in code point order
618   * @stable ICU 2.0
619   */
620  inline int8_t compareCodePointOrderBetween(int32_t start,
621                                             int32_t limit,
622                                             const UnicodeString& srcText,
623                                             int32_t srcStart,
624                                             int32_t srcLimit) const;
625
626  /**
627   * Compare two strings case-insensitively using full case folding.
628   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
629   *
630   * @param text Another string to compare this one to.
631   * @param options A bit set of options:
632   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
633   *     Comparison in code unit order with default case folding.
634   *
635   *   - U_COMPARE_CODE_POINT_ORDER
636   *     Set to choose code point order instead of code unit order
637   *     (see u_strCompare for details).
638   *
639   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
640   *
641   * @return A negative, zero, or positive integer indicating the comparison result.
642   * @stable ICU 2.0
643   */
644  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
645
646  /**
647   * Compare two strings case-insensitively using full case folding.
648   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
649   *
650   * @param start The start offset in this string at which the compare operation begins.
651   * @param length The number of code units from this string to compare.
652   * @param srcText Another string to compare this one to.
653   * @param options A bit set of options:
654   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
655   *     Comparison in code unit order with default case folding.
656   *
657   *   - U_COMPARE_CODE_POINT_ORDER
658   *     Set to choose code point order instead of code unit order
659   *     (see u_strCompare for details).
660   *
661   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
662   *
663   * @return A negative, zero, or positive integer indicating the comparison result.
664   * @stable ICU 2.0
665   */
666  inline int8_t caseCompare(int32_t start,
667         int32_t length,
668         const UnicodeString& srcText,
669         uint32_t options) const;
670
671  /**
672   * Compare two strings case-insensitively using full case folding.
673   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
674   *
675   * @param start The start offset in this string at which the compare operation begins.
676   * @param length The number of code units from this string to compare.
677   * @param srcText Another string to compare this one to.
678   * @param srcStart The start offset in that string at which the compare operation begins.
679   * @param srcLength The number of code units from that string to compare.
680   * @param options A bit set of options:
681   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
682   *     Comparison in code unit order with default case folding.
683   *
684   *   - U_COMPARE_CODE_POINT_ORDER
685   *     Set to choose code point order instead of code unit order
686   *     (see u_strCompare for details).
687   *
688   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
689   *
690   * @return A negative, zero, or positive integer indicating the comparison result.
691   * @stable ICU 2.0
692   */
693  inline int8_t caseCompare(int32_t start,
694         int32_t length,
695         const UnicodeString& srcText,
696         int32_t srcStart,
697         int32_t srcLength,
698         uint32_t options) const;
699
700  /**
701   * Compare two strings case-insensitively using full case folding.
702   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
703   *
704   * @param srcChars A pointer to another string to compare this one to.
705   * @param srcLength The number of code units from that string to compare.
706   * @param options A bit set of options:
707   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
708   *     Comparison in code unit order with default case folding.
709   *
710   *   - U_COMPARE_CODE_POINT_ORDER
711   *     Set to choose code point order instead of code unit order
712   *     (see u_strCompare for details).
713   *
714   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
715   *
716   * @return A negative, zero, or positive integer indicating the comparison result.
717   * @stable ICU 2.0
718   */
719  inline int8_t caseCompare(const UChar *srcChars,
720         int32_t srcLength,
721         uint32_t options) const;
722
723  /**
724   * Compare two strings case-insensitively using full case folding.
725   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
726   *
727   * @param start The start offset in this string at which the compare operation begins.
728   * @param length The number of code units from this string to compare.
729   * @param srcChars A pointer to another string to compare this one to.
730   * @param options A bit set of options:
731   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
732   *     Comparison in code unit order with default case folding.
733   *
734   *   - U_COMPARE_CODE_POINT_ORDER
735   *     Set to choose code point order instead of code unit order
736   *     (see u_strCompare for details).
737   *
738   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
739   *
740   * @return A negative, zero, or positive integer indicating the comparison result.
741   * @stable ICU 2.0
742   */
743  inline int8_t caseCompare(int32_t start,
744         int32_t length,
745         const UChar *srcChars,
746         uint32_t options) const;
747
748  /**
749   * Compare two strings case-insensitively using full case folding.
750   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
751   *
752   * @param start The start offset in this string at which the compare operation begins.
753   * @param length The number of code units from this string to compare.
754   * @param srcChars A pointer to another string to compare this one to.
755   * @param srcStart The start offset in that string at which the compare operation begins.
756   * @param srcLength The number of code units from that string to compare.
757   * @param options A bit set of options:
758   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
759   *     Comparison in code unit order with default case folding.
760   *
761   *   - U_COMPARE_CODE_POINT_ORDER
762   *     Set to choose code point order instead of code unit order
763   *     (see u_strCompare for details).
764   *
765   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
766   *
767   * @return A negative, zero, or positive integer indicating the comparison result.
768   * @stable ICU 2.0
769   */
770  inline int8_t caseCompare(int32_t start,
771         int32_t length,
772         const UChar *srcChars,
773         int32_t srcStart,
774         int32_t srcLength,
775         uint32_t options) const;
776
777  /**
778   * Compare two strings case-insensitively using full case folding.
779   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
780   *
781   * @param start The start offset in this string at which the compare operation begins.
782   * @param limit The offset after the last code unit from this string to compare.
783   * @param srcText Another string to compare this one to.
784   * @param srcStart The start offset in that string at which the compare operation begins.
785   * @param srcLimit The offset after the last code unit from that string to compare.
786   * @param options A bit set of options:
787   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
788   *     Comparison in code unit order with default case folding.
789   *
790   *   - U_COMPARE_CODE_POINT_ORDER
791   *     Set to choose code point order instead of code unit order
792   *     (see u_strCompare for details).
793   *
794   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
795   *
796   * @return A negative, zero, or positive integer indicating the comparison result.
797   * @stable ICU 2.0
798   */
799  inline int8_t caseCompareBetween(int32_t start,
800            int32_t limit,
801            const UnicodeString& srcText,
802            int32_t srcStart,
803            int32_t srcLimit,
804            uint32_t options) const;
805
806  /**
807   * Determine if this starts with the characters in <TT>text</TT>
808   * @param text The text to match.
809   * @return TRUE if this starts with the characters in <TT>text</TT>,
810   * FALSE otherwise
811   * @stable ICU 2.0
812   */
813  inline UBool startsWith(const UnicodeString& text) const;
814
815  /**
816   * Determine if this starts with the characters in <TT>srcText</TT>
817   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
818   * @param srcText The text to match.
819   * @param srcStart the offset into <TT>srcText</TT> to start matching
820   * @param srcLength the number of characters in <TT>srcText</TT> to match
821   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
822   * FALSE otherwise
823   * @stable ICU 2.0
824   */
825  inline UBool startsWith(const UnicodeString& srcText,
826            int32_t srcStart,
827            int32_t srcLength) const;
828
829  /**
830   * Determine if this starts with the characters in <TT>srcChars</TT>
831   * @param srcChars The characters to match.
832   * @param srcLength the number of characters in <TT>srcChars</TT>
833   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
834   * FALSE otherwise
835   * @stable ICU 2.0
836   */
837  inline UBool startsWith(const UChar *srcChars,
838            int32_t srcLength) const;
839
840  /**
841   * Determine if this starts with the characters in <TT>srcChars</TT>
842   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
843   * @param srcChars The characters to match.
844   * @param srcStart the offset into <TT>srcChars</TT> to start matching
845   * @param srcLength the number of characters in <TT>srcChars</TT> to match
846   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
847   * @stable ICU 2.0
848   */
849  inline UBool startsWith(const UChar *srcChars,
850            int32_t srcStart,
851            int32_t srcLength) const;
852
853  /**
854   * Determine if this ends with the characters in <TT>text</TT>
855   * @param text The text to match.
856   * @return TRUE if this ends with the characters in <TT>text</TT>,
857   * FALSE otherwise
858   * @stable ICU 2.0
859   */
860  inline UBool endsWith(const UnicodeString& text) const;
861
862  /**
863   * Determine if this ends with the characters in <TT>srcText</TT>
864   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
865   * @param srcText The text to match.
866   * @param srcStart the offset into <TT>srcText</TT> to start matching
867   * @param srcLength the number of characters in <TT>srcText</TT> to match
868   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
869   * FALSE otherwise
870   * @stable ICU 2.0
871   */
872  inline UBool endsWith(const UnicodeString& srcText,
873          int32_t srcStart,
874          int32_t srcLength) const;
875
876  /**
877   * Determine if this ends with the characters in <TT>srcChars</TT>
878   * @param srcChars The characters to match.
879   * @param srcLength the number of characters in <TT>srcChars</TT>
880   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
881   * FALSE otherwise
882   * @stable ICU 2.0
883   */
884  inline UBool endsWith(const UChar *srcChars,
885          int32_t srcLength) const;
886
887  /**
888   * Determine if this ends with the characters in <TT>srcChars</TT>
889   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
890   * @param srcChars The characters to match.
891   * @param srcStart the offset into <TT>srcChars</TT> to start matching
892   * @param srcLength the number of characters in <TT>srcChars</TT> to match
893   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
894   * FALSE otherwise
895   * @stable ICU 2.0
896   */
897  inline UBool endsWith(const UChar *srcChars,
898          int32_t srcStart,
899          int32_t srcLength) const;
900
901
902  /* Searching - bitwise only */
903
904  /**
905   * Locate in this the first occurrence of the characters in <TT>text</TT>,
906   * using bitwise comparison.
907   * @param text The text to search for.
908   * @return The offset into this of the start of <TT>text</TT>,
909   * or -1 if not found.
910   * @stable ICU 2.0
911   */
912  inline int32_t indexOf(const UnicodeString& text) const;
913
914  /**
915   * Locate in this the first occurrence of the characters in <TT>text</TT>
916   * starting at offset <TT>start</TT>, using bitwise comparison.
917   * @param text The text to search for.
918   * @param start The offset at which searching will start.
919   * @return The offset into this of the start of <TT>text</TT>,
920   * or -1 if not found.
921   * @stable ICU 2.0
922   */
923  inline int32_t indexOf(const UnicodeString& text,
924              int32_t start) const;
925
926  /**
927   * Locate in this the first occurrence in the range
928   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
929   * in <TT>text</TT>, using bitwise comparison.
930   * @param text The text to search for.
931   * @param start The offset at which searching will start.
932   * @param length The number of characters to search
933   * @return The offset into this of the start of <TT>text</TT>,
934   * or -1 if not found.
935   * @stable ICU 2.0
936   */
937  inline int32_t indexOf(const UnicodeString& text,
938              int32_t start,
939              int32_t length) const;
940
941  /**
942   * Locate in this the first occurrence in the range
943   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
944   *  in <TT>srcText</TT> in the range
945   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
946   * using bitwise comparison.
947   * @param srcText The text to search for.
948   * @param srcStart the offset into <TT>srcText</TT> at which
949   * to start matching
950   * @param srcLength the number of characters in <TT>srcText</TT> to match
951   * @param start the offset into this at which to start matching
952   * @param length the number of characters in this to search
953   * @return The offset into this of the start of <TT>srcText</TT>,
954   * or -1 if not found.
955   * @stable ICU 2.0
956   */
957  inline int32_t indexOf(const UnicodeString& srcText,
958              int32_t srcStart,
959              int32_t srcLength,
960              int32_t start,
961              int32_t length) const;
962
963  /**
964   * Locate in this the first occurrence of the characters in
965   * <TT>srcChars</TT>
966   * starting at offset <TT>start</TT>, using bitwise comparison.
967   * @param srcChars The text to search for.
968   * @param srcLength the number of characters in <TT>srcChars</TT> to match
969   * @param start the offset into this at which to start matching
970   * @return The offset into this of the start of <TT>srcChars</TT>,
971   * or -1 if not found.
972   * @stable ICU 2.0
973   */
974  inline int32_t indexOf(const UChar *srcChars,
975              int32_t srcLength,
976              int32_t start) const;
977
978  /**
979   * Locate in this the first occurrence in the range
980   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
981   * in <TT>srcChars</TT>, using bitwise comparison.
982   * @param srcChars The text to search for.
983   * @param srcLength the number of characters in <TT>srcChars</TT>
984   * @param start The offset at which searching will start.
985   * @param length The number of characters to search
986   * @return The offset into this of the start of <TT>srcChars</TT>,
987   * or -1 if not found.
988   * @stable ICU 2.0
989   */
990  inline int32_t indexOf(const UChar *srcChars,
991              int32_t srcLength,
992              int32_t start,
993              int32_t length) const;
994
995  /**
996   * Locate in this the first occurrence in the range
997   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
998   * in <TT>srcChars</TT> in the range
999   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1000   * using bitwise comparison.
1001   * @param srcChars The text to search for.
1002   * @param srcStart the offset into <TT>srcChars</TT> at which
1003   * to start matching
1004   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1005   * @param start the offset into this at which to start matching
1006   * @param length the number of characters in this to search
1007   * @return The offset into this of the start of <TT>srcChars</TT>,
1008   * or -1 if not found.
1009   * @stable ICU 2.0
1010   */
1011  int32_t indexOf(const UChar *srcChars,
1012              int32_t srcStart,
1013              int32_t srcLength,
1014              int32_t start,
1015              int32_t length) const;
1016
1017  /**
1018   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1019   * using bitwise comparison.
1020   * @param c The code unit to search for.
1021   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1022   * @stable ICU 2.0
1023   */
1024  inline int32_t indexOf(UChar c) const;
1025
1026  /**
1027   * Locate in this the first occurrence of the code point <TT>c</TT>,
1028   * using bitwise comparison.
1029   *
1030   * @param c The code point to search for.
1031   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1032   * @stable ICU 2.0
1033   */
1034  inline int32_t indexOf(UChar32 c) const;
1035
1036  /**
1037   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1038   * starting at offset <TT>start</TT>, using bitwise comparison.
1039   * @param c The code unit to search for.
1040   * @param start The offset at which searching will start.
1041   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1042   * @stable ICU 2.0
1043   */
1044  inline int32_t indexOf(UChar c,
1045              int32_t start) const;
1046
1047  /**
1048   * Locate in this the first occurrence of the code point <TT>c</TT>
1049   * starting at offset <TT>start</TT>, using bitwise comparison.
1050   *
1051   * @param c The code point to search for.
1052   * @param start The offset at which searching will start.
1053   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1054   * @stable ICU 2.0
1055   */
1056  inline int32_t indexOf(UChar32 c,
1057              int32_t start) const;
1058
1059  /**
1060   * Locate in this the first occurrence of the BMP code point <code>c</code>
1061   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1062   * using bitwise comparison.
1063   * @param c The code unit to search for.
1064   * @param start the offset into this at which to start matching
1065   * @param length the number of characters in this to search
1066   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1067   * @stable ICU 2.0
1068   */
1069  inline int32_t indexOf(UChar c,
1070              int32_t start,
1071              int32_t length) const;
1072
1073  /**
1074   * Locate in this the first occurrence of the code point <TT>c</TT>
1075   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1076   * using bitwise comparison.
1077   *
1078   * @param c The code point to search for.
1079   * @param start the offset into this at which to start matching
1080   * @param length the number of characters in this to search
1081   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1082   * @stable ICU 2.0
1083   */
1084  inline int32_t indexOf(UChar32 c,
1085              int32_t start,
1086              int32_t length) const;
1087
1088  /**
1089   * Locate in this the last occurrence of the characters in <TT>text</TT>,
1090   * using bitwise comparison.
1091   * @param text The text to search for.
1092   * @return The offset into this of the start of <TT>text</TT>,
1093   * or -1 if not found.
1094   * @stable ICU 2.0
1095   */
1096  inline int32_t lastIndexOf(const UnicodeString& text) const;
1097
1098  /**
1099   * Locate in this the last occurrence of the characters in <TT>text</TT>
1100   * starting at offset <TT>start</TT>, using bitwise comparison.
1101   * @param text The text to search for.
1102   * @param start The offset at which searching will start.
1103   * @return The offset into this of the start of <TT>text</TT>,
1104   * or -1 if not found.
1105   * @stable ICU 2.0
1106   */
1107  inline int32_t lastIndexOf(const UnicodeString& text,
1108              int32_t start) const;
1109
1110  /**
1111   * Locate in this the last occurrence in the range
1112   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1113   * in <TT>text</TT>, using bitwise comparison.
1114   * @param text The text to search for.
1115   * @param start The offset at which searching will start.
1116   * @param length The number of characters to search
1117   * @return The offset into this of the start of <TT>text</TT>,
1118   * or -1 if not found.
1119   * @stable ICU 2.0
1120   */
1121  inline int32_t lastIndexOf(const UnicodeString& text,
1122              int32_t start,
1123              int32_t length) const;
1124
1125  /**
1126   * Locate in this the last occurrence in the range
1127   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1128   * in <TT>srcText</TT> in the range
1129   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1130   * using bitwise comparison.
1131   * @param srcText The text to search for.
1132   * @param srcStart the offset into <TT>srcText</TT> at which
1133   * to start matching
1134   * @param srcLength the number of characters in <TT>srcText</TT> to match
1135   * @param start the offset into this at which to start matching
1136   * @param length the number of characters in this to search
1137   * @return The offset into this of the start of <TT>srcText</TT>,
1138   * or -1 if not found.
1139   * @stable ICU 2.0
1140   */
1141  inline int32_t lastIndexOf(const UnicodeString& srcText,
1142              int32_t srcStart,
1143              int32_t srcLength,
1144              int32_t start,
1145              int32_t length) const;
1146
1147  /**
1148   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1149   * starting at offset <TT>start</TT>, using bitwise comparison.
1150   * @param srcChars The text to search for.
1151   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1152   * @param start the offset into this at which to start matching
1153   * @return The offset into this of the start of <TT>srcChars</TT>,
1154   * or -1 if not found.
1155   * @stable ICU 2.0
1156   */
1157  inline int32_t lastIndexOf(const UChar *srcChars,
1158              int32_t srcLength,
1159              int32_t start) const;
1160
1161  /**
1162   * Locate in this the last occurrence in the range
1163   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1164   * in <TT>srcChars</TT>, using bitwise comparison.
1165   * @param srcChars The text to search for.
1166   * @param srcLength the number of characters in <TT>srcChars</TT>
1167   * @param start The offset at which searching will start.
1168   * @param length The number of characters to search
1169   * @return The offset into this of the start of <TT>srcChars</TT>,
1170   * or -1 if not found.
1171   * @stable ICU 2.0
1172   */
1173  inline int32_t lastIndexOf(const UChar *srcChars,
1174              int32_t srcLength,
1175              int32_t start,
1176              int32_t length) const;
1177
1178  /**
1179   * Locate in this the last occurrence in the range
1180   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1181   * in <TT>srcChars</TT> in the range
1182   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1183   * using bitwise comparison.
1184   * @param srcChars The text to search for.
1185   * @param srcStart the offset into <TT>srcChars</TT> at which
1186   * to start matching
1187   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1188   * @param start the offset into this at which to start matching
1189   * @param length the number of characters in this to search
1190   * @return The offset into this of the start of <TT>srcChars</TT>,
1191   * or -1 if not found.
1192   * @stable ICU 2.0
1193   */
1194  int32_t lastIndexOf(const UChar *srcChars,
1195              int32_t srcStart,
1196              int32_t srcLength,
1197              int32_t start,
1198              int32_t length) const;
1199
1200  /**
1201   * Locate in this the last occurrence of the BMP code point <code>c</code>,
1202   * using bitwise comparison.
1203   * @param c The code unit to search for.
1204   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1205   * @stable ICU 2.0
1206   */
1207  inline int32_t lastIndexOf(UChar c) const;
1208
1209  /**
1210   * Locate in this the last occurrence of the code point <TT>c</TT>,
1211   * using bitwise comparison.
1212   *
1213   * @param c The code point to search for.
1214   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1215   * @stable ICU 2.0
1216   */
1217  inline int32_t lastIndexOf(UChar32 c) const;
1218
1219  /**
1220   * Locate in this the last occurrence of the BMP code point <code>c</code>
1221   * starting at offset <TT>start</TT>, using bitwise comparison.
1222   * @param c The code unit to search for.
1223   * @param start The offset at which searching will start.
1224   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1225   * @stable ICU 2.0
1226   */
1227  inline int32_t lastIndexOf(UChar c,
1228              int32_t start) const;
1229
1230  /**
1231   * Locate in this the last occurrence of the code point <TT>c</TT>
1232   * starting at offset <TT>start</TT>, using bitwise comparison.
1233   *
1234   * @param c The code point to search for.
1235   * @param start The offset at which searching will start.
1236   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1237   * @stable ICU 2.0
1238   */
1239  inline int32_t lastIndexOf(UChar32 c,
1240              int32_t start) const;
1241
1242  /**
1243   * Locate in this the last occurrence of the BMP code point <code>c</code>
1244   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1245   * using bitwise comparison.
1246   * @param c The code unit to search for.
1247   * @param start the offset into this at which to start matching
1248   * @param length the number of characters in this to search
1249   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1250   * @stable ICU 2.0
1251   */
1252  inline int32_t lastIndexOf(UChar c,
1253              int32_t start,
1254              int32_t length) const;
1255
1256  /**
1257   * Locate in this the last occurrence of the code point <TT>c</TT>
1258   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1259   * using bitwise comparison.
1260   *
1261   * @param c The code point to search for.
1262   * @param start the offset into this at which to start matching
1263   * @param length the number of characters in this to search
1264   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1265   * @stable ICU 2.0
1266   */
1267  inline int32_t lastIndexOf(UChar32 c,
1268              int32_t start,
1269              int32_t length) const;
1270
1271
1272  /* Character access */
1273
1274  /**
1275   * Return the code unit at offset <tt>offset</tt>.
1276   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1277   * @param offset a valid offset into the text
1278   * @return the code unit at offset <tt>offset</tt>
1279   *         or 0xffff if the offset is not valid for this string
1280   * @stable ICU 2.0
1281   */
1282  inline UChar charAt(int32_t offset) const;
1283
1284  /**
1285   * Return the code unit at offset <tt>offset</tt>.
1286   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1287   * @param offset a valid offset into the text
1288   * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1289   * @stable ICU 2.0
1290   */
1291  inline UChar operator[] (int32_t offset) const;
1292
1293  /**
1294   * Return the code point that contains the code unit
1295   * at offset <tt>offset</tt>.
1296   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1297   * @param offset a valid offset into the text
1298   * that indicates the text offset of any of the code units
1299   * that will be assembled into a code point (21-bit value) and returned
1300   * @return the code point of text at <tt>offset</tt>
1301   *         or 0xffff if the offset is not valid for this string
1302   * @stable ICU 2.0
1303   */
1304  UChar32 char32At(int32_t offset) const;
1305
1306  /**
1307   * Adjust a random-access offset so that
1308   * it points to the beginning of a Unicode character.
1309   * The offset that is passed in points to
1310   * any code unit of a code point,
1311   * while the returned offset will point to the first code unit
1312   * of the same code point.
1313   * In UTF-16, if the input offset points to a second surrogate
1314   * of a surrogate pair, then the returned offset will point
1315   * to the first surrogate.
1316   * @param offset a valid offset into one code point of the text
1317   * @return offset of the first code unit of the same code point
1318   * @see U16_SET_CP_START
1319   * @stable ICU 2.0
1320   */
1321  int32_t getChar32Start(int32_t offset) const;
1322
1323  /**
1324   * Adjust a random-access offset so that
1325   * it points behind a Unicode character.
1326   * The offset that is passed in points behind
1327   * any code unit of a code point,
1328   * while the returned offset will point behind the last code unit
1329   * of the same code point.
1330   * In UTF-16, if the input offset points behind the first surrogate
1331   * (i.e., to the second surrogate)
1332   * of a surrogate pair, then the returned offset will point
1333   * behind the second surrogate (i.e., to the code unit following the pair).
1334   * @param offset a valid offset after any code unit of a code point of the text
1335   * @return offset of the first code unit after the same code point
1336   * @see U16_SET_CP_LIMIT
1337   * @stable ICU 2.0
1338   */
1339  int32_t getChar32Limit(int32_t offset) const;
1340
1341  /**
1342   * Move the code unit index along the string by delta code points.
1343   * Interpret the input index as a code unit-based offset into the string,
1344   * move the index forward or backward by delta code points, and
1345   * return the resulting index.
1346   * The input index should point to the first code unit of a code point,
1347   * if there is more than one.
1348   *
1349   * Both input and output indexes are code unit-based as for all
1350   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1351   * If delta<0 then the index is moved backward (toward the start of the string).
1352   * If delta>0 then the index is moved forward (toward the end of the string).
1353   *
1354   * This behaves like CharacterIterator::move32(delta, kCurrent).
1355   *
1356   * Behavior for out-of-bounds indexes:
1357   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1358   * if the input index<0 then it is pinned to 0;
1359   * if it is index>length() then it is pinned to length().
1360   * Afterwards, the index is moved by <code>delta</code> code points
1361   * forward or backward,
1362   * but no further backward than to 0 and no further forward than to length().
1363   * The resulting index return value will be in between 0 and length(), inclusively.
1364   *
1365   * Examples:
1366   * <pre>
1367   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1368   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1369   *
1370   * // initial index: position of U+10000
1371   * int32_t index=1;
1372   *
1373   * // the following examples will all result in index==4, position of U+10ffff
1374   *
1375   * // skip 2 code points from some position in the string
1376   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1377   *
1378   * // go to the 3rd code point from the start of s (0-based)
1379   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1380   *
1381   * // go to the next-to-last code point of s
1382   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1383   * </pre>
1384   *
1385   * @param index input code unit index
1386   * @param delta (signed) code point count to move the index forward or backward
1387   *        in the string
1388   * @return the resulting code unit index
1389   * @stable ICU 2.0
1390   */
1391  int32_t moveIndex32(int32_t index, int32_t delta) const;
1392
1393  /* Substring extraction */
1394
1395  /**
1396   * Copy the characters in the range
1397   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1398   * beginning at <tt>dstStart</tt>.
1399   * If the string aliases to <code>dst</code> itself as an external buffer,
1400   * then extract() will not copy the contents.
1401   *
1402   * @param start offset of first character which will be copied into the array
1403   * @param length the number of characters to extract
1404   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1405   * must be at least (<tt>dstStart + length</tt>).
1406   * @param dstStart the offset in <TT>dst</TT> where the first character
1407   * will be extracted
1408   * @stable ICU 2.0
1409   */
1410  inline void extract(int32_t start,
1411           int32_t length,
1412           UChar *dst,
1413           int32_t dstStart = 0) const;
1414
1415  /**
1416   * Copy the contents of the string into dest.
1417   * This is a convenience function that
1418   * checks if there is enough space in dest,
1419   * extracts the entire string if possible,
1420   * and NUL-terminates dest if possible.
1421   *
1422   * If the string fits into dest but cannot be NUL-terminated
1423   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1424   * If the string itself does not fit into dest
1425   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1426   *
1427   * If the string aliases to <code>dest</code> itself as an external buffer,
1428   * then extract() will not copy the contents.
1429   *
1430   * @param dest Destination string buffer.
1431   * @param destCapacity Number of UChars available at dest.
1432   * @param errorCode ICU error code.
1433   * @return length()
1434   * @stable ICU 2.0
1435   */
1436  int32_t
1437  extract(UChar *dest, int32_t destCapacity,
1438          UErrorCode &errorCode) const;
1439
1440  /**
1441   * Copy the characters in the range
1442   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1443   * <tt>target</tt>.
1444   * @param start offset of first character which will be copied
1445   * @param length the number of characters to extract
1446   * @param target UnicodeString into which to copy characters.
1447   *        (Output parameter; this function itself returns nothing.)
1448   * @stable ICU 2.0
1449   */
1450  inline void extract(int32_t start,
1451           int32_t length,
1452           UnicodeString& target) const;
1453
1454  /**
1455   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1456   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1457   * @param start offset of first character which will be copied into the array
1458   * @param limit offset immediately following the last character to be copied
1459   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1460   * must be at least (<tt>dstStart + (limit - start)</tt>).
1461   * @param dstStart the offset in <TT>dst</TT> where the first character
1462   * will be extracted
1463   * @stable ICU 2.0
1464   */
1465  inline void extractBetween(int32_t start,
1466              int32_t limit,
1467              UChar *dst,
1468              int32_t dstStart = 0) const;
1469
1470  /**
1471   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1472   * into the UnicodeString <tt>target</tt>.  Replaceable API.
1473   * @param start offset of first character which will be copied
1474   * @param limit offset immediately following the last character to be copied
1475   * @param target UnicodeString into which to copy characters.
1476   *        (Output parameter; this function itself returns nothing.)
1477   * @stable ICU 2.0
1478   */
1479  virtual void extractBetween(int32_t start,
1480              int32_t limit,
1481              UnicodeString& target) const;
1482
1483  /**
1484   * Copy the characters in the range
1485   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1486   * All characters must be invariant (see utypes.h).
1487   * Use US_INV as the last, signature-distinguishing parameter.
1488   *
1489   * This function does not write any more than <code>targetCapacity</code>
1490   * characters but returns the length of the entire output string
1491   * so that one can allocate a larger buffer and call the function again
1492   * if necessary.
1493   * The output string is NUL-terminated if possible.
1494   *
1495   * @param start offset of first character which will be copied
1496   * @param startLength the number of characters to extract
1497   * @param target the target buffer for extraction, can be NULL
1498   *               if targetCapacity is 0
1499   * @param targetCapacity the length of the target buffer
1500   * @param inv Signature-distinguishing parameter, use US_INV.
1501   * @return the output string length, not including the terminating NUL
1502   * @stable ICU 3.2
1503   */
1504  int32_t extract(int32_t start,
1505           int32_t startLength,
1506           char *target,
1507           int32_t targetCapacity,
1508           enum EInvariant inv) const;
1509
1510#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1511
1512  /**
1513   * Copy the characters in the range
1514   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1515   * in the platform's default codepage.
1516   * This function does not write any more than <code>targetLength</code>
1517   * characters but returns the length of the entire output string
1518   * so that one can allocate a larger buffer and call the function again
1519   * if necessary.
1520   * The output string is NUL-terminated if possible.
1521   *
1522   * @param start offset of first character which will be copied
1523   * @param startLength the number of characters to extract
1524   * @param target the target buffer for extraction
1525   * @param targetLength the length of the target buffer
1526   * If <TT>target</TT> is NULL, then the number of bytes required for
1527   * <TT>target</TT> is returned.
1528   * @return the output string length, not including the terminating NUL
1529   * @stable ICU 2.0
1530   */
1531  int32_t extract(int32_t start,
1532           int32_t startLength,
1533           char *target,
1534           uint32_t targetLength) const;
1535
1536#endif
1537
1538#if !UCONFIG_NO_CONVERSION
1539
1540  /**
1541   * Copy the characters in the range
1542   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1543   * in a specified codepage.
1544   * The output string is NUL-terminated.
1545   *
1546   * Recommendation: For invariant-character strings use
1547   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1548   * because it avoids object code dependencies of UnicodeString on
1549   * the conversion code.
1550   *
1551   * @param start offset of first character which will be copied
1552   * @param startLength the number of characters to extract
1553   * @param target the target buffer for extraction
1554   * @param codepage the desired codepage for the characters.  0 has
1555   * the special meaning of the default codepage.
1556   * If <code>codepage</code> is an empty string (<code>""</code>),
1557   * then a simple conversion is performed on the codepage-invariant
1558   * subset ("invariant characters") of the platform encoding. See utypes.h.
1559   * If <TT>target</TT> is NULL, then the number of bytes required for
1560   * <TT>target</TT> is returned. It is assumed that the target is big enough
1561   * to fit all of the characters.
1562   * @return the output string length, not including the terminating NUL
1563   * @stable ICU 2.0
1564   */
1565  inline int32_t extract(int32_t start,
1566                 int32_t startLength,
1567                 char *target,
1568                 const char *codepage = 0) const;
1569
1570  /**
1571   * Copy the characters in the range
1572   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1573   * in a specified codepage.
1574   * This function does not write any more than <code>targetLength</code>
1575   * characters but returns the length of the entire output string
1576   * so that one can allocate a larger buffer and call the function again
1577   * if necessary.
1578   * The output string is NUL-terminated if possible.
1579   *
1580   * Recommendation: For invariant-character strings use
1581   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1582   * because it avoids object code dependencies of UnicodeString on
1583   * the conversion code.
1584   *
1585   * @param start offset of first character which will be copied
1586   * @param startLength the number of characters to extract
1587   * @param target the target buffer for extraction
1588   * @param targetLength the length of the target buffer
1589   * @param codepage the desired codepage for the characters.  0 has
1590   * the special meaning of the default codepage.
1591   * If <code>codepage</code> is an empty string (<code>""</code>),
1592   * then a simple conversion is performed on the codepage-invariant
1593   * subset ("invariant characters") of the platform encoding. See utypes.h.
1594   * If <TT>target</TT> is NULL, then the number of bytes required for
1595   * <TT>target</TT> is returned.
1596   * @return the output string length, not including the terminating NUL
1597   * @stable ICU 2.0
1598   */
1599  int32_t extract(int32_t start,
1600           int32_t startLength,
1601           char *target,
1602           uint32_t targetLength,
1603           const char *codepage) const;
1604
1605  /**
1606   * Convert the UnicodeString into a codepage string using an existing UConverter.
1607   * The output string is NUL-terminated if possible.
1608   *
1609   * This function avoids the overhead of opening and closing a converter if
1610   * multiple strings are extracted.
1611   *
1612   * @param dest destination string buffer, can be NULL if destCapacity==0
1613   * @param destCapacity the number of chars available at dest
1614   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1615   *        or NULL for the default converter
1616   * @param errorCode normal ICU error code
1617   * @return the length of the output string, not counting the terminating NUL;
1618   *         if the length is greater than destCapacity, then the string will not fit
1619   *         and a buffer of the indicated length would need to be passed in
1620   * @stable ICU 2.0
1621   */
1622  int32_t extract(char *dest, int32_t destCapacity,
1623                  UConverter *cnv,
1624                  UErrorCode &errorCode) const;
1625
1626#endif
1627
1628  /**
1629   * Create a temporary substring for the specified range.
1630   * Unlike the substring constructor and setTo() functions,
1631   * the object returned here will be a read-only alias (using getBuffer())
1632   * rather than copying the text.
1633   * As a result, this substring operation is much faster but requires
1634   * that the original string not be modified or deleted during the lifetime
1635   * of the returned substring object.
1636   * @param start offset of the first character visible in the substring
1637   * @param length length of the substring
1638   * @return a read-only alias UnicodeString object for the substring
1639   * @stable ICU 4.4
1640   */
1641  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1642
1643  /**
1644   * Create a temporary substring for the specified range.
1645   * Same as tempSubString(start, length) except that the substring range
1646   * is specified as a (start, limit) pair (with an exclusive limit index)
1647   * rather than a (start, length) pair.
1648   * @param start offset of the first character visible in the substring
1649   * @param limit offset immediately following the last character visible in the substring
1650   * @return a read-only alias UnicodeString object for the substring
1651   * @stable ICU 4.4
1652   */
1653  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1654
1655  /**
1656   * Convert the UnicodeString to UTF-8 and write the result
1657   * to a ByteSink. This is called by toUTF8String().
1658   * Unpaired surrogates are replaced with U+FFFD.
1659   * Calls u_strToUTF8WithSub().
1660   *
1661   * @param sink A ByteSink to which the UTF-8 version of the string is written.
1662   *             sink.Flush() is called at the end.
1663   * @stable ICU 4.2
1664   * @see toUTF8String
1665   */
1666  void toUTF8(ByteSink &sink) const;
1667
1668#if U_HAVE_STD_STRING
1669
1670  /**
1671   * Append the UTF-8 form of this UnicodeString to a standard string.
1672   * The conversion itself is performed by toUTF8(), which is handed a
1673   * StringByteSink wrapping <code>result</code>.
1674   * Unpaired surrogates are replaced with U+FFFD.
1675   *
1676   * @param result A standard string (or a compatible object)
1677   *        to which the UTF-8 version of the string is appended.
1678   * @return A reference to <code>result</code>.
1679   * @stable ICU 4.2
1680   * @see toUTF8
1681   */
1682  template<typename StringClass>
1683  StringClass &toUTF8String(StringClass &result) const {
1684    StringByteSink<StringClass> sink(&result);
1685    this->toUTF8(sink);
1686    return result;
1687  }
1688
1689#endif
1690
1691  /**
1692   * Convert the UnicodeString to UTF-32.
1693   * Unpaired surrogates are replaced with U+FFFD.
1694   * Calls u_strToUTF32WithSub().
1695   *
1696   * @param utf32 destination string buffer, can be NULL if capacity==0
1697   * @param capacity the number of UChar32s available at utf32
1698   * @param errorCode Standard ICU error code. Its input value must
1699   *                  pass the U_SUCCESS() test, or else the function returns
1700   *                  immediately. Check for U_FAILURE() on output or use with
1701   *                  function chaining. (See User Guide for details.)
1702   * @return The length of the UTF-32 string.
1703   * @see fromUTF32
1704   * @stable ICU 4.2
1705   */
1706  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1707
1708  /* Length operations */
1709
1710  /**
1711   * Return the length of the UnicodeString object.
1712   * The length is the number of UChar code units that are in the UnicodeString.
1713   * If you want the number of code points, please use countChar32().
1714   * @return the length of the UnicodeString object
1715   * @see countChar32
1716   * @stable ICU 2.0
1717   */
1718  inline int32_t length(void) const;
1719
1720  /**
1721   * Count Unicode code points in the length UChar code units of the string.
1722   * A code point may occupy either one or two UChar code units.
1723   * Counting code points involves reading all code units.
1724   *
1725   * This function is basically the inverse of moveIndex32().
1726   *
1727   * @param start the index of the first code unit to check
1728   * @param length the number of UChar code units to check
1729   * @return the number of code points in the specified code units
1730   * @see length
1731   * @stable ICU 2.0
1732   */
1733  int32_t
1734  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1735
1736  /**
1737   * Check if the length UChar code units of the string
1738   * contain more Unicode code points than a certain number.
1739   * This is more efficient than counting all code points in this part of the string
1740   * and comparing that number with a threshold.
1741   * This function may not need to scan the string at all if the length
1742   * falls within a certain range, and
1743   * never needs to count more than 'number+1' code points.
1744   * Logically equivalent to (countChar32(start, length)>number).
1745   * A Unicode code point may occupy either one or two UChar code units.
1746   *
1747   * @param start the index of the first code unit to check (0 for the entire string)
1748   * @param length the number of UChar code units to check
1749   *               (use INT32_MAX for the entire string; remember that start/length
1750   *                values are pinned)
1751   * @param number The number of code points in the (sub)string is compared against
1752   *               the 'number' parameter.
1753   * @return Boolean value for whether the string contains more Unicode code points
1754   *         than 'number'. Same as (u_countChar32(s, length)>number).
1755   * @see countChar32
1756   * @see u_strHasMoreChar32Than
1757   * @stable ICU 2.4
1758   */
1759  UBool
1760  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1761
1762  /**
1763   * Determine if this string is empty.
1764   * @return TRUE if this string contains 0 characters, FALSE otherwise.
1765   * @stable ICU 2.0
1766   */
1767  inline UBool isEmpty(void) const;
1768
1769  /**
1770   * Return the capacity of the internal buffer of the UnicodeString object.
1771   * This is useful together with the getBuffer functions.
1772   * See there for details.
1773   *
1774   * @return the number of UChars available in the internal buffer
1775   * @see getBuffer
1776   * @stable ICU 2.0
1777   */
1778  inline int32_t getCapacity(void) const;
1779
1780  /* Other operations */
1781
1782  /**
1783   * Generate a hash code for this object.
1784   * @return The hash code of this UnicodeString.
1785   * @stable ICU 2.0
1786   */
1787  inline int32_t hashCode(void) const;
1788
1789  /**
1790   * Determine if this object contains a valid string.
1791   * A bogus string has no value. It is different from an empty string,
1792   * although in both cases isEmpty() returns TRUE and length() returns 0.
1793   * setToBogus() and isBogus() can be used to indicate that no string value is available.
1794   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1795   * length() returns 0.
1796   *
1797   * @return TRUE if the string is bogus/invalid, FALSE otherwise
1798   * @see setToBogus()
1799   * @stable ICU 2.0
1800   */
1801  inline UBool isBogus(void) const;
1802
1803
1804  //========================================
1805  // Write operations
1806  //========================================
1807
1808  /* Assignment operations */
1809
1810  /**
1811   * Assignment operator.  Replace the characters in this UnicodeString
1812   * with the characters from <TT>srcText</TT>.
1813   * @param srcText The text containing the characters to replace
1814   * @return a reference to this
1815   * @stable ICU 2.0
1816   */
1817  UnicodeString &operator=(const UnicodeString &srcText);
1818
1819  /**
1820   * Almost the same as the assignment operator.
1821   * Replace the characters in this UnicodeString
1822   * with the characters from <code>srcText</code>.
1823   *
1824   * This function works the same as the assignment operator
1825   * for all strings except for ones that are readonly aliases.
1826   *
1827   * Starting with ICU 2.4, the assignment operator and the copy constructor
1828   * allocate a new buffer and copy the buffer contents even for readonly aliases.
1829   * This function implements the old, more efficient but less safe behavior
1830   * of making this string also a readonly alias to the same buffer.
1831   *
1832   * The fastCopyFrom function must be used only if it is known that the lifetime of
1833   * this UnicodeString does not exceed the lifetime of the aliased buffer
1834   * including its contents, for example for strings from resource bundles
1835   * or aliases to string constants.
1836   *
1837   * @param src The text containing the characters to replace.
1838   * @return a reference to this
1839   * @stable ICU 2.4
1840   */
1841  UnicodeString &fastCopyFrom(const UnicodeString &src);
1842
1843  /**
1844   * Assignment operator.  Replace the characters in this UnicodeString
1845   * with the code unit <TT>ch</TT>.
1846   * @param ch the code unit that replaces the current contents
1847   * @return a reference to this
1848   * @stable ICU 2.0
1849   */
1850  inline UnicodeString& operator= (UChar ch);
1851
1852  /**
1853   * Assignment operator.  Replace the characters in this UnicodeString
1854   * with the code point <TT>ch</TT>.
1855   * @param ch the code point that replaces the current contents
1856   * @return a reference to this
1857   * @stable ICU 2.0
1858   */
1859  inline UnicodeString& operator= (UChar32 ch);
1860
1861  /**
1862   * Set the text in the UnicodeString object to the characters
1863   * in <TT>srcText</TT> in the range
1864   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1865   * <TT>srcText</TT> is not modified.
1866   * @param srcText the source for the new characters
1867   * @param srcStart the offset into <TT>srcText</TT> where new characters
1868   * will be obtained
1869   * @return a reference to this
1870   * @stable ICU 2.2
1871   */
1872  inline UnicodeString& setTo(const UnicodeString& srcText,
1873               int32_t srcStart);
1874
1875  /**
1876   * Set the text in the UnicodeString object to the characters
1877   * in <TT>srcText</TT> in the range
1878   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1879   * <TT>srcText</TT> is not modified.
1880   * @param srcText the source for the new characters
1881   * @param srcStart the offset into <TT>srcText</TT> where new characters
1882   * will be obtained
1883   * @param srcLength the number of characters in <TT>srcText</TT> in the
1884   * range to be copied.
1885   * @return a reference to this
1886   * @stable ICU 2.0
1887   */
1888  inline UnicodeString& setTo(const UnicodeString& srcText,
1889               int32_t srcStart,
1890               int32_t srcLength);
1891
1892  /**
1893   * Set the text in the UnicodeString object to the characters in
1894   * <TT>srcText</TT>.
1895   * <TT>srcText</TT> is not modified.
1896   * @param srcText the source for the new characters
1897   * @return a reference to this
1898   * @stable ICU 2.0
1899   */
1900  inline UnicodeString& setTo(const UnicodeString& srcText);
1901
1902  /**
1903   * Set the characters in the UnicodeString object to the characters
1904   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1905   * @param srcChars the source for the new characters
1906   * @param srcLength the number of Unicode characters in srcChars.
1907   * @return a reference to this
1908   * @stable ICU 2.0
1909   */
1910  inline UnicodeString& setTo(const UChar *srcChars,
1911               int32_t srcLength);
1912
1913  /**
1914   * Set the characters in the UnicodeString object to the code unit
1915   * <TT>srcChar</TT>.
1916   * @param srcChar the code unit which becomes the UnicodeString's character
1917   * content
1918   * @return a reference to this
1919   * @stable ICU 2.0
1920   */
1921  UnicodeString& setTo(UChar srcChar);
1922
1923  /**
1924   * Set the characters in the UnicodeString object to the code point
1925   * <TT>srcChar</TT>.
1926   * @param srcChar the code point which becomes the UnicodeString's character
1927   * content
1928   * @return a reference to this
1929   * @stable ICU 2.0
1930   */
1931  UnicodeString& setTo(UChar32 srcChar);
1932
1933  /**
1934   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1935   * The text will be used for the UnicodeString object, but
1936   * it will not be released when the UnicodeString is destroyed.
1937   * This has copy-on-write semantics:
1938   * When the string is modified, then the buffer is first copied into
1939   * newly allocated memory.
1940   * The aliased buffer is never modified.
1941   *
1942   * In an assignment to another UnicodeString, when using the copy constructor
1943   * or the assignment operator, the text will be copied.
1944   * When using fastCopyFrom(), the text will be aliased again,
1945   * so that both strings then alias the same readonly-text.
1946   *
1947   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1948   *                     This must be true if <code>textLength==-1</code>.
1949   * @param text The characters to alias for the UnicodeString.
1950   * @param textLength The number of Unicode characters in <code>text</code> to alias.
1951   *                   If -1, then this function will determine the length
1952   *                   by calling <code>u_strlen()</code>.
1953   * @return a reference to this
1954   * @stable ICU 2.0
1955   */
1956  UnicodeString &setTo(UBool isTerminated,
1957                       const UChar *text,
1958                       int32_t textLength);
1959
1960  /**
1961   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1962   * The text will be used for the UnicodeString object, but
1963   * it will not be released when the UnicodeString is destroyed.
1964   * This has write-through semantics:
1965   * For as long as the capacity of the buffer is sufficient, write operations
1966   * will directly affect the buffer. When more capacity is necessary, then
1967   * a new buffer will be allocated and the contents copied as with regularly
1968   * constructed strings.
1969   * In an assignment to another UnicodeString, the buffer will be copied.
1970   * The extract(UChar *dst) function detects whether the dst pointer is the same
1971   * as the string buffer itself and will in this case not copy the contents.
1972   *
1973   * @param buffer The characters to alias for the UnicodeString.
1974   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1975   * @param buffCapacity The size of <code>buffer</code> in UChars.
1976   * @return a reference to this
1977   * @stable ICU 2.0
1978   */
1979  UnicodeString &setTo(UChar *buffer,
1980                       int32_t buffLength,
1981                       int32_t buffCapacity);
1982
1983  /**
1984   * Make this UnicodeString object invalid.
1985   * The string will test TRUE with isBogus().
1986   *
1987   * A bogus string has no value. It is different from an empty string.
1988   * It can be used to indicate that no string value is available.
1989   * getBuffer() and getTerminatedBuffer() return NULL, and
1990   * length() returns 0.
1991   *
1992   * This utility function is used throughout the UnicodeString
1993   * implementation to indicate that a UnicodeString operation failed,
1994   * and may be used in other functions,
1995   * especially but not exclusively when such functions do not
1996   * take a UErrorCode for simplicity.
1997   *
1998   * The following methods, and no others, will clear a string object's bogus flag:
1999   * - remove()
2000   * - remove(0, INT32_MAX)
2001   * - truncate(0)
2002   * - operator=() (assignment operator)
2003   * - setTo(...)
2004   *
2005   * The simplest way to turn a bogus string into an empty one
2006   * is to use the remove() function.
2007   * Examples for other functions that are equivalent to "set to empty string":
2008   * \code
2009   * if(s.isBogus()) {
2010   *   s.remove();           // set to an empty string (remove all), or
2011   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2012   *   s.truncate(0);        // set to an empty string (complete truncation), or
2013   *   s=UnicodeString();    // assign an empty string, or
2014   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2015   *   static const UChar nul=0;
2016   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2017   * }
2018   * \endcode
2019   *
2020   * @see isBogus()
2021   * @stable ICU 2.0
2022   */
2023  void setToBogus();
2024
2025  /**
2026   * Set the character at the specified offset to the specified character.
2027   * @param offset A valid offset into the text of the character to set
2028   * @param ch The new character
2029   * @return A reference to this
2030   * @stable ICU 2.0
2031   */
2032  UnicodeString& setCharAt(int32_t offset,
2033               UChar ch);
2034
2035
2036  /* Append operations */
2037
2038  /**
2039   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2040   * object.
2041   * @param ch the code unit to be appended
2042   * @return a reference to this
2043   * @stable ICU 2.0
2044   */
2045 inline  UnicodeString& operator+= (UChar ch);
2046
2047  /**
2048   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2049   * object.
2050   * @param ch the code point to be appended
2051   * @return a reference to this
2052   * @stable ICU 2.0
2053   */
2054 inline  UnicodeString& operator+= (UChar32 ch);
2055
2056  /**
2057   * Append operator. Append the characters in <TT>srcText</TT> to the
2058   * UnicodeString object. <TT>srcText</TT> is not modified.
2059   * @param srcText the source for the new characters
2060   * @return a reference to this
2061   * @stable ICU 2.0
2062   */
2063  inline UnicodeString& operator+= (const UnicodeString& srcText);
2064
2065  /**
2066   * Append the characters
2067   * in <TT>srcText</TT> in the range
2068   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2069   * end of the UnicodeString object. <TT>srcText</TT>
2070   * is not modified.
2071   * @param srcText the source for the new characters
2072   * @param srcStart the offset into <TT>srcText</TT> where new characters
2073   * will be obtained
2074   * @param srcLength the number of characters in <TT>srcText</TT> in
2075   * the append string
2076   * @return a reference to this
2077   * @stable ICU 2.0
2078   */
2079  inline UnicodeString& append(const UnicodeString& srcText,
2080            int32_t srcStart,
2081            int32_t srcLength);
2082
2083  /**
2084   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2085   * <TT>srcText</TT> is not modified.
2086   * @param srcText the source for the new characters
2087   * @return a reference to this
2088   * @stable ICU 2.0
2089   */
2090  inline UnicodeString& append(const UnicodeString& srcText);
2091
2092  /**
2093   * Append the characters in <TT>srcChars</TT> in the range
2094   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of
2095   * the UnicodeString object.
2096   * <TT>srcChars</TT> is not modified.
2097   * @param srcChars the source for the new characters
2098   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2099   * will be obtained
2100   * @param srcLength the number of characters in <TT>srcChars</TT> in
2101   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2102   * @return a reference to this
2103   * @stable ICU 2.0
2104   */
2105  inline UnicodeString& append(const UChar *srcChars,
2106            int32_t srcStart,
2107            int32_t srcLength);
2108
2109  /**
2110   * Append the characters in <TT>srcChars</TT> to the end of the
2111   * UnicodeString object. <TT>srcChars</TT> is not modified.
2112   * @param srcChars the source for the new characters
2113   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2114   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2115   * @return a reference to this
2116   * @stable ICU 2.0
2117   */
2118  inline UnicodeString& append(const UChar *srcChars,
2119            int32_t srcLength);
2120
2121  /**
2122   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2123   * @param srcChar the code unit to append
2124   * @return a reference to this
2125   * @stable ICU 2.0
2126   */
2127  inline UnicodeString& append(UChar srcChar);
2128
2129  /**
2130   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2131   * @param srcChar the code point to append
2132   * @return a reference to this
2133   * @stable ICU 2.0
2134   */
2135  UnicodeString& append(UChar32 srcChar);
2136
2137
2138  /* Insert operations */
2139
2140  /**
2141   * Insert the characters in <TT>srcText</TT> in the range
2142   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2143   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2144   * @param start the offset where the insertion begins
2145   * @param srcText the source for the new characters
2146   * @param srcStart the offset into <TT>srcText</TT> where new characters
2147   * will be obtained
2148   * @param srcLength the number of characters in <TT>srcText</TT> in
2149   * the insert string
2150   * @return a reference to this
2151   * @stable ICU 2.0
2152   */
2153  inline UnicodeString& insert(int32_t start,
2154            const UnicodeString& srcText,
2155            int32_t srcStart,
2156            int32_t srcLength);
2157
2158  /**
2159   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2160   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2161   * @param start the offset where the insertion begins
2162   * @param srcText the source for the new characters
2163   * @return a reference to this
2164   * @stable ICU 2.0
2165   */
2166  inline UnicodeString& insert(int32_t start,
2167            const UnicodeString& srcText);
2168
2169  /**
2170   * Insert the characters in <TT>srcChars</TT> in the range
2171   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2172   * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2173   * @param start the offset in this string at which the insertion begins
2174   * @param srcChars the source for the new characters
2175   * @param srcStart the offset into <TT>srcChars</TT> of the first character
2176   * to insert
2177   * @param srcLength the number of characters of <TT>srcChars</TT> to
2178   * insert, starting at <TT>srcStart</TT>
2179   * @return a reference to this object
2180   * @stable ICU 2.0
2181   */
2182  inline UnicodeString& insert(int32_t start,
2183            const UChar *srcChars,
2184            int32_t srcStart,
2185            int32_t srcLength);
2186
2187  /**
2188   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2189   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2190   * @param start the offset in this string where the insertion begins
2191   * @param srcChars the source for the new characters
2192   * @param srcLength the number of UChars (code units) in <TT>srcChars</TT>.
2193   * @return a reference to this object
2194   * @stable ICU 2.0
2195   */
2196  inline UnicodeString& insert(int32_t start,
2197            const UChar *srcChars,
2198            int32_t srcLength);
2199
2200  /**
2201   * Insert the single code unit <TT>srcChar</TT> into the UnicodeString object at
2202   * offset <TT>start</TT>.
2203   * @param start the offset in this string at which the insertion occurs
2204   * @param srcChar the code unit (UChar) to insert
2205   * @return a reference to this object
2206   * @stable ICU 2.0
2207   */
2208  inline UnicodeString& insert(int32_t start,
2209            UChar srcChar);
2210
2211  /**
2212   * Insert the single code point <TT>srcChar</TT> into the UnicodeString object at
2213   * offset <TT>start</TT>.
2214   * @param start the offset in this string at which the insertion occurs
2215   * @param srcChar the code point (UChar32) to insert
2216   * @return a reference to this object
2217   * @stable ICU 2.0
2218   */
2219  inline UnicodeString& insert(int32_t start,
2220            UChar32 srcChar);
2221
2222
2223  /* Replace operations */
2224
2225  /**
2226   * Replace the characters in the range
2227   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2228   * <TT>srcText</TT> in the range
2229   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2230   * <TT>srcText</TT> is not modified.
2231   * @param start the offset in this string at which the replace operation begins
2232   * @param length the number of characters to replace. The character at
2233   * <TT>start + length</TT> is not modified.
2234   * @param srcText the source for the new characters
2235   * @param srcStart the offset into <TT>srcText</TT> of the first
2236   * replacement character
2237   * @param srcLength the number of characters of <TT>srcText</TT> to use
2238   * as the replacement, starting at <TT>srcStart</TT>
2239   * @return a reference to this object
2240   * @stable ICU 2.0
2241   */
2242  UnicodeString& replace(int32_t start,
2243             int32_t length,
2244             const UnicodeString& srcText,
2245             int32_t srcStart,
2246             int32_t srcLength);
2247
2248  /**
2249   * Replace the characters in the range
2250   * [<TT>start</TT>, <TT>start + length</TT>)
2251   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2252   * not modified.
2253   * @param start the offset in this string at which the replace operation begins
2254   * @param length the number of characters to replace. The character at
2255   * <TT>start + length</TT> is not modified.
2256   * @param srcText the source for the new characters
2257   * @return a reference to this object
2258   * @stable ICU 2.0
2259   */
2260  UnicodeString& replace(int32_t start,
2261             int32_t length,
2262             const UnicodeString& srcText);
2263
2264  /**
2265   * Replace the characters in the range
2266   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2267   * <TT>srcChars</TT> in the range
2268   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2269   * is not modified.
2270   * @param start the offset in this string at which the replace operation begins
2271   * @param length the number of characters to replace.  The character at
2272   * <TT>start + length</TT> is not modified.
2273   * @param srcChars the source for the new characters
2274   * @param srcStart the offset into <TT>srcChars</TT> of the first
2275   * replacement character
2276   * @param srcLength the number of characters of <TT>srcChars</TT> to use
2277   * as the replacement, starting at <TT>srcStart</TT>
2278   * @return a reference to this object
2279   * @stable ICU 2.0
2280   */
2281  UnicodeString& replace(int32_t start,
2282             int32_t length,
2283             const UChar *srcChars,
2284             int32_t srcStart,
2285             int32_t srcLength);
2286
2287  /**
2288   * Replace the characters in the range
2289   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2290   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2291   * @param start the offset in this string at which the replace operation begins
2292   * @param length the number of characters to replace.  The character at
2293   * <TT>start + length</TT> is not modified.
2294   * @param srcChars the source for the new characters
2295   * @param srcLength the number of UChars (code units) in <TT>srcChars</TT>
2296   * @return a reference to this object
2297   * @stable ICU 2.0
2298   */
2299  inline UnicodeString& replace(int32_t start,
2300             int32_t length,
2301             const UChar *srcChars,
2302             int32_t srcLength);
2303
2304  /**
2305   * Replace the characters in the range
2306   * [<TT>start</TT>, <TT>start + length</TT>) with the single code unit
2307   * <TT>srcChar</TT>.
2308   * @param start the offset in this string at which the replace operation begins
2309   * @param length the number of characters to replace.  The character at
2310   * <TT>start + length</TT> is not modified.
2311   * @param srcChar the code unit (UChar) that replaces the range
2312   * @return a reference to this object
2313   * @stable ICU 2.0
2314   */
2315  inline UnicodeString& replace(int32_t start,
2316             int32_t length,
2317             UChar srcChar);
2318
2319  /**
2320   * Replace the characters in the range
2321   * [<TT>start</TT>, <TT>start + length</TT>) with the single code point
2322   * <TT>srcChar</TT>.
2323   * @param start the offset in this string at which the replace operation begins
2324   * @param length the number of characters to replace.  The character at
2325   * <TT>start + length</TT> is not modified.
2326   * @param srcChar the code point (UChar32) that replaces the range
2327   * @return a reference to this object
2328   * @stable ICU 2.0
2329   */
2330  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2331
2332  /**
2333   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2334   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2335   * @param start the offset in this string at which the replace operation begins
2336   * @param limit the offset in this string immediately following the replace range
2337   * @param srcText the source for the new characters
2338   * @return a reference to this object
2339   * @stable ICU 2.0
2340   */
2341  inline UnicodeString& replaceBetween(int32_t start,
2342                int32_t limit,
2343                const UnicodeString& srcText);
2344
2345  /**
2346   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2347   * with the characters in <TT>srcText</TT> in the range
2348   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2349   * @param start the offset in this string at which the replace operation begins
2350   * @param limit the offset in this string immediately following the replace range
2351   * @param srcText the source for the new characters
2352   * @param srcStart the offset into <TT>srcText</TT> where new characters
2353   * will be obtained
2354   * @param srcLimit the offset immediately following the range to copy
2355   * in <TT>srcText</TT>
2356   * @return a reference to this object
2357   * @stable ICU 2.0
2358   */
2359  inline UnicodeString& replaceBetween(int32_t start,
2360                int32_t limit,
2361                const UnicodeString& srcText,
2362                int32_t srcStart,
2363                int32_t srcLimit);
2364
2365  /**
2366   * Replace the substring [start, limit) of this object with the given text.
2367   * @param start the beginning index, inclusive; <code>0 <= start
2368   * <= limit</code>.
2369   * @param limit the ending index, exclusive; <code>start <= limit
2370   * <= length()</code>.
2371   * @param text the text that replaces the characters at offsets
2372   * <code>start</code> through <code>limit - 1</code>
2373   * @stable ICU 2.0
2374   */
2375  virtual void handleReplaceBetween(int32_t start,
2376                                    int32_t limit,
2377                                    const UnicodeString& text);
2378
2379  /**
2380   * Replaceable API: reports whether this object has metadata.
2381   * @return TRUE if this object has metadata, FALSE otherwise
2382   * @stable ICU 2.4
2383   */
2384  virtual UBool hasMetaData() const;
2385
2386  /**
2387   * Copy a substring of this object to the index <code>dest</code>, retaining
2388   * attribute (out-of-band) information.  This method is used to duplicate or
2389   * reorder substrings.  The destination index must not overlap the source range.
2390   *
2391   * @param start the beginning index, inclusive; <code>0 <= start <=
2392   * limit</code>.
2393   * @param limit the ending index, exclusive; <code>start <= limit <=
2394   * length()</code>.
2395   * @param dest the destination index.  The characters from
2396   * <code>start..limit-1</code> will be copied to <code>dest</code>.
2397   * Implementations of this method may assume that <code>dest <= start ||
2398   * dest >= limit</code>.
2399   * @stable ICU 2.0
2400   */
2401  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2402
2403  /* Search and replace operations */
2404
2405  /**
2406   * Replace all occurrences of the characters in <TT>oldText</TT> with the
2407   * characters in <TT>newText</TT>.
2408   * @param oldText the text containing the search text
2409   * @param newText the text containing the replacement text
2410   * @return a reference to this object
2411   * @stable ICU 2.0
2412   */
2413  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2414                const UnicodeString& newText);
2415
2416  /**
2417   * Within the range [<TT>start</TT>, <TT>start + length</TT>) of this string,
2418   * replace all occurrences of the characters in <TT>oldText</TT> with the
2419   * characters in <TT>newText</TT>.
2420   * @param start the start of the range in which replace will be performed
2421   * @param length the length of the range in which replace will be performed
2422   * @param oldText the text containing the search text
2423   * @param newText the text containing the replacement text
2424   * @return a reference to this object
2425   * @stable ICU 2.0
2426   */
2427  inline UnicodeString& findAndReplace(int32_t start,
2428                int32_t length,
2429                const UnicodeString& oldText,
2430                const UnicodeString& newText);
2431
2432  /**
2433   * Within the range [<TT>start</TT>, <TT>start + length</TT>) of this string,
2434   * replace all occurrences of the characters in <TT>oldText</TT> in the range
2435   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2436   * in <TT>newText</TT> in the range
2437   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>).
2438   * @param start the start of the range in which replace will be performed
2439   * @param length the length of the range in which replace will be performed
2440   * @param oldText the text containing the search text
2441   * @param oldStart the start of the search range in <TT>oldText</TT>
2442   * @param oldLength the length of the search range in <TT>oldText</TT>
2443   * @param newText the text containing the replacement text
2444   * @param newStart the start of the replacement range in <TT>newText</TT>
2445   * @param newLength the length of the replacement range in <TT>newText</TT>
2446   * @return a reference to this object
2447   * @stable ICU 2.0
2448   */
2449  UnicodeString& findAndReplace(int32_t start,
2450                int32_t length,
2451                const UnicodeString& oldText,
2452                int32_t oldStart,
2453                int32_t oldLength,
2454                const UnicodeString& newText,
2455                int32_t newStart,
2456                int32_t newLength);
2457
2458
2459  /* Remove operations */
2460
2461  /**
2462   * Remove all characters from the UnicodeString object.
2463   * @return a reference to this object
2464   * @stable ICU 2.0
2465   */
2466  inline UnicodeString& remove(void);
2467
2468  /**
2469   * Remove the characters in the range
2470   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2471   * @param start the offset of the first character to remove
2472   * @param length the number of characters to remove (default: INT32_MAX)
2473   * @return a reference to this object
2474   * @stable ICU 2.0
2475   */
2476  inline UnicodeString& remove(int32_t start,
2477                               int32_t length = (int32_t)INT32_MAX);
2478
2479  /**
2480   * Remove the characters in the range
2481   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2482   * @param start the offset of the first character to remove
2483   * @param limit the offset immediately following the range to remove (default: INT32_MAX)
2484   * @return a reference to this object
2485   * @stable ICU 2.0
2486   */
2487  inline UnicodeString& removeBetween(int32_t start,
2488                                      int32_t limit = (int32_t)INT32_MAX);
2489
2490  /**
2491   * Retain only the characters in the range
2492   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2493   * Removes characters before <code>start</code> and at and after <code>limit</code>.
2494   * @param start the offset of the first character to retain
2495   * @param limit the offset immediately following the range to retain (default: INT32_MAX)
2496   * @return a reference to this object
2497   * @stable ICU 4.4
2498   */
2499  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2500
2501  /* Length operations */
2502
2503  /**
2504   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2505   * If the length of this UnicodeString is less than targetLength,
2506   * targetLength - length() copies of padChar will be added to the
2507   * beginning of this UnicodeString.
2508   * @param targetLength the desired length of the string
2509   * @param padChar the character to use for padding. Defaults to
2510   * space (U+0020)
2511   * @return TRUE if the text was padded, FALSE otherwise.
2512   * @stable ICU 2.0
2513   */
2514  UBool padLeading(int32_t targetLength,
2515                    UChar padChar = 0x0020);
2516
2517  /**
2518   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2519   * If the length of this UnicodeString is less than targetLength,
2520   * targetLength - length() copies of padChar will be added to the
2521   * end of this UnicodeString.
2522   * @param targetLength the desired length of the string
2523   * @param padChar the character to use for padding. Defaults to
2524   * space (U+0020)
2525   * @return TRUE if the text was padded, FALSE otherwise.
2526   * @stable ICU 2.0
2527   */
2528  UBool padTrailing(int32_t targetLength,
2529                     UChar padChar = 0x0020);
2530
2531  /**
2532   * Truncate this UnicodeString to the length <TT>targetLength</TT>.
2533   * @param targetLength the desired length of this UnicodeString.
2534   * @return TRUE if the text was truncated, FALSE otherwise
2535   * @stable ICU 2.0
2536   */
2537  inline UBool truncate(int32_t targetLength);
2538
2539  /**
2540   * Trim leading and trailing whitespace from this UnicodeString.
2541   * @return a reference to this object
2542   * @stable ICU 2.0
2543   */
2544  UnicodeString& trim(void);
2545
2546
2547  /* Miscellaneous operations */
2548
2549  /**
2550   * Reverse this entire UnicodeString in place.
2551   * @return a reference to this object
2552   * @stable ICU 2.0
2553   */
2554  inline UnicodeString& reverse(void);
2555
2556  /**
2557   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2558   * this UnicodeString.
2559   * @param start the start of the range to reverse
2560   * @param length the number of characters to reverse
2561   * @return a reference to this object
2562   * @stable ICU 2.0
2563   */
2564  inline UnicodeString& reverse(int32_t start,
2565             int32_t length);
2566
2567  /**
2568   * Convert the characters in this string to upper case following the
2569   * conventions of the default locale.
2570   * @return A reference to this.
2571   * @stable ICU 2.0
2572   */
2573  UnicodeString& toUpper(void);
2574
2575  /**
2576   * Convert the characters in this string to upper case following the
2577   * conventions of a specific locale.
2578   * @param locale The locale containing the conventions to use.
2579   * @return A reference to this.
2580   * @stable ICU 2.0
2581   */
2582  UnicodeString& toUpper(const Locale& locale);
2583
2584  /**
2585   * Convert the characters in this string to lower case following the
2586   * conventions of the default locale.
2587   * @return A reference to this.
2588   * @stable ICU 2.0
2589   */
2590  UnicodeString& toLower(void);
2591
2592  /**
2593   * Convert the characters in this string to lower case following the
2594   * conventions of a specific locale.
2595   * @param locale The locale containing the conventions to use.
2596   * @return A reference to this.
2597   * @stable ICU 2.0
2598   */
2599  UnicodeString& toLower(const Locale& locale);
2600
2601#if !UCONFIG_NO_BREAK_ITERATION
2602
2603  /**
2604   * Titlecase this string. Convenience function that uses the default locale.
2605   *
2606   * Casing is locale-dependent and context-sensitive.
2607   * Titlecasing uses a break iterator to find the first characters of words
2608   * that are to be titlecased. It titlecases those characters and lowercases
2609   * all others.
2610   *
2611   * The titlecase break iterator can be provided to customize for arbitrary
2612   * styles, using rules and dictionaries beyond the standard iterators.
2613   * It may be more efficient to always provide an iterator to avoid
2614   * opening and closing one for each string.
2615   * The standard titlecase iterator for the root locale implements the
2616   * algorithm of Unicode TR 21.
2617   *
2618   * This function uses only the setText(), first() and next() methods of the
2619   * provided break iterator.
2620   *
2621   * @param titleIter A break iterator to find the first characters of words
2622   *                  that are to be titlecased.
2623   *                  If none is provided (0), then a standard titlecase
2624   *                  break iterator is opened.
2625   *                  Otherwise the provided iterator is set to the string's text.
2626   * @return A reference to this.
2627   * @stable ICU 2.1
2628   */
2629  UnicodeString &toTitle(BreakIterator *titleIter);
2630
2631  /**
2632   * Titlecase this string for the given locale.
2633   *
2634   * Casing is locale-dependent and context-sensitive.
2635   * Titlecasing uses a break iterator to find the first characters of words
2636   * that are to be titlecased. It titlecases those characters and lowercases
2637   * all others.
2638   *
2639   * The titlecase break iterator can be provided to customize for arbitrary
2640   * styles, using rules and dictionaries beyond the standard iterators.
2641   * It may be more efficient to always provide an iterator to avoid
2642   * opening and closing one for each string.
2643   * The standard titlecase iterator for the root locale implements the
2644   * algorithm of Unicode TR 21.
2645   *
2646   * This function uses only the setText(), first() and next() methods of the
2647   * provided break iterator.
2648   *
2649   * @param titleIter A break iterator to find the first characters of words
2650   *                  that are to be titlecased.
2651   *                  If none is provided (0), then a standard titlecase
2652   *                  break iterator is opened.
2653   *                  Otherwise the provided iterator is set to the string's text.
2654   * @param locale    The locale to consider.
2655   * @return A reference to this.
2656   * @stable ICU 2.1
2657   */
2658  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2659
2660  /**
2661   * Titlecase this string for the given locale, with options.
2662   *
2663   * Casing is locale-dependent and context-sensitive.
2664   * Titlecasing uses a break iterator to find the first characters of words
2665   * that are to be titlecased. It titlecases those characters and lowercases
2666   * all others. (This can be modified with options.)
2667   *
2668   * The titlecase break iterator can be provided to customize for arbitrary
2669   * styles, using rules and dictionaries beyond the standard iterators.
2670   * It may be more efficient to always provide an iterator to avoid
2671   * opening and closing one for each string.
2672   * The standard titlecase iterator for the root locale implements the
2673   * algorithm of Unicode TR 21.
2674   *
2675   * This function uses only the setText(), first() and next() methods of the
2676   * provided break iterator.
2677   *
2678   * @param titleIter A break iterator to find the first characters of words
2679   *                  that are to be titlecased.
2680   *                  If none is provided (0), then a standard titlecase
2681   *                  break iterator is opened.
2682   *                  Otherwise the provided iterator is set to the string's text.
2683   * @param locale    The locale to consider.
2684   * @param options   Options bit set, see ucasemap_open().
2685   * @return A reference to this.
2686   * @see U_TITLECASE_NO_LOWERCASE
2687   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2688   * @see ucasemap_open
2689   * @stable ICU 3.8
2690   */
2691  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2692
2693#endif
2694
2695  /**
2696   * Case-fold the characters in this string.
2697   *
2698   * Case-folding is locale-independent and not context-sensitive,
2699   * but there is an option for whether to include or exclude mappings for dotted I
2700   * and dotless i that are marked with 'T' in CaseFolding.txt.
2701   *
2702   * The result may be longer or shorter than the original.
2703   *
2704   * @param options Either U_FOLD_CASE_DEFAULT (the default) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2705   * @return A reference to this.
2706   * @stable ICU 2.0
2707   */
2708  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2709
2710  //========================================
2711  // Access to the internal buffer
2712  //========================================
2713
2714  /**
2715   * Get a read/write pointer to the internal buffer.
2716   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2717   * writable, and is still owned by the UnicodeString object.
2718   * Calls to getBuffer(minCapacity) must not be nested, and
2719   * must be matched with calls to releaseBuffer(newLength).
2720   * If the string buffer was read-only or shared,
2721   * then it will be reallocated and copied.
2722   *
2723   * An attempted nested call will return 0, and will not further modify the
2724   * state of the UnicodeString object.
2725   * It also returns 0 if the string is bogus.
2726   *
2727   * The actual capacity of the string buffer may be larger than minCapacity.
2728   * getCapacity() returns the actual capacity.
2729   * For many operations, the full capacity should be used to avoid reallocations.
2730   *
2731   * While the buffer is "open" between getBuffer(minCapacity)
2732   * and releaseBuffer(newLength), the following applies:
2733   * - The string length is set to 0.
2734   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2735   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2736   * - You can read from and write to the returned buffer.
2737   * - The previous string contents will still be in the buffer;
2738   *   if you want to use them, then you need to call length() before getBuffer(minCapacity).
2739   *   If the length() was greater than minCapacity, then any contents after minCapacity
2740   *   may be lost.
2741   *   The buffer contents are not NUL-terminated by getBuffer().
2742   *   If length()<getCapacity() then you can terminate them by writing a NUL
2743   *   at index length().
2744   * - You must call releaseBuffer(newLength) in order to
2745   *   return to normal UnicodeString operation.
2746   *
2747   * @param minCapacity the minimum number of UChars that are to be available
2748   *        in the buffer, starting at the returned pointer;
2749   *        defaults to the current string capacity if minCapacity==-1
2750   * @return a writable pointer to the internal string buffer,
2751   *         or 0 if an error occurs (nested calls, bogus string, out of memory)
2752   *
2753   * @see releaseBuffer
2754   * @see getTerminatedBuffer()
2755   * @stable ICU 2.0
2756   */
2757  UChar *getBuffer(int32_t minCapacity);
2758
2759  /**
2760   * Release a read/write buffer on a UnicodeString object with an
2761   * "open" getBuffer(minCapacity).
2762   * This function must be called in a matched pair with getBuffer(minCapacity).
2763   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2764   *
2765   * It will set the string length to newLength, but to no more than the current capacity.
2766   * If newLength==-1 then it will set the length according to the
2767   * first NUL in the buffer, or to the capacity if there is no NUL.
2768   *
2769   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2770   *
2771   * @param newLength the new length of the UnicodeString object;
2772   *        defaults to the current capacity if newLength is greater than that;
2773   *        if newLength==-1 (the default argument), it defaults to u_strlen(buffer)
2774   *        but not more than the current capacity of the string
2775   *
2776   * @see getBuffer(int32_t minCapacity)
2777   * @stable ICU 2.0
2778   */
2779  void releaseBuffer(int32_t newLength=-1);
2780
2781  /**
2782   * Get a read-only pointer to the internal buffer.
2783   * This can be called at any time on a valid UnicodeString.
2784   *
2785   * It returns 0 if the string is bogus, or
2786   * during an "open" getBuffer(minCapacity).
2787   *
2788   * It can be called as many times as desired.
2789   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2790   * at which time the pointer is semantically invalidated and must not be used any more.
2791   *
2792   * The capacity of the buffer can be determined with getCapacity().
2793   * The part after length() may or may not be initialized and valid,
2794   * depending on the history of the UnicodeString object.
2795   *
2796   * The buffer contents are (probably) not NUL-terminated.
2797   * You can check if they are with
2798   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2799   * (See getTerminatedBuffer().)
2800   *
2801   * The buffer may reside in read-only memory. Its contents must not
2802   * be modified.
2803   *
2804   * @return a read-only pointer to the internal string buffer,
2805   *         or 0 if the string is empty or bogus, or during an "open" getBuffer(minCapacity)
2806   *
2807   * @see getBuffer(int32_t minCapacity)
2808   * @see getTerminatedBuffer()
2809   * @stable ICU 2.0
2810   */
2811  inline const UChar *getBuffer() const;
2812
2813  /**
2814   * Get a read-only pointer to the internal buffer,
2815   * making sure that it is NUL-terminated.
2816   * This can be called at any time on a valid UnicodeString.
2817   *
2818   * It returns 0 if the string is bogus, or
2819   * during an "open" getBuffer(minCapacity), or if the buffer cannot
2820   * be NUL-terminated (because memory allocation failed).
2821   *
2822   * It can be called as many times as desired.
2823   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2824   * at which time the pointer is semantically invalidated and must not be used any more.
2825   *
2826   * The capacity of the buffer can be determined with getCapacity().
2827   * The part after length()+1 may or may not be initialized and valid,
2828   * depending on the history of the UnicodeString object.
2829   *
2830   * The buffer contents are guaranteed to be NUL-terminated.
2831   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2832   * is written.
2833   * For this reason, this function is not const, unlike getBuffer().
2834   * Note that a UnicodeString may also contain NUL characters as part of its contents.
2835   *
2836   * The buffer may reside in read-only memory. Its contents must not
2837   * be modified.
2838   *
2839   * @return a read-only pointer to the internal string buffer, or 0 if the string is
2840   *         empty or bogus, a getBuffer(minCapacity) is "open", or NUL-termination failed
2841   *
2842   * @see getBuffer(int32_t minCapacity)
2843   * @see getBuffer()
2844   * @stable ICU 2.2
2845   */
2846  const UChar *getTerminatedBuffer();
2847
2848  //========================================
2849  // Constructors
2850  //========================================
2851
2852  /** Default constructor: construct an empty UnicodeString.
2853   * @stable ICU 2.0
2854   */
2855  inline UnicodeString();
2856
2857  /**
2858   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars.
2859   * @param capacity the number of UChars this UnicodeString should hold
2860   * before a resize is necessary; if count is greater than 0 and count
2861   * code points c take up more space than capacity, then capacity is adjusted
2862   * accordingly.
2863   * @param c the code point used to initially fill the string
2864   * @param count specifies how many code points c are to be written in the
2865   *              string
2866   * @stable ICU 2.0
2867   */
2868  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2869
2870  /**
2871   * Single UChar (code unit) constructor.
2872   *
2873   * It is recommended to mark this constructor "explicit" by defining
2874   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2875   * on the compiler command line or similar.
2876   * @param ch the code unit to place in the UnicodeString
2877   * @stable ICU 2.0
2878   */
2879  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
2880
2881  /**
2882   * Single UChar32 (code point) constructor.
2883   *
2884   * It is recommended to mark this constructor "explicit" by defining
2885   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2886   * on the compiler command line or similar.
2887   * @param ch the code point to place in the UnicodeString
2888   * @stable ICU 2.0
2889   */
2890  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2891
2892  /**
2893   * UChar* constructor.
2894   *
2895   * It is recommended to mark this constructor "explicit" by defining
2896   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2897   * on the compiler command line or similar.
2898   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2899   * must be NUL (U+0000) terminated.
2900   * @stable ICU 2.0
2901   */
2902  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
2903
2904  /**
2905   * UChar* constructor with an explicit length.
2906   * @param text The characters to place in the UnicodeString.
2907   * @param textLength The number of UChars (code units) in <TT>text</TT>
2908   * to copy.
2909   * @stable ICU 2.0
2910   */
2911  UnicodeString(const UChar *text,
2912        int32_t textLength);
2913
2914  /**
2915   * Readonly-aliasing UChar* constructor.
2916   * The text will be used for the UnicodeString object, but
2917   * it will not be released when the UnicodeString is destroyed.
2918   * This has copy-on-write semantics:
2919   * When the string is modified, then the buffer is first copied into
2920   * newly allocated memory.
2921   * The aliased buffer is never modified.
2922   *
2923   * In an assignment to another UnicodeString, when using the copy constructor
2924   * or the assignment operator, the text will be copied.
2925   * When using fastCopyFrom(), the text will be aliased again,
2926   * so that both strings then alias the same read-only text.
2927   *
2928   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2929   *                     This must be true if <code>textLength==-1</code>.
2930   * @param text The characters to alias for the UnicodeString.
2931   * @param textLength The number of UChars in <code>text</code> to alias.
2932   *                   If -1, then this constructor will determine the length
2933   *                   by calling <code>u_strlen()</code>.
2934   * @stable ICU 2.0
2935   */
2936  UnicodeString(UBool isTerminated,
2937                const UChar *text,
2938                int32_t textLength);
2939
2940  /**
2941   * Writable-aliasing UChar* constructor.
2942   * The text will be used for the UnicodeString object, but
2943   * it will not be released when the UnicodeString is destroyed.
2944   * This has write-through semantics:
2945   * For as long as the capacity of the buffer is sufficient, write operations
2946   * will directly affect the buffer. When more capacity is necessary, then
2947   * a new buffer will be allocated and the contents copied as with regularly
2948   * constructed strings.
2949   * In an assignment to another UnicodeString, the buffer will be copied.
2950   * The extract(UChar *dst) function detects whether the dst pointer is the same
2951   * as the string buffer itself and will in this case not copy the contents.
2952   *
2953   * @param buffer The characters to alias for the UnicodeString.
2954   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2955   * @param buffCapacity The size of <code>buffer</code> in UChars.
2956   * @stable ICU 2.0
2957   */
2958  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2959
2960#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2961
2962  /**
2963   * char* constructor.
2964   * Uses the default converter (and thus depends on the ICU conversion code)
2965   * unless U_CHARSET_IS_UTF8 is set to 1.
2966   *
2967   * For ASCII (really "invariant character") strings it is more efficient to use
2968   * the constructor that takes a US_INV (for its enum EInvariant).
2969   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
2970   * UNICODE_STRING_SIMPLE.
2971   *
2972   * It is recommended to mark this constructor "explicit" by
2973   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2974   * on the compiler command line or similar.
2975   * @param codepageData an array of bytes, null-terminated,
2976   *                     in the platform's default codepage.
2977   * @stable ICU 2.0
2978   * @see UNICODE_STRING
2979   * @see UNICODE_STRING_SIMPLE
2980   */
2981  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2982
2983  /**
2984   * char* constructor.
2985   * Uses the default converter (and thus depends on the ICU conversion code)
2986   * unless U_CHARSET_IS_UTF8 is set to 1.
2987   * @param codepageData an array of bytes in the platform's default codepage.
2988   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2989   * @stable ICU 2.0
2990   */
2991  UnicodeString(const char *codepageData, int32_t dataLength);
2992
2993#endif
2994
2995#if !UCONFIG_NO_CONVERSION
2996
2997  /**
2998   * char* constructor.
2999   * @param codepageData an array of bytes, null-terminated
3000   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3001   * value 0 for <TT>codepage</TT> indicates that the text is in the
3002   * platform's default codepage.
3003   *
3004   * If <code>codepage</code> is an empty string (<code>""</code>),
3005   * then a simple conversion is performed on the codepage-invariant
3006   * subset ("invariant characters") of the platform encoding. See utypes.h.
3007   * Recommendation: For invariant-character strings use the constructor
3008   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3009   * because it avoids object code dependencies of UnicodeString on
3010   * the conversion code.
3011   *
3012   * @stable ICU 2.0
3013   */
3014  UnicodeString(const char *codepageData, const char *codepage);
3015
3016  /**
3017   * char* constructor.
3018   * @param codepageData an array of bytes.
3019   * @param dataLength The number of bytes in <TT>codepageData</TT>.
3020   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3021   * value 0 for <TT>codepage</TT> indicates that the text is in the
3022   * platform's default codepage.
3023   * If <code>codepage</code> is an empty string (<code>""</code>),
3024   * then a simple conversion is performed on the codepage-invariant
3025   * subset ("invariant characters") of the platform encoding. See utypes.h.
3026   * Recommendation: For invariant-character strings use the constructor
3027   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3028   * because it avoids object code dependencies of UnicodeString on
3029   * the conversion code.
3030   *
3031   * @stable ICU 2.0
3032   */
3033  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3034
3035  /**
3036   * char * / UConverter constructor.
3037   * This constructor uses an existing UConverter object to
3038   * convert the codepage string to Unicode and construct a UnicodeString
3039   * from that.
3040   *
3041   * The converter is reset at first.
3042   * If the error code indicates a failure before this constructor is called,
3043   * or if an error occurs during conversion or construction,
3044   * then the string will be bogus.
3045   *
3046   * This function avoids the overhead of opening and closing a converter if
3047   * multiple strings are constructed.
3048   *
3049   * @param src input codepage string
3050   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3051   * @param cnv converter object (ucnv_resetToUnicode() will be called),
3052   *        can be NULL for the default converter
3053   * @param errorCode normal ICU error code
3054   * @stable ICU 2.0
3055   */
3056  UnicodeString(
3057        const char *src, int32_t srcLength,
3058        UConverter *cnv,
3059        UErrorCode &errorCode);
3060
3061#endif
3062
3063  /**
3064   * Constructs a Unicode string from an invariant-character char * string.
3065   * About invariant characters see utypes.h.
3066   * This constructor has no runtime dependency on conversion code and is
3067   * therefore recommended over ones taking a charset name string
3068   * (where the empty string "" indicates invariant-character conversion).
3069   *
3070   * Use the macro US_INV as the third, signature-distinguishing parameter.
3071   *
3072   * For example:
3073   * \code
3074   * void fn(const char *s) {
3075   *   UnicodeString ustr(s, -1, US_INV);
3076   *   // use ustr ...
3077   * }
3078   * \endcode
3079   *
3080   * @param src String using only invariant characters.
3081   * @param length Length of src, or -1 if NUL-terminated.
   * @param inv Signature-distinguishing parameter, use US_INV.
3083   *
3084   * @see US_INV
3085   * @stable ICU 3.2
3086   */
3087  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3088
3089
3090  /**
3091   * Copy constructor.
3092   * @param that The UnicodeString object to copy.
3093   * @stable ICU 2.0
3094   */
3095  UnicodeString(const UnicodeString& that);
3096
3097  /**
3098   * 'Substring' constructor from tail of source string.
3099   * @param src The UnicodeString object to copy.
3100   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3101   * @stable ICU 2.2
3102   */
3103  UnicodeString(const UnicodeString& src, int32_t srcStart);
3104
3105  /**
3106   * 'Substring' constructor from subrange of source string.
3107   * @param src The UnicodeString object to copy.
3108   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3109   * @param srcLength The number of characters from <tt>src</tt> to copy.
3110   * @stable ICU 2.2
3111   */
3112  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3113
3114  /**
3115   * Clone this object, an instance of a subclass of Replaceable.
3116   * Clones can be used concurrently in multiple threads.
3117   * If a subclass does not implement clone(), or if an error occurs,
3118   * then NULL is returned.
3119   * The clone functions in all subclasses return a pointer to a Replaceable
3120   * because some compilers do not support covariant (same-as-this)
3121   * return types; cast to the appropriate subclass if necessary.
3122   * The caller must delete the clone.
3123   *
3124   * @return a clone of this object
3125   *
3126   * @see Replaceable::clone
3127   * @see getDynamicClassID
3128   * @stable ICU 2.6
3129   */
3130  virtual Replaceable *clone() const;
3131
3132  /** Destructor.
3133   * @stable ICU 2.0
3134   */
3135  virtual ~UnicodeString();
3136
3137  /**
3138   * Create a UnicodeString from a UTF-8 string.
3139   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3140   * Calls u_strFromUTF8WithSub().
3141   *
3142   * @param utf8 UTF-8 input string.
3143   *             Note that a StringPiece can be implicitly constructed
3144   *             from a std::string or a NUL-terminated const char * string.
3145   * @return A UnicodeString with equivalent UTF-16 contents.
3146   * @see toUTF8
3147   * @see toUTF8String
3148   * @stable ICU 4.2
3149   */
3150  static UnicodeString fromUTF8(const StringPiece &utf8);
3151
3152  /**
3153   * Create a UnicodeString from a UTF-32 string.
3154   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3155   * Calls u_strFromUTF32WithSub().
3156   *
3157   * @param utf32 UTF-32 input string. Must not be NULL.
3158   * @param length Length of the input string, or -1 if NUL-terminated.
3159   * @return A UnicodeString with equivalent UTF-16 contents.
3160   * @see toUTF32
3161   * @stable ICU 4.2
3162   */
3163  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3164
3165  /* Miscellaneous operations */
3166
3167  /**
3168   * Unescape a string of characters and return a string containing
3169   * the result.  The following escape sequences are recognized:
3170   *
3171   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3172   * \\Uhhhhhhhh   8 hex digits
3173   * \\xhh         1-2 hex digits
3174   * \\ooo         1-3 octal digits; o in [0-7]
3175   * \\cX          control-X; X is masked with 0x1F
3176   *
3177   * as well as the standard ANSI C escapes:
3178   *
3179   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3180   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3181   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3182   *
3183   * Anything else following a backslash is generically escaped.  For
3184   * example, "[a\\-z]" returns "[a-z]".
3185   *
3186   * If an escape sequence is ill-formed, this method returns an empty
3187   * string.  An example of an ill-formed sequence is "\\u" followed by
3188   * fewer than 4 hex digits.
3189   *
3190   * This function is similar to u_unescape() but not identical to it.
3191   * The latter takes a source char*, so it does escape recognition
3192   * and also invariant conversion.
3193   *
3194   * @return a string with backslash escapes interpreted, or an
3195   * empty string on error.
3196   * @see UnicodeString#unescapeAt()
3197   * @see u_unescape()
3198   * @see u_unescapeAt()
3199   * @stable ICU 2.0
3200   */
3201  UnicodeString unescape() const;
3202
3203  /**
3204   * Unescape a single escape sequence and return the represented
3205   * character.  See unescape() for a listing of the recognized escape
3206   * sequences.  The character at offset-1 is assumed (without
3207   * checking) to be a backslash.  If the escape sequence is
3208   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3209   * returned.
3210   *
3211   * @param offset an input output parameter.  On input, it is the
3212   * offset into this string where the escape sequence is located,
3213   * after the initial backslash.  On output, it is advanced after the
3214   * last character parsed.  On error, it is not advanced at all.
3215   * @return the character represented by the escape sequence at
3216   * offset, or U_SENTINEL=-1 on error.
3217   * @see UnicodeString#unescape()
3218   * @see u_unescape()
3219   * @see u_unescapeAt()
3220   * @stable ICU 2.0
3221   */
3222  UChar32 unescapeAt(int32_t &offset) const;
3223
3224  /**
3225   * ICU "poor man's RTTI", returns a UClassID for this class.
3226   *
3227   * @stable ICU 2.2
3228   */
3229  static UClassID U_EXPORT2 getStaticClassID();
3230
3231  /**
3232   * ICU "poor man's RTTI", returns a UClassID for the actual class.
3233   *
3234   * @stable ICU 2.2
3235   */
3236  virtual UClassID getDynamicClassID() const;
3237
3238  //========================================
3239  // Implementation methods
3240  //========================================
3241
3242protected:
3243  /**
3244   * Implement Replaceable::getLength() (see jitterbug 1027).
3245   * @stable ICU 2.4
3246   */
3247  virtual int32_t getLength() const;
3248
3249  /**
3250   * The change in Replaceable to use virtual getCharAt() allows
3251   * UnicodeString::charAt() to be inline again (see jitterbug 709).
3252   * @stable ICU 2.4
3253   */
3254  virtual UChar getCharAt(int32_t offset) const;
3255
3256  /**
3257   * The change in Replaceable to use virtual getChar32At() allows
3258   * UnicodeString::char32At() to be inline again (see jitterbug 709).
3259   * @stable ICU 2.4
3260   */
3261  virtual UChar32 getChar32At(int32_t offset) const;
3262
3263private:
3264  // For char* constructors. Could be made public.
3265  UnicodeString &setToUTF8(const StringPiece &utf8);
3266  // For extract(char*).
3267  // We could make a toUTF8(target, capacity, errorCode) public but not
3268  // this version: New API will be cleaner if we make callers create substrings
3269  // rather than having start+length on every method,
3270  // and it should take a UErrorCode&.
3271  int32_t
3272  toUTF8(int32_t start, int32_t len,
3273         char *target, int32_t capacity) const;
3274
3275  /**
3276   * Internal string contents comparison, called by operator==.
3277   * Requires: this & text not bogus and have same lengths.
3278   */
3279  UBool doEquals(const UnicodeString &text, int32_t len) const;
3280
3281  inline int8_t
3282  doCompare(int32_t start,
3283           int32_t length,
3284           const UnicodeString& srcText,
3285           int32_t srcStart,
3286           int32_t srcLength) const;
3287
3288  int8_t doCompare(int32_t start,
3289           int32_t length,
3290           const UChar *srcChars,
3291           int32_t srcStart,
3292           int32_t srcLength) const;
3293
3294  inline int8_t
3295  doCompareCodePointOrder(int32_t start,
3296                          int32_t length,
3297                          const UnicodeString& srcText,
3298                          int32_t srcStart,
3299                          int32_t srcLength) const;
3300
3301  int8_t doCompareCodePointOrder(int32_t start,
3302                                 int32_t length,
3303                                 const UChar *srcChars,
3304                                 int32_t srcStart,
3305                                 int32_t srcLength) const;
3306
3307  inline int8_t
3308  doCaseCompare(int32_t start,
3309                int32_t length,
3310                const UnicodeString &srcText,
3311                int32_t srcStart,
3312                int32_t srcLength,
3313                uint32_t options) const;
3314
3315  int8_t
3316  doCaseCompare(int32_t start,
3317                int32_t length,
3318                const UChar *srcChars,
3319                int32_t srcStart,
3320                int32_t srcLength,
3321                uint32_t options) const;
3322
3323  int32_t doIndexOf(UChar c,
3324            int32_t start,
3325            int32_t length) const;
3326
3327  int32_t doIndexOf(UChar32 c,
3328                        int32_t start,
3329                        int32_t length) const;
3330
3331  int32_t doLastIndexOf(UChar c,
3332                int32_t start,
3333                int32_t length) const;
3334
3335  int32_t doLastIndexOf(UChar32 c,
3336                            int32_t start,
3337                            int32_t length) const;
3338
3339  void doExtract(int32_t start,
3340         int32_t length,
3341         UChar *dst,
3342         int32_t dstStart) const;
3343
3344  inline void doExtract(int32_t start,
3345         int32_t length,
3346         UnicodeString& target) const;
3347
3348  inline UChar doCharAt(int32_t offset)  const;
3349
3350  UnicodeString& doReplace(int32_t start,
3351               int32_t length,
3352               const UnicodeString& srcText,
3353               int32_t srcStart,
3354               int32_t srcLength);
3355
3356  UnicodeString& doReplace(int32_t start,
3357               int32_t length,
3358               const UChar *srcChars,
3359               int32_t srcStart,
3360               int32_t srcLength);
3361
3362  UnicodeString& doReverse(int32_t start,
3363               int32_t length);
3364
3365  // calculate hash code
3366  int32_t doHashCode(void) const;
3367
3368  // get pointer to start of array
3369  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3370  inline UChar* getArrayStart(void);
3371  inline const UChar* getArrayStart(void) const;
3372
3373  // A UnicodeString object (not necessarily its current buffer)
3374  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3375  inline UBool isWritable() const;
3376
3377  // Is the current buffer writable?
3378  inline UBool isBufferWritable() const;
3379
3380  // None of the following does releaseArray().
3381  inline void setLength(int32_t len);        // sets only fShortLength and fLength
3382  inline void setToEmpty();                  // sets fFlags=kShortString
3383  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3384
3385  // allocate the array; result may be fStackBuffer
3386  // sets refCount to 1 if appropriate
3387  // sets fArray, fCapacity, and fFlags
3388  // returns boolean for success or failure
3389  UBool allocate(int32_t capacity);
3390
3391  // release the array if owned
3392  void releaseArray(void);
3393
3394  // turn a bogus string into an empty one
3395  void unBogus();
3396
  // implements assignment operator, copy constructor, and fastCopyFrom()
3398  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3399
  // Pin start (and length) to acceptable values.
3401  inline void pinIndex(int32_t& start) const;
3402  inline void pinIndices(int32_t& start,
3403                         int32_t& length) const;
3404
3405#if !UCONFIG_NO_CONVERSION
3406
3407  /* Internal extract() using UConverter. */
3408  int32_t doExtract(int32_t start, int32_t length,
3409                    char *dest, int32_t destCapacity,
3410                    UConverter *cnv,
3411                    UErrorCode &errorCode) const;
3412
3413  /*
3414   * Real constructor for converting from codepage data.
3415   * It assumes that it is called with !fRefCounted.
3416   *
3417   * If <code>codepage==0</code>, then the default converter
3418   * is used for the platform encoding.
3419   * If <code>codepage</code> is an empty string (<code>""</code>),
3420   * then a simple conversion is performed on the codepage-invariant
3421   * subset ("invariant characters") of the platform encoding. See utypes.h.
3422   */
3423  void doCodepageCreate(const char *codepageData,
3424                        int32_t dataLength,
3425                        const char *codepage);
3426
3427  /*
3428   * Worker function for creating a UnicodeString from
3429   * a codepage string using a UConverter.
3430   */
3431  void
3432  doCodepageCreate(const char *codepageData,
3433                   int32_t dataLength,
3434                   UConverter *converter,
3435                   UErrorCode &status);
3436
3437#endif
3438
3439  /*
3440   * This function is called when write access to the array
3441   * is necessary.
3442   *
3443   * We need to make a copy of the array if
3444   * the buffer is read-only, or
3445   * the buffer is refCounted (shared), and refCount>1, or
3446   * the buffer is too small.
3447   *
3448   * Return FALSE if memory could not be allocated.
3449   */
3450  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3451                            int32_t growCapacity = -1,
3452                            UBool doCopyArray = TRUE,
3453                            int32_t **pBufferToDelete = 0,
3454                            UBool forceClone = FALSE);
3455
3456  /**
3457   * Common function for UnicodeString case mappings.
3458   * The stringCaseMapper has the same type UStringCaseMapper
3459   * as in ustr_imp.h for ustrcase_map().
3460   */
3461  UnicodeString &
3462  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3463
3464  // ref counting
3465  void addRef(void);
3466  int32_t removeRef(void);
3467  int32_t refCount(void) const;
3468
3469  // constants
  // Class-private integer constants: buffer sizing, sentinel values,
  // and the bit values stored in fFlags.
  enum {
    // Set the stack buffer size so that sizeof(UnicodeString) is,
    // naturally (without padding), a multiple of sizeof(pointer).
    // 13 UChars when sizeof(void *)==4, otherwise 15.
    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
    kInvalidUChar=0xffff, // invalid UChar index
    kGrowSize=128, // grow size for this buffer
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fFlags (each is a distinct single bit)
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called

    // combined values for convenience
    kShortString=kUsingStackBuffer,   // contents are in the in-object stack buffer
    kLongString=kRefCounted,          // contents are in a ref-counted fArray
    kReadonlyAlias=kBufferIsReadonly, // buffer must not be written to
    kWritableAlias=0                  // no flag bits set
  };
3493
3494  friend class StringThreadTest;
3495  friend class UnicodeStringAppendable;
3496
3497  union StackBufferOrFields;        // forward declaration necessary before friend declaration
3498  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3499
3500  /*
3501   * The following are all the class fields that are stored
3502   * in each UnicodeString object.
3503   * Note that UnicodeString has virtual functions,
3504   * therefore there is an implicit vtable pointer
3505   * as the first real field.
3506   * The fields should be aligned such that no padding is necessary.
3507   * On 32-bit machines, the size should be 32 bytes,
3508   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3509   *
3510   * We use a hack to achieve this.
3511   *
3512   * With at least some compilers, each of the following is forced to
3513   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3514   * rounded up with additional padding if the fields do not already fit that requirement:
3515   * - sizeof(class UnicodeString)
3516   * - offsetof(UnicodeString, fUnion)
3517   * - sizeof(fUnion)
3518   * - sizeof(fFields)
3519   *
3520   * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3521   * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3522   * (Padding at the end of fFields is ok:
3523   * As long as there is no padding after fStackBuffer, it is not wasted space.)
3524   *
3525   * We further assume that the compiler does not reorder the fields,
3526   * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3527   * with at most some padding (but no other field) in between.
3528   * (Padding there would be wasted space, but functionally harmless.)
3529   *
3530   * We use a few more sizeof(pointer)'s chunks of space with
3531   * fRestOfStackBuffer, fShortLength and fFlags,
3532   * to get up exactly to the intended sizeof(UnicodeString).
3533   */
3534  // (implicit) *vtable;
  // Storage shared between the two representations: either the first
  // 8 UChars of the short-string buffer, or the array pointer with its
  // capacity and length. Which member is live is decided by fFlags.
  union StackBufferOrFields {
    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
    // else fFields is used
    UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
    struct {
      UChar   *fArray;    // the Unicode data
      int32_t fCapacity;  // capacity of fArray (in UChars)
      int32_t fLength;    // number of characters in fArray if >127; else undefined
    } fFields;
  } fUnion;
  // Remaining US_STACKBUF_SIZE-8 UChars of the short-string buffer.
  // Declared directly after fUnion so that it continues fStackBuffer.
  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
  int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
  uint8_t fFlags;       // bit flags: see constants above
3548};
3549
3550/**
3551 * Create a new UnicodeString with the concatenation of two others.
3552 *
3553 * @param s1 The first string to be copied to the new one.
3554 * @param s2 The second string to be copied to the new one, after s1.
3555 * @return UnicodeString(s1).append(s2)
3556 * @stable ICU 2.8
3557 */
3558U_COMMON_API UnicodeString U_EXPORT2
3559operator+ (const UnicodeString &s1, const UnicodeString &s2);
3560
3561//========================================
3562// Inline members
3563//========================================
3564
3565//========================================
3566// Privates
3567//========================================
3568
3569inline void
3570UnicodeString::pinIndex(int32_t& start) const
3571{
3572  // pin index
3573  if(start < 0) {
3574    start = 0;
3575  } else if(start > length()) {
3576    start = length();
3577  }
3578}
3579
3580inline void
3581UnicodeString::pinIndices(int32_t& start,
3582                          int32_t& _length) const
3583{
3584  // pin indices
3585  int32_t len = length();
3586  if(start < 0) {
3587    start = 0;
3588  } else if(start > len) {
3589    start = len;
3590  }
3591  if(_length < 0) {
3592    _length = 0;
3593  } else if(_length > (len - start)) {
3594    _length = (len - start);
3595  }
3596}
3597
3598inline UChar*
3599UnicodeString::getArrayStart()
3600{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3601
3602inline const UChar*
3603UnicodeString::getArrayStart() const
3604{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3605
3606//========================================
3607// Default constructor
3608//========================================
3609
3610inline
3611UnicodeString::UnicodeString()
3612  : fShortLength(0),
3613    fFlags(kShortString)
3614{}
3615
3616//========================================
3617// Read-only implementation methods
3618//========================================
3619inline int32_t
3620UnicodeString::length() const
3621{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3622
3623inline int32_t
3624UnicodeString::getCapacity() const
3625{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3626
3627inline int32_t
3628UnicodeString::hashCode() const
3629{ return doHashCode(); }
3630
3631inline UBool
3632UnicodeString::isBogus() const
3633{ return (UBool)(fFlags & kIsBogus); }
3634
3635inline UBool
3636UnicodeString::isWritable() const
3637{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3638
3639inline UBool
3640UnicodeString::isBufferWritable() const
3641{
3642  return (UBool)(
3643      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3644      (!(fFlags&kRefCounted) || refCount()==1));
3645}
3646
3647inline const UChar *
3648UnicodeString::getBuffer() const {
3649  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3650    return 0;
3651  } else if(fFlags&kUsingStackBuffer) {
3652    return fUnion.fStackBuffer;
3653  } else {
3654    return fUnion.fFields.fArray;
3655  }
3656}
3657
3658//========================================
3659// Read-only alias methods
3660//========================================
3661inline int8_t
3662UnicodeString::doCompare(int32_t start,
3663              int32_t thisLength,
3664              const UnicodeString& srcText,
3665              int32_t srcStart,
3666              int32_t srcLength) const
3667{
3668  if(srcText.isBogus()) {
3669    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3670  } else {
3671    srcText.pinIndices(srcStart, srcLength);
3672    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3673  }
3674}
3675
3676inline UBool
3677UnicodeString::operator== (const UnicodeString& text) const
3678{
3679  if(isBogus()) {
3680    return text.isBogus();
3681  } else {
3682    int32_t len = length(), textLength = text.length();
3683    return !text.isBogus() && len == textLength && doEquals(text, len);
3684  }
3685}
3686
3687inline UBool
3688UnicodeString::operator!= (const UnicodeString& text) const
3689{ return (! operator==(text)); }
3690
3691inline UBool
3692UnicodeString::operator> (const UnicodeString& text) const
3693{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3694
3695inline UBool
3696UnicodeString::operator< (const UnicodeString& text) const
3697{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3698
3699inline UBool
3700UnicodeString::operator>= (const UnicodeString& text) const
3701{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3702
3703inline UBool
3704UnicodeString::operator<= (const UnicodeString& text) const
3705{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3706
3707inline int8_t
3708UnicodeString::compare(const UnicodeString& text) const
3709{ return doCompare(0, length(), text, 0, text.length()); }
3710
3711inline int8_t
3712UnicodeString::compare(int32_t start,
3713               int32_t _length,
3714               const UnicodeString& srcText) const
3715{ return doCompare(start, _length, srcText, 0, srcText.length()); }
3716
3717inline int8_t
3718UnicodeString::compare(const UChar *srcChars,
3719               int32_t srcLength) const
3720{ return doCompare(0, length(), srcChars, 0, srcLength); }
3721
3722inline int8_t
3723UnicodeString::compare(int32_t start,
3724               int32_t _length,
3725               const UnicodeString& srcText,
3726               int32_t srcStart,
3727               int32_t srcLength) const
3728{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3729
3730inline int8_t
3731UnicodeString::compare(int32_t start,
3732               int32_t _length,
3733               const UChar *srcChars) const
3734{ return doCompare(start, _length, srcChars, 0, _length); }
3735
3736inline int8_t
3737UnicodeString::compare(int32_t start,
3738               int32_t _length,
3739               const UChar *srcChars,
3740               int32_t srcStart,
3741               int32_t srcLength) const
3742{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3743
3744inline int8_t
3745UnicodeString::compareBetween(int32_t start,
3746                  int32_t limit,
3747                  const UnicodeString& srcText,
3748                  int32_t srcStart,
3749                  int32_t srcLimit) const
3750{ return doCompare(start, limit - start,
3751           srcText, srcStart, srcLimit - srcStart); }
3752
3753inline int8_t
3754UnicodeString::doCompareCodePointOrder(int32_t start,
3755                                       int32_t thisLength,
3756                                       const UnicodeString& srcText,
3757                                       int32_t srcStart,
3758                                       int32_t srcLength) const
3759{
3760  if(srcText.isBogus()) {
3761    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3762  } else {
3763    srcText.pinIndices(srcStart, srcLength);
3764    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3765  }
3766}
3767
3768inline int8_t
3769UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3770{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3771
3772inline int8_t
3773UnicodeString::compareCodePointOrder(int32_t start,
3774                                     int32_t _length,
3775                                     const UnicodeString& srcText) const
3776{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3777
3778inline int8_t
3779UnicodeString::compareCodePointOrder(const UChar *srcChars,
3780                                     int32_t srcLength) const
3781{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3782
3783inline int8_t
3784UnicodeString::compareCodePointOrder(int32_t start,
3785                                     int32_t _length,
3786                                     const UnicodeString& srcText,
3787                                     int32_t srcStart,
3788                                     int32_t srcLength) const
3789{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3790
3791inline int8_t
3792UnicodeString::compareCodePointOrder(int32_t start,
3793                                     int32_t _length,
3794                                     const UChar *srcChars) const
3795{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3796
3797inline int8_t
3798UnicodeString::compareCodePointOrder(int32_t start,
3799                                     int32_t _length,
3800                                     const UChar *srcChars,
3801                                     int32_t srcStart,
3802                                     int32_t srcLength) const
3803{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3804
3805inline int8_t
3806UnicodeString::compareCodePointOrderBetween(int32_t start,
3807                                            int32_t limit,
3808                                            const UnicodeString& srcText,
3809                                            int32_t srcStart,
3810                                            int32_t srcLimit) const
3811{ return doCompareCodePointOrder(start, limit - start,
3812           srcText, srcStart, srcLimit - srcStart); }
3813
3814inline int8_t
3815UnicodeString::doCaseCompare(int32_t start,
3816                             int32_t thisLength,
3817                             const UnicodeString &srcText,
3818                             int32_t srcStart,
3819                             int32_t srcLength,
3820                             uint32_t options) const
3821{
3822  if(srcText.isBogus()) {
3823    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3824  } else {
3825    srcText.pinIndices(srcStart, srcLength);
3826    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3827  }
3828}
3829
3830inline int8_t
3831UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3832  return doCaseCompare(0, length(), text, 0, text.length(), options);
3833}
3834
3835inline int8_t
3836UnicodeString::caseCompare(int32_t start,
3837                           int32_t _length,
3838                           const UnicodeString &srcText,
3839                           uint32_t options) const {
3840  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3841}
3842
3843inline int8_t
3844UnicodeString::caseCompare(const UChar *srcChars,
3845                           int32_t srcLength,
3846                           uint32_t options) const {
3847  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3848}
3849
3850inline int8_t
3851UnicodeString::caseCompare(int32_t start,
3852                           int32_t _length,
3853                           const UnicodeString &srcText,
3854                           int32_t srcStart,
3855                           int32_t srcLength,
3856                           uint32_t options) const {
3857  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3858}
3859
3860inline int8_t
3861UnicodeString::caseCompare(int32_t start,
3862                           int32_t _length,
3863                           const UChar *srcChars,
3864                           uint32_t options) const {
3865  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3866}
3867
3868inline int8_t
3869UnicodeString::caseCompare(int32_t start,
3870                           int32_t _length,
3871                           const UChar *srcChars,
3872                           int32_t srcStart,
3873                           int32_t srcLength,
3874                           uint32_t options) const {
3875  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3876}
3877
3878inline int8_t
3879UnicodeString::caseCompareBetween(int32_t start,
3880                                  int32_t limit,
3881                                  const UnicodeString &srcText,
3882                                  int32_t srcStart,
3883                                  int32_t srcLimit,
3884                                  uint32_t options) const {
3885  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3886}
3887
3888inline int32_t
3889UnicodeString::indexOf(const UnicodeString& srcText,
3890               int32_t srcStart,
3891               int32_t srcLength,
3892               int32_t start,
3893               int32_t _length) const
3894{
3895  if(!srcText.isBogus()) {
3896    srcText.pinIndices(srcStart, srcLength);
3897    if(srcLength > 0) {
3898      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3899    }
3900  }
3901  return -1;
3902}
3903
3904inline int32_t
3905UnicodeString::indexOf(const UnicodeString& text) const
3906{ return indexOf(text, 0, text.length(), 0, length()); }
3907
3908inline int32_t
3909UnicodeString::indexOf(const UnicodeString& text,
3910               int32_t start) const {
3911  pinIndex(start);
3912  return indexOf(text, 0, text.length(), start, length() - start);
3913}
3914
3915inline int32_t
3916UnicodeString::indexOf(const UnicodeString& text,
3917               int32_t start,
3918               int32_t _length) const
3919{ return indexOf(text, 0, text.length(), start, _length); }
3920
3921inline int32_t
3922UnicodeString::indexOf(const UChar *srcChars,
3923               int32_t srcLength,
3924               int32_t start) const {
3925  pinIndex(start);
3926  return indexOf(srcChars, 0, srcLength, start, length() - start);
3927}
3928
3929inline int32_t
3930UnicodeString::indexOf(const UChar *srcChars,
3931               int32_t srcLength,
3932               int32_t start,
3933               int32_t _length) const
3934{ return indexOf(srcChars, 0, srcLength, start, _length); }
3935
3936inline int32_t
3937UnicodeString::indexOf(UChar c,
3938               int32_t start,
3939               int32_t _length) const
3940{ return doIndexOf(c, start, _length); }
3941
3942inline int32_t
3943UnicodeString::indexOf(UChar32 c,
3944               int32_t start,
3945               int32_t _length) const
3946{ return doIndexOf(c, start, _length); }
3947
3948inline int32_t
3949UnicodeString::indexOf(UChar c) const
3950{ return doIndexOf(c, 0, length()); }
3951
3952inline int32_t
3953UnicodeString::indexOf(UChar32 c) const
3954{ return indexOf(c, 0, length()); }
3955
3956inline int32_t
3957UnicodeString::indexOf(UChar c,
3958               int32_t start) const {
3959  pinIndex(start);
3960  return doIndexOf(c, start, length() - start);
3961}
3962
3963inline int32_t
3964UnicodeString::indexOf(UChar32 c,
3965               int32_t start) const {
3966  pinIndex(start);
3967  return indexOf(c, start, length() - start);
3968}
3969
3970inline int32_t
3971UnicodeString::lastIndexOf(const UChar *srcChars,
3972               int32_t srcLength,
3973               int32_t start,
3974               int32_t _length) const
3975{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3976
3977inline int32_t
3978UnicodeString::lastIndexOf(const UChar *srcChars,
3979               int32_t srcLength,
3980               int32_t start) const {
3981  pinIndex(start);
3982  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3983}
3984
3985inline int32_t
3986UnicodeString::lastIndexOf(const UnicodeString& srcText,
3987               int32_t srcStart,
3988               int32_t srcLength,
3989               int32_t start,
3990               int32_t _length) const
3991{
3992  if(!srcText.isBogus()) {
3993    srcText.pinIndices(srcStart, srcLength);
3994    if(srcLength > 0) {
3995      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3996    }
3997  }
3998  return -1;
3999}
4000
4001inline int32_t
4002UnicodeString::lastIndexOf(const UnicodeString& text,
4003               int32_t start,
4004               int32_t _length) const
4005{ return lastIndexOf(text, 0, text.length(), start, _length); }
4006
4007inline int32_t
4008UnicodeString::lastIndexOf(const UnicodeString& text,
4009               int32_t start) const {
4010  pinIndex(start);
4011  return lastIndexOf(text, 0, text.length(), start, length() - start);
4012}
4013
4014inline int32_t
4015UnicodeString::lastIndexOf(const UnicodeString& text) const
4016{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4017
4018inline int32_t
4019UnicodeString::lastIndexOf(UChar c,
4020               int32_t start,
4021               int32_t _length) const
4022{ return doLastIndexOf(c, start, _length); }
4023
4024inline int32_t
4025UnicodeString::lastIndexOf(UChar32 c,
4026               int32_t start,
4027               int32_t _length) const {
4028  return doLastIndexOf(c, start, _length);
4029}
4030
4031inline int32_t
4032UnicodeString::lastIndexOf(UChar c) const
4033{ return doLastIndexOf(c, 0, length()); }
4034
4035inline int32_t
4036UnicodeString::lastIndexOf(UChar32 c) const {
4037  return lastIndexOf(c, 0, length());
4038}
4039
4040inline int32_t
4041UnicodeString::lastIndexOf(UChar c,
4042               int32_t start) const {
4043  pinIndex(start);
4044  return doLastIndexOf(c, start, length() - start);
4045}
4046
4047inline int32_t
4048UnicodeString::lastIndexOf(UChar32 c,
4049               int32_t start) const {
4050  pinIndex(start);
4051  return lastIndexOf(c, start, length() - start);
4052}
4053
4054inline UBool
4055UnicodeString::startsWith(const UnicodeString& text) const
4056{ return compare(0, text.length(), text, 0, text.length()) == 0; }
4057
4058inline UBool
4059UnicodeString::startsWith(const UnicodeString& srcText,
4060              int32_t srcStart,
4061              int32_t srcLength) const
4062{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4063
4064inline UBool
4065UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4066  if(srcLength < 0) {
4067    srcLength = u_strlen(srcChars);
4068  }
4069  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4070}
4071
4072inline UBool
4073UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4074  if(srcLength < 0) {
4075    srcLength = u_strlen(srcChars);
4076  }
4077  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4078}
4079
4080inline UBool
4081UnicodeString::endsWith(const UnicodeString& text) const
4082{ return doCompare(length() - text.length(), text.length(),
4083           text, 0, text.length()) == 0; }
4084
4085inline UBool
4086UnicodeString::endsWith(const UnicodeString& srcText,
4087            int32_t srcStart,
4088            int32_t srcLength) const {
4089  srcText.pinIndices(srcStart, srcLength);
4090  return doCompare(length() - srcLength, srcLength,
4091                   srcText, srcStart, srcLength) == 0;
4092}
4093
4094inline UBool
4095UnicodeString::endsWith(const UChar *srcChars,
4096            int32_t srcLength) const {
4097  if(srcLength < 0) {
4098    srcLength = u_strlen(srcChars);
4099  }
4100  return doCompare(length() - srcLength, srcLength,
4101                   srcChars, 0, srcLength) == 0;
4102}
4103
4104inline UBool
4105UnicodeString::endsWith(const UChar *srcChars,
4106            int32_t srcStart,
4107            int32_t srcLength) const {
4108  if(srcLength < 0) {
4109    srcLength = u_strlen(srcChars + srcStart);
4110  }
4111  return doCompare(length() - srcLength, srcLength,
4112                   srcChars, srcStart, srcLength) == 0;
4113}
4114
4115//========================================
4116// replace
4117//========================================
4118inline UnicodeString&
4119UnicodeString::replace(int32_t start,
4120               int32_t _length,
4121               const UnicodeString& srcText)
4122{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4123
4124inline UnicodeString&
4125UnicodeString::replace(int32_t start,
4126               int32_t _length,
4127               const UnicodeString& srcText,
4128               int32_t srcStart,
4129               int32_t srcLength)
4130{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4131
4132inline UnicodeString&
4133UnicodeString::replace(int32_t start,
4134               int32_t _length,
4135               const UChar *srcChars,
4136               int32_t srcLength)
4137{ return doReplace(start, _length, srcChars, 0, srcLength); }
4138
4139inline UnicodeString&
4140UnicodeString::replace(int32_t start,
4141               int32_t _length,
4142               const UChar *srcChars,
4143               int32_t srcStart,
4144               int32_t srcLength)
4145{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4146
4147inline UnicodeString&
4148UnicodeString::replace(int32_t start,
4149               int32_t _length,
4150               UChar srcChar)
4151{ return doReplace(start, _length, &srcChar, 0, 1); }
4152
4153inline UnicodeString&
4154UnicodeString::replaceBetween(int32_t start,
4155                  int32_t limit,
4156                  const UnicodeString& srcText)
4157{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4158
4159inline UnicodeString&
4160UnicodeString::replaceBetween(int32_t start,
4161                  int32_t limit,
4162                  const UnicodeString& srcText,
4163                  int32_t srcStart,
4164                  int32_t srcLimit)
4165{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4166
4167inline UnicodeString&
4168UnicodeString::findAndReplace(const UnicodeString& oldText,
4169                  const UnicodeString& newText)
4170{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4171            newText, 0, newText.length()); }
4172
4173inline UnicodeString&
4174UnicodeString::findAndReplace(int32_t start,
4175                  int32_t _length,
4176                  const UnicodeString& oldText,
4177                  const UnicodeString& newText)
4178{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4179            newText, 0, newText.length()); }
4180
4181// ============================
4182// extract
4183// ============================
4184inline void
4185UnicodeString::doExtract(int32_t start,
4186             int32_t _length,
4187             UnicodeString& target) const
4188{ target.replace(0, target.length(), *this, start, _length); }
4189
4190inline void
4191UnicodeString::extract(int32_t start,
4192               int32_t _length,
4193               UChar *target,
4194               int32_t targetStart) const
4195{ doExtract(start, _length, target, targetStart); }
4196
4197inline void
4198UnicodeString::extract(int32_t start,
4199               int32_t _length,
4200               UnicodeString& target) const
4201{ doExtract(start, _length, target); }
4202
4203#if !UCONFIG_NO_CONVERSION
4204
4205inline int32_t
4206UnicodeString::extract(int32_t start,
4207               int32_t _length,
4208               char *dst,
4209               const char *codepage) const
4210
4211{
4212  // This dstSize value will be checked explicitly
4213  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4214}
4215
4216#endif
4217
4218inline void
4219UnicodeString::extractBetween(int32_t start,
4220                  int32_t limit,
4221                  UChar *dst,
4222                  int32_t dstStart) const {
4223  pinIndex(start);
4224  pinIndex(limit);
4225  doExtract(start, limit - start, dst, dstStart);
4226}
4227
4228inline UnicodeString
4229UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4230    return tempSubString(start, limit - start);
4231}
4232
4233inline UChar
4234UnicodeString::doCharAt(int32_t offset) const
4235{
4236  if((uint32_t)offset < (uint32_t)length()) {
4237    return getArrayStart()[offset];
4238  } else {
4239    return kInvalidUChar;
4240  }
4241}
4242
4243inline UChar
4244UnicodeString::charAt(int32_t offset) const
4245{ return doCharAt(offset); }
4246
4247inline UChar
4248UnicodeString::operator[] (int32_t offset) const
4249{ return doCharAt(offset); }
4250
4251inline UBool
4252UnicodeString::isEmpty() const {
4253  return fShortLength == 0;
4254}
4255
4256//========================================
4257// Write implementation methods
4258//========================================
4259inline void
4260UnicodeString::setLength(int32_t len) {
4261  if(len <= 127) {
4262    fShortLength = (int8_t)len;
4263  } else {
4264    fShortLength = (int8_t)-1;
4265    fUnion.fFields.fLength = len;
4266  }
4267}
4268
4269inline void
4270UnicodeString::setToEmpty() {
4271  fShortLength = 0;
4272  fFlags = kShortString;
4273}
4274
4275inline void
4276UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4277  setLength(len);
4278  fUnion.fFields.fArray = array;
4279  fUnion.fFields.fCapacity = capacity;
4280}
4281
4282inline UnicodeString&
4283UnicodeString::operator= (UChar ch)
4284{ return doReplace(0, length(), &ch, 0, 1); }
4285
4286inline UnicodeString&
4287UnicodeString::operator= (UChar32 ch)
4288{ return replace(0, length(), ch); }
4289
4290inline UnicodeString&
4291UnicodeString::setTo(const UnicodeString& srcText,
4292             int32_t srcStart,
4293             int32_t srcLength)
4294{
4295  unBogus();
4296  return doReplace(0, length(), srcText, srcStart, srcLength);
4297}
4298
4299inline UnicodeString&
4300UnicodeString::setTo(const UnicodeString& srcText,
4301             int32_t srcStart)
4302{
4303  unBogus();
4304  srcText.pinIndex(srcStart);
4305  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4306}
4307
4308inline UnicodeString&
4309UnicodeString::setTo(const UnicodeString& srcText)
4310{
4311  return copyFrom(srcText);
4312}
4313
4314inline UnicodeString&
4315UnicodeString::setTo(const UChar *srcChars,
4316             int32_t srcLength)
4317{
4318  unBogus();
4319  return doReplace(0, length(), srcChars, 0, srcLength);
4320}
4321
4322inline UnicodeString&
4323UnicodeString::setTo(UChar srcChar)
4324{
4325  unBogus();
4326  return doReplace(0, length(), &srcChar, 0, 1);
4327}
4328
4329inline UnicodeString&
4330UnicodeString::setTo(UChar32 srcChar)
4331{
4332  unBogus();
4333  return replace(0, length(), srcChar);
4334}
4335
4336inline UnicodeString&
4337UnicodeString::append(const UnicodeString& srcText,
4338              int32_t srcStart,
4339              int32_t srcLength)
4340{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
4341
4342inline UnicodeString&
4343UnicodeString::append(const UnicodeString& srcText)
4344{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4345
4346inline UnicodeString&
4347UnicodeString::append(const UChar *srcChars,
4348              int32_t srcStart,
4349              int32_t srcLength)
4350{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4351
4352inline UnicodeString&
4353UnicodeString::append(const UChar *srcChars,
4354              int32_t srcLength)
4355{ return doReplace(length(), 0, srcChars, 0, srcLength); }
4356
4357inline UnicodeString&
4358UnicodeString::append(UChar srcChar)
4359{ return doReplace(length(), 0, &srcChar, 0, 1); }
4360
4361inline UnicodeString&
4362UnicodeString::operator+= (UChar ch)
4363{ return doReplace(length(), 0, &ch, 0, 1); }
4364
4365inline UnicodeString&
4366UnicodeString::operator+= (UChar32 ch) {
4367  return append(ch);
4368}
4369
4370inline UnicodeString&
4371UnicodeString::operator+= (const UnicodeString& srcText)
4372{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4373
4374inline UnicodeString&
4375UnicodeString::insert(int32_t start,
4376              const UnicodeString& srcText,
4377              int32_t srcStart,
4378              int32_t srcLength)
4379{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4380
4381inline UnicodeString&
4382UnicodeString::insert(int32_t start,
4383              const UnicodeString& srcText)
4384{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4385
4386inline UnicodeString&
4387UnicodeString::insert(int32_t start,
4388              const UChar *srcChars,
4389              int32_t srcStart,
4390              int32_t srcLength)
4391{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4392
4393inline UnicodeString&
4394UnicodeString::insert(int32_t start,
4395              const UChar *srcChars,
4396              int32_t srcLength)
4397{ return doReplace(start, 0, srcChars, 0, srcLength); }
4398
4399inline UnicodeString&
4400UnicodeString::insert(int32_t start,
4401              UChar srcChar)
4402{ return doReplace(start, 0, &srcChar, 0, 1); }
4403
4404inline UnicodeString&
4405UnicodeString::insert(int32_t start,
4406              UChar32 srcChar)
4407{ return replace(start, 0, srcChar); }
4408
4409
4410inline UnicodeString&
4411UnicodeString::remove()
4412{
4413  // remove() of a bogus string makes the string empty and non-bogus
4414  if(isBogus()) {
4415    setToEmpty();
4416  } else {
4417    fShortLength = 0;
4418  }
4419  return *this;
4420}
4421
4422inline UnicodeString&
4423UnicodeString::remove(int32_t start,
4424             int32_t _length)
4425{
4426    if(start <= 0 && _length == INT32_MAX) {
4427        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4428        return remove();
4429    }
4430    return doReplace(start, _length, NULL, 0, 0);
4431}
4432
4433inline UnicodeString&
4434UnicodeString::removeBetween(int32_t start,
4435                int32_t limit)
4436{ return doReplace(start, limit - start, NULL, 0, 0); }
4437
4438inline UnicodeString &
4439UnicodeString::retainBetween(int32_t start, int32_t limit) {
4440  truncate(limit);
4441  return doReplace(0, start, NULL, 0, 0);
4442}
4443
4444inline UBool
4445UnicodeString::truncate(int32_t targetLength)
4446{
4447  if(isBogus() && targetLength == 0) {
4448    // truncate(0) of a bogus string makes the string empty and non-bogus
4449    unBogus();
4450    return FALSE;
4451  } else if((uint32_t)targetLength < (uint32_t)length()) {
4452    setLength(targetLength);
4453    return TRUE;
4454  } else {
4455    return FALSE;
4456  }
4457}
4458
4459inline UnicodeString&
4460UnicodeString::reverse()
4461{ return doReverse(0, length()); }
4462
4463inline UnicodeString&
4464UnicodeString::reverse(int32_t start,
4465               int32_t _length)
4466{ return doReverse(start, _length); }
4467
4468U_NAMESPACE_END
4469
4470#endif
4471