unistr.h revision 8393335b955da7340c9f19b1b4b2d6c0c2c04be7
1/*
2**********************************************************************
3*   Copyright (C) 1998-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   11/11/98    stephen     Changed per 11/9 code review.
14*   04/20/99    stephen     Overhauled per 4/16 code review.
15*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16*                           handleReplaceBetween(); other methods unchanged.
17*   06/25/01    grhoten     Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29#include "unicode/utypes.h"
30#include "unicode/rep.h"
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
34#include "unicode/ucasemap.h"
35
36struct UConverter;          // unicode/ucnv.h
37class  StringThreadTest;
38
39#ifndef U_COMPARE_CODE_POINT_ORDER
40/* see also ustring.h and unorm.h */
41/**
42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
43 * Compare strings in code point order instead of code unit order.
44 * @stable ICU 2.2
45 */
46#define U_COMPARE_CODE_POINT_ORDER  0x8000
47#endif
48
49#ifndef USTRING_H
50/**
 * Forward declaration of u_strlen().
 * It is emitted only while USTRING_H is not defined, so that this header
 * can name the function without including the header guarded by USTRING_H
 * (presumably unicode/ustring.h -- confirm).
 * NOTE(review): presumably returns the number of UChar code units that
 * precede the terminating NUL of s -- confirm against the real declaration.
51 * \ingroup ustring_ustrlen
52 */
53U_STABLE int32_t U_EXPORT2
54u_strlen(const UChar *s);
55#endif
56
57#ifndef U_HIDE_INTERNAL_API
58/**
59 * \def U_STRING_CASE_MAPPER_DEFINED
60 * @internal
61 */
62
63#ifndef U_STRING_CASE_MAPPER_DEFINED
64#define U_STRING_CASE_MAPPER_DEFINED
65
66/**
67 * Internal string case mapping function type.
 * A function of this type receives a case map (csm), a destination buffer
 * with its capacity (dest, destCapacity), a source buffer with its length
 * (src, srcLength) and a pointer to an error code (pErrorCode).
 * NOTE(review): presumably follows the usual ICU string-function convention
 * (returns the output length, reports failures through pErrorCode) --
 * confirm against the implementations of this type.
68 * @internal
69 */
70typedef int32_t U_CALLCONV
71UStringCaseMapper(const UCaseMap *csm,
72                  UChar *dest, int32_t destCapacity,
73                  const UChar *src, int32_t srcLength,
74                  UErrorCode *pErrorCode);
75
76#endif
77#endif  /* U_HIDE_INTERNAL_API */
78
79U_NAMESPACE_BEGIN
80
81class BreakIterator;        // unicode/brkiter.h
82class Locale;               // unicode/locid.h
83class StringCharacterIterator;
84class UnicodeStringAppendable;  // unicode/appendable.h
85
86/* The <iostream> include has been moved to unicode/ustream.h */
87
88/**
89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
90 * which constructs a Unicode string from an invariant-character char * string.
91 * About invariant characters see utypes.h.
92 * This constructor has no runtime dependency on conversion code and is
93 * therefore recommended over ones taking a charset name string
94 * (where the empty string "" indicates invariant-character conversion).
95 *
96 * @stable ICU 3.2
97 */
98#define US_INV icu::UnicodeString::kInvariant
99
100/**
101 * Unicode String literals in C++.
102 * Dependent on the platform properties, different UnicodeString
103 * constructors should be used to create a UnicodeString object from
104 * a string literal.
105 * The macros are defined for maximum performance.
106 * They work only for strings that contain "invariant characters", i.e.,
107 * only latin letters, digits, and some punctuation.
108 * See utypes.h for details.
109 *
110 * The string parameter must be a C string literal.
111 * The length of the string, not including the terminating
112 * <code>NUL</code>, must be specified as a constant.
113 * The U_STRING_DECL macro should be invoked exactly once for one
114 * such string variable before it is used.
 *
 * The expansion is selected at preprocessing time, in this order:
 * - If U_DECLARE_UTF16 is defined, the literal is passed through
 *   U_DECLARE_UTF16() and the result is cast to const UChar *.
 * - Otherwise, if wchar_t and UChar have the same size and either the
 *   charset family is ASCII or UChar is 2 bytes wide with U_WCHAR_IS_UTF16
 *   defined, the wide literal (L ## cs) is cast to const UChar *.
 * - Otherwise, if UChar is 1 byte wide and the charset family is ASCII,
 *   the narrow literal itself is cast to const UChar *.
 * - Otherwise the UnicodeString(cs, _length, US_INV) invariant-character
 *   constructor is used.
 *
 * @param cs      The C string literal.
 * @param _length The length of the literal, not including the terminating NUL.
115 * @stable ICU 2.0
116 */
117#if defined(U_DECLARE_UTF16)
118#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
119#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
120#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
121#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
122#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
123#else
124#   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
125#endif
126
127/**
128 * Unicode String literals in C++.
129 * Dependent on the platform properties, different UnicodeString
130 * constructors should be used to create a UnicodeString object from
131 * a string literal.
132 * The macros are defined for improved performance.
133 * They work only for strings that contain "invariant characters", i.e.,
134 * only latin letters, digits, and some punctuation.
135 * See utypes.h for details.
136 *
137 * The string parameter must be a C string literal.
 *
 * This macro expands to UNICODE_STRING(cs, -1), so the caller does not
 * have to supply the length of the literal.
 * NOTE(review): a length of -1 presumably means that the length is
 * determined from the terminating NUL -- confirm against the constructors
 * that UNICODE_STRING expands to.
 *
 * @param cs The C string literal.
138 * @stable ICU 2.0
139 */
140#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
141
142/**
143 * \def UNISTR_FROM_CHAR_EXPLICIT
144 * This can be defined to be empty or "explicit".
145 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
146 * constructors are marked as explicit, preventing their inadvertent use.
147 * @stable ICU 49
148 */
149#ifndef UNISTR_FROM_CHAR_EXPLICIT
150# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
151    // Auto-"explicit" in ICU library code.
152#   define UNISTR_FROM_CHAR_EXPLICIT explicit
153# else
154    // Empty by default for source code compatibility.
155#   define UNISTR_FROM_CHAR_EXPLICIT
156# endif
157#endif
158
159/**
160 * \def UNISTR_FROM_STRING_EXPLICIT
161 * This can be defined to be empty or "explicit".
162 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
163 * constructors are marked as explicit, preventing their inadvertent use.
164 *
165 * In particular, this helps prevent accidentally depending on ICU conversion code
166 * by passing a string literal into an API with a const UnicodeString & parameter.
167 * @stable ICU 49
168 */
169#ifndef UNISTR_FROM_STRING_EXPLICIT
170# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
171    // Auto-"explicit" in ICU library code.
172#   define UNISTR_FROM_STRING_EXPLICIT explicit
173# else
174    // Empty by default for source code compatibility.
175#   define UNISTR_FROM_STRING_EXPLICIT
176# endif
177#endif
178
179/**
180 * UnicodeString is a string class that stores Unicode characters directly and provides
181 * functionality similar to that of the Java String and StringBuffer classes.
182 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
183 *
184 * The UnicodeString class is not suitable for subclassing.
185 *
186 * <p>For an overview of Unicode strings in C and C++ see the
187 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
188 *
189 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
190 * A Unicode character may be stored with either one code unit
191 * (the most common case) or with a matched pair of special code units
192 * ("surrogates"). The data type for code units is UChar.
193 * For single-character handling, a Unicode character code <em>point</em> is a value
194 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
195 *
196 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
197 * This is the same as with multi-byte char* strings in traditional string handling.
198 * Operations on partial strings typically do not test for code point boundaries.
199 * If necessary, the user needs to take care of such boundaries by testing for the code unit
200 * values or by using functions like
201 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
202 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
203 *
204 * UnicodeString methods are more lenient with regard to input parameter values
205 * than other ICU APIs. In particular:
206 * - If indexes are out of bounds for a UnicodeString object
207 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
208 * - If primitive string pointer values (e.g., const UChar * or char *)
209 *   for input strings are NULL, then those input string parameters are treated
210 *   as if they pointed to an empty string.
211 *   However, this is <em>not</em> the case for char * parameters for charset names
212 *   or other IDs.
213 * - Most UnicodeString methods do not take a UErrorCode parameter because
214 *   there are usually very few opportunities for failure other than a shortage
215 *   of memory, error codes in low-level C++ string methods would be inconvenient,
216 *   and the error code as the last parameter (ICU convention) would prevent
217 *   the use of default parameter values.
218 *   Instead, such methods set the UnicodeString into a "bogus" state
219 *   (see isBogus()) if an error occurs.
220 *
221 * In string comparisons, two UnicodeString objects that are both "bogus"
222 * compare equal (to be transitive and prevent endless loops in sorting),
223 * and a "bogus" string compares less than any non-"bogus" one.
224 *
225 * Const UnicodeString methods are thread-safe. Multiple threads can use
226 * const methods on the same UnicodeString object simultaneously,
227 * but non-const methods must not be called concurrently (in multiple threads)
228 * with any other (const or non-const) methods.
229 *
230 * Similarly, const UnicodeString & parameters are thread-safe.
231 * One object may be passed in as such a parameter concurrently in multiple threads.
232 * This includes the const UnicodeString & parameters for
233 * copy construction, assignment, and cloning.
234 *
235 * <p>UnicodeString uses several storage methods.
236 * String contents can be stored inside the UnicodeString object itself,
237 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
238 * Most of this is done transparently, but careful aliasing in particular provides
239 * significant performance improvements.
240 * Also, the internal buffer is accessible via special functions.
241 * For details see the
242 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
243 *
244 * @see utf.h
245 * @see CharacterIterator
246 * @stable ICU 2.0
247 */
248class U_COMMON_API UnicodeString : public Replaceable
249{
250public:
251
252  /**
253   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
254   * which constructs a Unicode string from an invariant-character char * string.
255   * Use the macro US_INV instead of the full qualification for this value.
256   *
257   * @see US_INV
258   * @stable ICU 3.2
259   */
260  enum EInvariant {
261    /**
262     * @see EInvariant
263     * @stable ICU 3.2
264     */
265    kInvariant
266  };
267
268  //========================================
269  // Read-only operations
270  //========================================
271
272  /* Comparison - bitwise only - for international comparison use collation */
273
274  /**
275   * Equality operator. Performs only bitwise comparison.
276   * @param text The UnicodeString to compare to this one.
277   * @return TRUE if <TT>text</TT> contains the same characters as this one,
278   * FALSE otherwise.
279   * @stable ICU 2.0
280   */
281  inline UBool operator== (const UnicodeString& text) const;
282
283  /**
284   * Inequality operator. Performs only bitwise comparison.
285   * @param text The UnicodeString to compare to this one.
286   * @return FALSE if <TT>text</TT> contains the same characters as this one,
287   * TRUE otherwise.
288   * @stable ICU 2.0
289   */
290  inline UBool operator!= (const UnicodeString& text) const;
291
292  /**
293   * Greater than operator. Performs only bitwise comparison.
294   * @param text The UnicodeString to compare to this one.
295   * @return TRUE if the characters in this are bitwise
296   * greater than the characters in <code>text</code>, FALSE otherwise
297   * @stable ICU 2.0
298   */
299  inline UBool operator> (const UnicodeString& text) const;
300
301  /**
302   * Less than operator. Performs only bitwise comparison.
303   * @param text The UnicodeString to compare to this one.
304   * @return TRUE if the characters in this are bitwise
305   * less than the characters in <code>text</code>, FALSE otherwise
306   * @stable ICU 2.0
307   */
308  inline UBool operator< (const UnicodeString& text) const;
309
310  /**
311   * Greater than or equal operator. Performs only bitwise comparison.
312   * @param text The UnicodeString to compare to this one.
313   * @return TRUE if the characters in this are bitwise
314   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
315   * @stable ICU 2.0
316   */
317  inline UBool operator>= (const UnicodeString& text) const;
318
319  /**
320   * Less than or equal operator. Performs only bitwise comparison.
321   * @param text The UnicodeString to compare to this one.
322   * @return TRUE if the characters in this are bitwise
323   * less than or equal to the characters in <code>text</code>, FALSE otherwise
324   * @stable ICU 2.0
325   */
326  inline UBool operator<= (const UnicodeString& text) const;
327
328  /**
329   * Compare the characters bitwise in this UnicodeString to
330   * the characters in <code>text</code>.
331   * @param text The UnicodeString to compare to this one.
332   * @return The result of bitwise character comparison: 0 if this
333   * contains the same characters as <code>text</code>, -1 if the characters in
334   * this are bitwise less than the characters in <code>text</code>, +1 if the
335   * characters in this are bitwise greater than the characters
336   * in <code>text</code>.
337   * @stable ICU 2.0
338   */
339  inline int8_t compare(const UnicodeString& text) const;
340
341  /**
342   * Compare the characters bitwise in the range
343   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
344   * in <TT>text</TT>.
345   * @param start the offset in this string at which the compare operation begins
346   * @param length the number of characters in this string to compare.
347   * @param text the other text to be compared against this string.
348   * @return The result of bitwise character comparison: 0 if this
349   * contains the same characters as <code>text</code>, -1 if the characters in
350   * this are bitwise less than the characters in <code>text</code>, +1 if the
351   * characters in this are bitwise greater than the characters
352   * in <code>text</code>.
353   * @stable ICU 2.0
354   */
355  inline int8_t compare(int32_t start,
356         int32_t length,
357         const UnicodeString& text) const;
358
359  /**
360   * Compare the characters bitwise in the range
361   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
362   * in <TT>srcText</TT> in the range
363   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
364   * @param start the offset in this string at which the compare operation begins
365   * @param length the number of characters in this string to compare.
366   * @param srcText the text to be compared
367   * @param srcStart the offset into <TT>srcText</TT> to start comparison
368   * @param srcLength the number of characters in <TT>srcText</TT> to compare
369   * @return The result of bitwise character comparison: 0 if this
370   * contains the same characters as <code>srcText</code>, -1 if the characters in
371   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
372   * characters in this are bitwise greater than the characters
373   * in <code>srcText</code>.
374   * @stable ICU 2.0
375   */
376   inline int8_t compare(int32_t start,
377         int32_t length,
378         const UnicodeString& srcText,
379         int32_t srcStart,
380         int32_t srcLength) const;
381
382  /**
383   * Compare the characters bitwise in this UnicodeString with the first
384   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
385   * @param srcChars The characters to compare to this UnicodeString.
386   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
387   * @return The result of bitwise character comparison: 0 if this
388   * contains the same characters as <code>srcChars</code>, -1 if the characters in
389   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
390   * characters in this are bitwise greater than the characters
391   * in <code>srcChars</code>.
392   * @stable ICU 2.0
393   */
394  inline int8_t compare(const UChar *srcChars,
395         int32_t srcLength) const;
396
397  /**
398   * Compare the characters bitwise in the range
399   * [<TT>start</TT>, <TT>start + length</TT>) with the first
400   * <TT>length</TT> characters in <TT>srcChars</TT>
401   * @param start the offset at which the compare operation begins
402   * @param length the number of characters to compare.
403   * @param srcChars the characters to be compared
404   * @return The result of bitwise character comparison: 0 if this
405   * contains the same characters as <code>srcChars</code>, -1 if the characters in
406   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
407   * characters in this are bitwise greater than the characters
408   * in <code>srcChars</code>.
409   * @stable ICU 2.0
410   */
411  inline int8_t compare(int32_t start,
412         int32_t length,
413         const UChar *srcChars) const;
414
415  /**
416   * Compare the characters bitwise in the range
417   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
418   * in <TT>srcChars</TT> in the range
419   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
420   * @param start the offset at which the compare operation begins
421   * @param length the number of characters in this to compare
422   * @param srcChars the characters to be compared
423   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
424   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
425   * @return The result of bitwise character comparison: 0 if this
426   * contains the same characters as <code>srcChars</code>, -1 if the characters in
427   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
428   * characters in this are bitwise greater than the characters
429   * in <code>srcChars</code>.
430   * @stable ICU 2.0
431   */
432  inline int8_t compare(int32_t start,
433         int32_t length,
434         const UChar *srcChars,
435         int32_t srcStart,
436         int32_t srcLength) const;
437
438  /**
439   * Compare the characters bitwise in the range
440   * [<TT>start</TT>, <TT>limit</TT>) with the characters
441   * in <TT>srcText</TT> in the range
442   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
443   * @param start the offset at which the compare operation begins
444   * @param limit the offset immediately following the compare operation
445   * @param srcText the text to be compared
446   * @param srcStart the offset into <TT>srcText</TT> to start comparison
447   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
448   * @return The result of bitwise character comparison: 0 if this
449   * contains the same characters as <code>srcText</code>, -1 if the characters in
450   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
451   * characters in this are bitwise greater than the characters
452   * in <code>srcText</code>.
453   * @stable ICU 2.0
454   */
455  inline int8_t compareBetween(int32_t start,
456            int32_t limit,
457            const UnicodeString& srcText,
458            int32_t srcStart,
459            int32_t srcLimit) const;
460
461  /**
462   * Compare two Unicode strings in code point order.
463   * The result may be different from the results of compare(), operator<, etc.
464   * if supplementary characters are present:
465   *
466   * In UTF-16, supplementary characters (with code points U+10000 and above) are
467   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
468   * which means that they compare as less than some other BMP characters like U+feff.
469   * This function compares Unicode strings in code point order.
470   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
471   *
472   * @param text Another string to compare this one to.
473   * @return a negative/zero/positive integer corresponding to whether
474   * this string is less than/equal to/greater than the second one
475   * in code point order
476   * @stable ICU 2.0
477   */
478  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
479
480  /**
481   * Compare two Unicode strings in code point order.
482   * The result may be different from the results of compare(), operator<, etc.
483   * if supplementary characters are present:
484   *
485   * In UTF-16, supplementary characters (with code points U+10000 and above) are
486   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
487   * which means that they compare as less than some other BMP characters like U+feff.
488   * This function compares Unicode strings in code point order.
489   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
490   *
491   * @param start The start offset in this string at which the compare operation begins.
492   * @param length The number of code units from this string to compare.
493   * @param srcText Another string to compare this one to.
494   * @return a negative/zero/positive integer corresponding to whether
495   * this string is less than/equal to/greater than the second one
496   * in code point order
497   * @stable ICU 2.0
498   */
499  inline int8_t compareCodePointOrder(int32_t start,
500                                      int32_t length,
501                                      const UnicodeString& srcText) const;
502
503  /**
504   * Compare two Unicode strings in code point order.
505   * The result may be different from the results of compare(), operator<, etc.
506   * if supplementary characters are present:
507   *
508   * In UTF-16, supplementary characters (with code points U+10000 and above) are
509   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
510   * which means that they compare as less than some other BMP characters like U+feff.
511   * This function compares Unicode strings in code point order.
512   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
513   *
514   * @param start The start offset in this string at which the compare operation begins.
515   * @param length The number of code units from this string to compare.
516   * @param srcText Another string to compare this one to.
517   * @param srcStart The start offset in that string at which the compare operation begins.
518   * @param srcLength The number of code units from that string to compare.
519   * @return a negative/zero/positive integer corresponding to whether
520   * this string is less than/equal to/greater than the second one
521   * in code point order
522   * @stable ICU 2.0
523   */
524   inline int8_t compareCodePointOrder(int32_t start,
525                                       int32_t length,
526                                       const UnicodeString& srcText,
527                                       int32_t srcStart,
528                                       int32_t srcLength) const;
529
530  /**
531   * Compare two Unicode strings in code point order.
532   * The result may be different from the results of compare(), operator<, etc.
533   * if supplementary characters are present:
534   *
535   * In UTF-16, supplementary characters (with code points U+10000 and above) are
536   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
537   * which means that they compare as less than some other BMP characters like U+feff.
538   * This function compares Unicode strings in code point order.
539   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
540   *
541   * @param srcChars A pointer to another string to compare this one to.
542   * @param srcLength The number of code units from that string to compare.
543   * @return a negative/zero/positive integer corresponding to whether
544   * this string is less than/equal to/greater than the second one
545   * in code point order
546   * @stable ICU 2.0
547   */
548  inline int8_t compareCodePointOrder(const UChar *srcChars,
549                                      int32_t srcLength) const;
550
551  /**
552   * Compare two Unicode strings in code point order.
553   * The result may be different from the results of compare(), operator<, etc.
554   * if supplementary characters are present:
555   *
556   * In UTF-16, supplementary characters (with code points U+10000 and above) are
557   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
558   * which means that they compare as less than some other BMP characters like U+feff.
559   * This function compares Unicode strings in code point order.
560   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
561   *
562   * @param start The start offset in this string at which the compare operation begins.
563   * @param length The number of code units from this string to compare.
564   * @param srcChars A pointer to another string to compare this one to.
565   * @return a negative/zero/positive integer corresponding to whether
566   * this string is less than/equal to/greater than the second one
567   * in code point order
568   * @stable ICU 2.0
569   */
570  inline int8_t compareCodePointOrder(int32_t start,
571                                      int32_t length,
572                                      const UChar *srcChars) const;
573
574  /**
575   * Compare two Unicode strings in code point order.
576   * The result may be different from the results of compare(), operator<, etc.
577   * if supplementary characters are present:
578   *
579   * In UTF-16, supplementary characters (with code points U+10000 and above) are
580   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
581   * which means that they compare as less than some other BMP characters like U+feff.
582   * This function compares Unicode strings in code point order.
583   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
584   *
585   * @param start The start offset in this string at which the compare operation begins.
586   * @param length The number of code units from this string to compare.
587   * @param srcChars A pointer to another string to compare this one to.
588   * @param srcStart The start offset in that string at which the compare operation begins.
589   * @param srcLength The number of code units from that string to compare.
590   * @return a negative/zero/positive integer corresponding to whether
591   * this string is less than/equal to/greater than the second one
592   * in code point order
593   * @stable ICU 2.0
594   */
595  inline int8_t compareCodePointOrder(int32_t start,
596                                      int32_t length,
597                                      const UChar *srcChars,
598                                      int32_t srcStart,
599                                      int32_t srcLength) const;
600
601  /**
602   * Compare two Unicode strings in code point order.
603   * The result may be different from the results of compare(), operator<, etc.
604   * if supplementary characters are present:
605   *
606   * In UTF-16, supplementary characters (with code points U+10000 and above) are
607   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
608   * which means that they compare as less than some other BMP characters like U+feff.
609   * This function compares Unicode strings in code point order.
610   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
611   *
612   * @param start The start offset in this string at which the compare operation begins.
613   * @param limit The offset after the last code unit from this string to compare.
614   * @param srcText Another string to compare this one to.
615   * @param srcStart The start offset in that string at which the compare operation begins.
616   * @param srcLimit The offset after the last code unit from that string to compare.
617   * @return a negative/zero/positive integer corresponding to whether
618   * this string is less than/equal to/greater than the second one
619   * in code point order
620   * @stable ICU 2.0
621   */
622  inline int8_t compareCodePointOrderBetween(int32_t start,
623                                             int32_t limit,
624                                             const UnicodeString& srcText,
625                                             int32_t srcStart,
626                                             int32_t srcLimit) const;
627
628  /**
629   * Compare two strings case-insensitively using full case folding.
630   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
631   *
632   * @param text Another string to compare this one to.
633   * @param options A bit set of options:
634   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
635   *     Comparison in code unit order with default case folding.
636   *
637   *   - U_COMPARE_CODE_POINT_ORDER
638   *     Set to choose code point order instead of code unit order
639   *     (see u_strCompare for details).
640   *
641   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
642   *
643   * @return A negative, zero, or positive integer indicating the comparison result.
644   * @stable ICU 2.0
645   */
646  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
647
648  /**
649   * Compare two strings case-insensitively using full case folding.
650   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
651   *
652   * @param start The start offset in this string at which the compare operation begins.
653   * @param length The number of code units from this string to compare.
654   * @param srcText Another string to compare this one to.
655   * @param options A bit set of options:
656   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
657   *     Comparison in code unit order with default case folding.
658   *
659   *   - U_COMPARE_CODE_POINT_ORDER
660   *     Set to choose code point order instead of code unit order
661   *     (see u_strCompare for details).
662   *
663   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
664   *
665   * @return A negative, zero, or positive integer indicating the comparison result.
666   * @stable ICU 2.0
667   */
668  inline int8_t caseCompare(int32_t start,
669         int32_t length,
670         const UnicodeString& srcText,
671         uint32_t options) const;
672
673  /**
674   * Compare two strings case-insensitively using full case folding.
675   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
676   *
677   * @param start The start offset in this string at which the compare operation begins.
678   * @param length The number of code units from this string to compare.
679   * @param srcText Another string to compare this one to.
680   * @param srcStart The start offset in that string at which the compare operation begins.
681   * @param srcLength The number of code units from that string to compare.
682   * @param options A bit set of options:
683   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
684   *     Comparison in code unit order with default case folding.
685   *
686   *   - U_COMPARE_CODE_POINT_ORDER
687   *     Set to choose code point order instead of code unit order
688   *     (see u_strCompare for details).
689   *
690   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
691   *
692   * @return A negative, zero, or positive integer indicating the comparison result.
693   * @stable ICU 2.0
694   */
695  inline int8_t caseCompare(int32_t start,
696         int32_t length,
697         const UnicodeString& srcText,
698         int32_t srcStart,
699         int32_t srcLength,
700         uint32_t options) const;
701
702  /**
703   * Compare two strings case-insensitively using full case folding.
704   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
705   *
706   * @param srcChars A pointer to another string to compare this one to.
707   * @param srcLength The number of code units from that string to compare.
708   * @param options A bit set of options:
709   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
710   *     Comparison in code unit order with default case folding.
711   *
712   *   - U_COMPARE_CODE_POINT_ORDER
713   *     Set to choose code point order instead of code unit order
714   *     (see u_strCompare for details).
715   *
716   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
717   *
718   * @return A negative, zero, or positive integer indicating the comparison result.
719   * @stable ICU 2.0
720   */
721  inline int8_t caseCompare(const UChar *srcChars,
722         int32_t srcLength,
723         uint32_t options) const;
724
725  /**
726   * Compare two strings case-insensitively using full case folding.
727   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
728   *
729   * @param start The start offset in this string at which the compare operation begins.
730   * @param length The number of code units from this string to compare.
731   * @param srcChars A pointer to another string to compare this one to.
732   * @param options A bit set of options:
733   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
734   *     Comparison in code unit order with default case folding.
735   *
736   *   - U_COMPARE_CODE_POINT_ORDER
737   *     Set to choose code point order instead of code unit order
738   *     (see u_strCompare for details).
739   *
740   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
741   *
742   * @return A negative, zero, or positive integer indicating the comparison result.
743   * @stable ICU 2.0
744   */
745  inline int8_t caseCompare(int32_t start,
746         int32_t length,
747         const UChar *srcChars,
748         uint32_t options) const;
749
750  /**
751   * Compare two strings case-insensitively using full case folding.
752   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
753   *
754   * @param start The start offset in this string at which the compare operation begins.
755   * @param length The number of code units from this string to compare.
756   * @param srcChars A pointer to another string to compare this one to.
757   * @param srcStart The start offset in that string at which the compare operation begins.
758   * @param srcLength The number of code units from that string to compare.
759   * @param options A bit set of options:
760   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
761   *     Comparison in code unit order with default case folding.
762   *
763   *   - U_COMPARE_CODE_POINT_ORDER
764   *     Set to choose code point order instead of code unit order
765   *     (see u_strCompare for details).
766   *
767   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
768   *
769   * @return A negative, zero, or positive integer indicating the comparison result.
770   * @stable ICU 2.0
771   */
772  inline int8_t caseCompare(int32_t start,
773         int32_t length,
774         const UChar *srcChars,
775         int32_t srcStart,
776         int32_t srcLength,
777         uint32_t options) const;
778
779  /**
780   * Compare two strings case-insensitively using full case folding.
781   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
782   *
783   * @param start The start offset in this string at which the compare operation begins.
784   * @param limit The offset after the last code unit from this string to compare.
785   * @param srcText Another string to compare this one to.
786   * @param srcStart The start offset in that string at which the compare operation begins.
787   * @param srcLimit The offset after the last code unit from that string to compare.
788   * @param options A bit set of options:
789   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
790   *     Comparison in code unit order with default case folding.
791   *
792   *   - U_COMPARE_CODE_POINT_ORDER
793   *     Set to choose code point order instead of code unit order
794   *     (see u_strCompare for details).
795   *
796   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
797   *
798   * @return A negative, zero, or positive integer indicating the comparison result.
799   * @stable ICU 2.0
800   */
801  inline int8_t caseCompareBetween(int32_t start,
802            int32_t limit,
803            const UnicodeString& srcText,
804            int32_t srcStart,
805            int32_t srcLimit,
806            uint32_t options) const;
807
808  /**
809   * Determine if this starts with the characters in <TT>text</TT>
810   * @param text The text to match.
811   * @return TRUE if this starts with the characters in <TT>text</TT>,
812   * FALSE otherwise
813   * @stable ICU 2.0
814   */
815  inline UBool startsWith(const UnicodeString& text) const;
816
817  /**
818   * Determine if this starts with the characters in <TT>srcText</TT>
819   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
820   * @param srcText The text to match.
821   * @param srcStart the offset into <TT>srcText</TT> to start matching
822   * @param srcLength the number of characters in <TT>srcText</TT> to match
823   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
824   * FALSE otherwise
825   * @stable ICU 2.0
826   */
827  inline UBool startsWith(const UnicodeString& srcText,
828            int32_t srcStart,
829            int32_t srcLength) const;
830
831  /**
832   * Determine if this starts with the characters in <TT>srcChars</TT>
833   * @param srcChars The characters to match.
834   * @param srcLength the number of characters in <TT>srcChars</TT>
835   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
836   * FALSE otherwise
837   * @stable ICU 2.0
838   */
839  inline UBool startsWith(const UChar *srcChars,
840            int32_t srcLength) const;
841
842  /**
843   * Determine if this starts with the characters in <TT>srcChars</TT>
844   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
845   * @param srcChars The characters to match.
846   * @param srcStart the offset into <TT>srcChars</TT> to start matching
847   * @param srcLength the number of characters in <TT>srcChars</TT> to match
848   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
849   * @stable ICU 2.0
850   */
851  inline UBool startsWith(const UChar *srcChars,
852            int32_t srcStart,
853            int32_t srcLength) const;
854
855  /**
856   * Determine if this ends with the characters in <TT>text</TT>
857   * @param text The text to match.
858   * @return TRUE if this ends with the characters in <TT>text</TT>,
859   * FALSE otherwise
860   * @stable ICU 2.0
861   */
862  inline UBool endsWith(const UnicodeString& text) const;
863
864  /**
865   * Determine if this ends with the characters in <TT>srcText</TT>
866   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
867   * @param srcText The text to match.
868   * @param srcStart the offset into <TT>srcText</TT> to start matching
869   * @param srcLength the number of characters in <TT>srcText</TT> to match
870   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
871   * FALSE otherwise
872   * @stable ICU 2.0
873   */
874  inline UBool endsWith(const UnicodeString& srcText,
875          int32_t srcStart,
876          int32_t srcLength) const;
877
878  /**
879   * Determine if this ends with the characters in <TT>srcChars</TT>
880   * @param srcChars The characters to match.
881   * @param srcLength the number of characters in <TT>srcChars</TT>
882   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
883   * FALSE otherwise
884   * @stable ICU 2.0
885   */
886  inline UBool endsWith(const UChar *srcChars,
887          int32_t srcLength) const;
888
889  /**
890   * Determine if this ends with the characters in <TT>srcChars</TT>
891   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
892   * @param srcChars The characters to match.
893   * @param srcStart the offset into <TT>srcChars</TT> to start matching
894   * @param srcLength the number of characters in <TT>srcChars</TT> to match
895   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
896   * FALSE otherwise
897   * @stable ICU 2.0
898   */
899  inline UBool endsWith(const UChar *srcChars,
900          int32_t srcStart,
901          int32_t srcLength) const;
902
903
904  /* Searching - bitwise only */
905
906  /**
907   * Locate in this the first occurrence of the characters in <TT>text</TT>,
908   * using bitwise comparison.
909   * @param text The text to search for.
910   * @return The offset into this of the start of <TT>text</TT>,
911   * or -1 if not found.
912   * @stable ICU 2.0
913   */
914  inline int32_t indexOf(const UnicodeString& text) const;
915
916  /**
917   * Locate in this the first occurrence of the characters in <TT>text</TT>
918   * starting at offset <TT>start</TT>, using bitwise comparison.
919   * @param text The text to search for.
920   * @param start The offset at which searching will start.
921   * @return The offset into this of the start of <TT>text</TT>,
922   * or -1 if not found.
923   * @stable ICU 2.0
924   */
925  inline int32_t indexOf(const UnicodeString& text,
926              int32_t start) const;
927
928  /**
929   * Locate in this the first occurrence in the range
930   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
931   * in <TT>text</TT>, using bitwise comparison.
932   * @param text The text to search for.
933   * @param start The offset at which searching will start.
934   * @param length The number of characters to search
935   * @return The offset into this of the start of <TT>text</TT>,
936   * or -1 if not found.
937   * @stable ICU 2.0
938   */
939  inline int32_t indexOf(const UnicodeString& text,
940              int32_t start,
941              int32_t length) const;
942
943  /**
944   * Locate in this the first occurrence in the range
945   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
946   * in <TT>srcText</TT> in the range
947   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
948   * using bitwise comparison.
949   * @param srcText The text to search for.
950   * @param srcStart the offset into <TT>srcText</TT> at which
951   * to start matching
952   * @param srcLength the number of characters in <TT>srcText</TT> to match
953   * @param start the offset into this at which to start matching
954   * @param length the number of characters in this to search
955   * @return The offset into this of the start of <TT>srcText</TT>,
956   * or -1 if not found.
957   * @stable ICU 2.0
958   */
959  inline int32_t indexOf(const UnicodeString& srcText,
960              int32_t srcStart,
961              int32_t srcLength,
962              int32_t start,
963              int32_t length) const;
964
965  /**
966   * Locate in this the first occurrence of the characters in
967   * <TT>srcChars</TT>
968   * starting at offset <TT>start</TT>, using bitwise comparison.
969   * @param srcChars The text to search for.
970   * @param srcLength the number of characters in <TT>srcChars</TT> to match
971   * @param start the offset into this at which to start matching
972   * @return The offset into this of the start of <TT>srcChars</TT>,
973   * or -1 if not found.
974   * @stable ICU 2.0
975   */
976  inline int32_t indexOf(const UChar *srcChars,
977              int32_t srcLength,
978              int32_t start) const;
979
980  /**
981   * Locate in this the first occurrence in the range
982   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
983   * in <TT>srcChars</TT>, using bitwise comparison.
984   * @param srcChars The text to search for.
985   * @param srcLength the number of characters in <TT>srcChars</TT>
986   * @param start The offset at which searching will start.
987   * @param length The number of characters to search
988   * @return The offset into this of the start of <TT>srcChars</TT>,
989   * or -1 if not found.
990   * @stable ICU 2.0
991   */
992  inline int32_t indexOf(const UChar *srcChars,
993              int32_t srcLength,
994              int32_t start,
995              int32_t length) const;
996
997  /**
998   * Locate in this the first occurrence in the range
999   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1000   * in <TT>srcChars</TT> in the range
1001   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1002   * using bitwise comparison.
1003   * @param srcChars The text to search for.
1004   * @param srcStart the offset into <TT>srcChars</TT> at which
1005   * to start matching
1006   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1007   * @param start the offset into this at which to start matching
1008   * @param length the number of characters in this to search
1009   * @return The offset into this of the start of <TT>srcChars</TT>,
1010   * or -1 if not found.
1011   * @stable ICU 2.0
1012   */
1013  int32_t indexOf(const UChar *srcChars,
1014              int32_t srcStart,
1015              int32_t srcLength,
1016              int32_t start,
1017              int32_t length) const;
1018
1019  /**
1020   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1021   * using bitwise comparison.
1022   * @param c The code unit to search for.
1023   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1024   * @stable ICU 2.0
1025   */
1026  inline int32_t indexOf(UChar c) const;
1027
1028  /**
1029   * Locate in this the first occurrence of the code point <TT>c</TT>,
1030   * using bitwise comparison.
1031   *
1032   * @param c The code point to search for.
1033   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1034   * @stable ICU 2.0
1035   */
1036  inline int32_t indexOf(UChar32 c) const;
1037
1038  /**
1039   * Locate in this the first occurrence of the BMP code point <code>c</code>,
1040   * starting at offset <TT>start</TT>, using bitwise comparison.
1041   * @param c The code unit to search for.
1042   * @param start The offset at which searching will start.
1043   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1044   * @stable ICU 2.0
1045   */
1046  inline int32_t indexOf(UChar c,
1047              int32_t start) const;
1048
1049  /**
1050   * Locate in this the first occurrence of the code point <TT>c</TT>
1051   * starting at offset <TT>start</TT>, using bitwise comparison.
1052   *
1053   * @param c The code point to search for.
1054   * @param start The offset at which searching will start.
1055   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1056   * @stable ICU 2.0
1057   */
1058  inline int32_t indexOf(UChar32 c,
1059              int32_t start) const;
1060
1061  /**
1062   * Locate in this the first occurrence of the BMP code point <code>c</code>
1063   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1064   * using bitwise comparison.
1065   * @param c The code unit to search for.
1066   * @param start the offset into this at which to start matching
1067   * @param length the number of characters in this to search
1068   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1069   * @stable ICU 2.0
1070   */
1071  inline int32_t indexOf(UChar c,
1072              int32_t start,
1073              int32_t length) const;
1074
1075  /**
1076   * Locate in this the first occurrence of the code point <TT>c</TT>
1077   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1078   * using bitwise comparison.
1079   *
1080   * @param c The code point to search for.
1081   * @param start the offset into this at which to start matching
1082   * @param length the number of characters in this to search
1083   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1084   * @stable ICU 2.0
1085   */
1086  inline int32_t indexOf(UChar32 c,
1087              int32_t start,
1088              int32_t length) const;
1089
1090  /**
1091   * Locate in this the last occurrence of the characters in <TT>text</TT>,
1092   * using bitwise comparison.
1093   * @param text The text to search for.
1094   * @return The offset into this of the start of <TT>text</TT>,
1095   * or -1 if not found.
1096   * @stable ICU 2.0
1097   */
1098  inline int32_t lastIndexOf(const UnicodeString& text) const;
1099
1100  /**
1101   * Locate in this the last occurrence of the characters in <TT>text</TT>
1102   * starting at offset <TT>start</TT>, using bitwise comparison.
1103   * @param text The text to search for.
1104   * @param start The offset at which searching will start.
1105   * @return The offset into this of the start of <TT>text</TT>,
1106   * or -1 if not found.
1107   * @stable ICU 2.0
1108   */
1109  inline int32_t lastIndexOf(const UnicodeString& text,
1110              int32_t start) const;
1111
1112  /**
1113   * Locate in this the last occurrence in the range
1114   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1115   * in <TT>text</TT>, using bitwise comparison.
1116   * @param text The text to search for.
1117   * @param start The offset at which searching will start.
1118   * @param length The number of characters to search
1119   * @return The offset into this of the start of <TT>text</TT>,
1120   * or -1 if not found.
1121   * @stable ICU 2.0
1122   */
1123  inline int32_t lastIndexOf(const UnicodeString& text,
1124              int32_t start,
1125              int32_t length) const;
1126
1127  /**
1128   * Locate in this the last occurrence in the range
1129   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1130   * in <TT>srcText</TT> in the range
1131   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1132   * using bitwise comparison.
1133   * @param srcText The text to search for.
1134   * @param srcStart the offset into <TT>srcText</TT> at which
1135   * to start matching
1136   * @param srcLength the number of characters in <TT>srcText</TT> to match
1137   * @param start the offset into this at which to start matching
1138   * @param length the number of characters in this to search
1139   * @return The offset into this of the start of <TT>srcText</TT>,
1140   * or -1 if not found.
1141   * @stable ICU 2.0
1142   */
1143  inline int32_t lastIndexOf(const UnicodeString& srcText,
1144              int32_t srcStart,
1145              int32_t srcLength,
1146              int32_t start,
1147              int32_t length) const;
1148
1149  /**
1150   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1151   * starting at offset <TT>start</TT>, using bitwise comparison.
1152   * @param srcChars The text to search for.
1153   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1154   * @param start the offset into this at which to start matching
1155   * @return The offset into this of the start of <TT>srcChars</TT>,
1156   * or -1 if not found.
1157   * @stable ICU 2.0
1158   */
1159  inline int32_t lastIndexOf(const UChar *srcChars,
1160              int32_t srcLength,
1161              int32_t start) const;
1162
1163  /**
1164   * Locate in this the last occurrence in the range
1165   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1166   * in <TT>srcChars</TT>, using bitwise comparison.
1167   * @param srcChars The text to search for.
1168   * @param srcLength the number of characters in <TT>srcChars</TT>
1169   * @param start The offset at which searching will start.
1170   * @param length The number of characters to search
1171   * @return The offset into this of the start of <TT>srcChars</TT>,
1172   * or -1 if not found.
1173   * @stable ICU 2.0
1174   */
1175  inline int32_t lastIndexOf(const UChar *srcChars,
1176              int32_t srcLength,
1177              int32_t start,
1178              int32_t length) const;
1179
1180  /**
1181   * Locate in this the last occurrence in the range
1182   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1183   * in <TT>srcChars</TT> in the range
1184   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1185   * using bitwise comparison.
1186   * @param srcChars The text to search for.
1187   * @param srcStart the offset into <TT>srcChars</TT> at which
1188   * to start matching
1189   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1190   * @param start the offset into this at which to start matching
1191   * @param length the number of characters in this to search
1192   * @return The offset into this of the start of <TT>srcChars</TT>,
1193   * or -1 if not found.
1194   * @stable ICU 2.0
1195   */
1196  int32_t lastIndexOf(const UChar *srcChars,
1197              int32_t srcStart,
1198              int32_t srcLength,
1199              int32_t start,
1200              int32_t length) const;
1201
1202  /**
1203   * Locate in this the last occurrence of the BMP code point <code>c</code>,
1204   * using bitwise comparison.
1205   * @param c The code unit to search for.
1206   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1207   * @stable ICU 2.0
1208   */
1209  inline int32_t lastIndexOf(UChar c) const;
1210
1211  /**
1212   * Locate in this the last occurrence of the code point <TT>c</TT>,
1213   * using bitwise comparison.
1214   *
1215   * @param c The code point to search for.
1216   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1217   * @stable ICU 2.0
1218   */
1219  inline int32_t lastIndexOf(UChar32 c) const;
1220
1221  /**
1222   * Locate in this the last occurrence of the BMP code point <code>c</code>
1223   * starting at offset <TT>start</TT>, using bitwise comparison.
1224   * @param c The code unit to search for.
1225   * @param start The offset at which searching will start.
1226   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1227   * @stable ICU 2.0
1228   */
1229  inline int32_t lastIndexOf(UChar c,
1230              int32_t start) const;
1231
1232  /**
1233   * Locate in this the last occurrence of the code point <TT>c</TT>
1234   * starting at offset <TT>start</TT>, using bitwise comparison.
1235   *
1236   * @param c The code point to search for.
1237   * @param start The offset at which searching will start.
1238   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1239   * @stable ICU 2.0
1240   */
1241  inline int32_t lastIndexOf(UChar32 c,
1242              int32_t start) const;
1243
1244  /**
1245   * Locate in this the last occurrence of the BMP code point <code>c</code>
1246   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1247   * using bitwise comparison.
1248   * @param c The code unit to search for.
1249   * @param start the offset into this at which to start matching
1250   * @param length the number of characters in this to search
1251   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1252   * @stable ICU 2.0
1253   */
1254  inline int32_t lastIndexOf(UChar c,
1255              int32_t start,
1256              int32_t length) const;
1257
1258  /**
1259   * Locate in this the last occurrence of the code point <TT>c</TT>
1260   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1261   * using bitwise comparison.
1262   *
1263   * @param c The code point to search for.
1264   * @param start the offset into this at which to start matching
1265   * @param length the number of characters in this to search
1266   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1267   * @stable ICU 2.0
1268   */
1269  inline int32_t lastIndexOf(UChar32 c,
1270              int32_t start,
1271              int32_t length) const;
1272
1273
1274  /* Character access */
1275
1276  /**
1277   * Return the code unit at offset <tt>offset</tt>.
1278   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1279   * @param offset a valid offset into the text
1280   * @return the code unit at offset <tt>offset</tt>
1281   *         or 0xffff if the offset is not valid for this string
1282   * @stable ICU 2.0
1283   */
1284  inline UChar charAt(int32_t offset) const;
1285
1286  /**
1287   * Return the code unit at offset <tt>offset</tt>.
1288   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1289   * @param offset a valid offset into the text
1290   * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1291   * @stable ICU 2.0
1292   */
1293  inline UChar operator[] (int32_t offset) const;
1294
1295  /**
1296   * Return the code point that contains the code unit
1297   * at offset <tt>offset</tt>.
1298   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1299   * @param offset a valid offset into the text
1300   * that indicates the text offset of any of the code units
1301   * that will be assembled into a code point (21-bit value) and returned
1302   * @return the code point of text at <tt>offset</tt>
1303   *         or 0xffff if the offset is not valid for this string
1304   * @stable ICU 2.0
1305   */
1306  UChar32 char32At(int32_t offset) const;
1307
1308  /**
1309   * Adjust a random-access offset so that
1310   * it points to the beginning of a Unicode character.
1311   * The offset that is passed in points to
1312   * any code unit of a code point,
1313   * while the returned offset will point to the first code unit
1314   * of the same code point.
1315   * In UTF-16, if the input offset points to a second surrogate
1316   * of a surrogate pair, then the returned offset will point
1317   * to the first surrogate.
1318   * @param offset a valid offset into one code point of the text
1319   * @return offset of the first code unit of the same code point
1320   * @see U16_SET_CP_START
1321   * @stable ICU 2.0
1322   */
1323  int32_t getChar32Start(int32_t offset) const;
1324
1325  /**
1326   * Adjust a random-access offset so that
1327   * it points behind a Unicode character.
1328   * The offset that is passed in points behind
1329   * any code unit of a code point,
1330   * while the returned offset will point behind the last code unit
1331   * of the same code point.
1332   * In UTF-16, if the input offset points behind the first surrogate
1333   * (i.e., to the second surrogate)
1334   * of a surrogate pair, then the returned offset will point
1335   * behind the second surrogate (i.e., to the first code unit after the pair).
1336   * @param offset a valid offset after any code unit of a code point of the text
1337   * @return offset of the first code unit after the same code point
1338   * @see U16_SET_CP_LIMIT
1339   * @stable ICU 2.0
1340   */
1341  int32_t getChar32Limit(int32_t offset) const;
1342
1343  /**
1344   * Move the code unit index along the string by delta code points.
1345   * Interpret the input index as a code unit-based offset into the string,
1346   * move the index forward or backward by delta code points, and
1347   * return the resulting index.
1348   * The input index should point to the first code unit of a code point,
1349   * if there is more than one.
1350   *
1351   * Both input and output indexes are code unit-based as for all
1352   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1353   * If delta<0 then the index is moved backward (toward the start of the string).
1354   * If delta>0 then the index is moved forward (toward the end of the string).
1355   *
1356   * This behaves like CharacterIterator::move32(delta, kCurrent).
1357   *
1358   * Behavior for out-of-bounds indexes:
1359   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1360   * if the input index<0 then it is pinned to 0;
1361   * if it is index>length() then it is pinned to length().
1362   * Afterwards, the index is moved by <code>delta</code> code points
1363   * forward or backward,
1364   * but no further backward than to 0 and no further forward than to length().
1365   * The resulting index return value will be in between 0 and length(), inclusively.
1366   *
1367   * Examples:
1368   * <pre>
1369   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1370   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1371   *
1372   * // initial index: position of U+10000
1373   * int32_t index=1;
1374   *
1375   * // the following examples will all result in index==4, position of U+10ffff
1376   *
1377   * // skip 2 code points from some position in the string
1378   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1379   *
1380   * // go to the 3rd code point from the start of s (0-based)
1381   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1382   *
1383   * // go to the next-to-last code point of s
1384   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1385   * </pre>
1386   *
1387   * @param index input code unit index
1388   * @param delta (signed) code point count to move the index forward or backward
1389   *        in the string
1390   * @return the resulting code unit index
1391   * @stable ICU 2.0
1392   */
1393  int32_t moveIndex32(int32_t index, int32_t delta) const;
1394
1395  /* Substring extraction */
1396
1397  /**
1398   * Copy the characters in the range
1399   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1400   * beginning at <tt>dstStart</tt>.
1401   * If the string aliases to <code>dst</code> itself as an external buffer,
1402   * then extract() will not copy the contents.
1403   *
1404   * @param start offset of first character which will be copied into the array
1405   * @param length the number of characters to extract
1406   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1407   * must be at least (<tt>dstStart + length</tt>).
1408   * @param dstStart the offset in <TT>dst</TT> where the first character
1409   * will be extracted
1410   * @stable ICU 2.0
1411   */
1412  inline void extract(int32_t start,
1413           int32_t length,
1414           UChar *dst,
1415           int32_t dstStart = 0) const;
1416
1417  /**
1418   * Copy the contents of the string into dest.
1419   * This is a convenience function that
1420   * checks if there is enough space in dest,
1421   * extracts the entire string if possible,
1422   * and NUL-terminates dest if possible.
1423   *
1424   * If the string fits into dest but cannot be NUL-terminated
1425   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1426   * If the string itself does not fit into dest
1427   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1428   *
1429   * If the string aliases to <code>dest</code> itself as an external buffer,
1430   * then extract() will not copy the contents.
1431   *
1432   * @param dest Destination string buffer.
1433   * @param destCapacity Number of UChars available at dest.
1434   * @param errorCode ICU error code.
1435   * @return length()
1436   * @stable ICU 2.0
1437   */
1438  int32_t
1439  extract(UChar *dest, int32_t destCapacity,
1440          UErrorCode &errorCode) const;
1441
1442  /**
1443   * Copy the characters in the range
1444   * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1445   * <tt>target</tt>.
1446   * @param start offset of first character which will be copied
1447   * @param length the number of characters to extract
1448   * @param target UnicodeString into which to copy characters
1449   *               (output parameter; this function returns no value).
1450   * @stable ICU 2.0
1451   */
1452  inline void extract(int32_t start,
1453           int32_t length,
1454           UnicodeString& target) const;
1455
1456  /**
1457   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1458   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1459   * @param start offset of first character which will be copied into the array
1460   * @param limit offset immediately following the last character to be copied
1461   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1462   * must be at least (<tt>dstStart + (limit - start)</tt>).
1463   * @param dstStart the offset in <TT>dst</TT> where the first character
1464   * will be extracted
1465   * @stable ICU 2.0
1466   */
1467  inline void extractBetween(int32_t start,
1468              int32_t limit,
1469              UChar *dst,
1470              int32_t dstStart = 0) const;
1471
1472  /**
1473   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1474   * into the UnicodeString <tt>target</tt>.  Replaceable API.
1475   * @param start offset of first character which will be copied
1476   * @param limit offset immediately following the last character to be copied
1477   * @param target UnicodeString into which to copy characters
1478   *               (output parameter; this function returns no value).
1479   * @stable ICU 2.0
1480   */
1481  virtual void extractBetween(int32_t start,
1482              int32_t limit,
1483              UnicodeString& target) const;
1484
1485  /**
1486   * Copy the characters in the range
1487   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1488   * All characters must be invariant (see utypes.h).
1489   * Use US_INV as the last, signature-distinguishing parameter.
1490   *
1491   * This function does not write any more than <code>targetCapacity</code>
1492   * characters but returns the length of the entire output string
1493   * so that one can allocate a larger buffer and call the function again
1494   * if necessary.
1495   * The output string is NUL-terminated if possible.
1496   *
1497   * @param start offset of first character which will be copied
1498   * @param startLength the number of characters to extract
1499   * @param target the target buffer for extraction, can be NULL
1500   *               if targetCapacity is 0
1501   * @param targetCapacity the length of the target buffer
1502   * @param inv Signature-distinguishing parameter, use US_INV.
1503   * @return the output string length, not including the terminating NUL
1504   * @stable ICU 3.2
1505   */
1506  int32_t extract(int32_t start,
1507           int32_t startLength,
1508           char *target,
1509           int32_t targetCapacity,
1510           enum EInvariant inv) const;
1511
1512#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1513
1514  /**
1515   * Copy the characters in the range
1516   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1517   * in the platform's default codepage.
1518   * This function does not write any more than <code>targetLength</code>
1519   * characters but returns the length of the entire output string
1520   * so that one can allocate a larger buffer and call the function again
1521   * if necessary.
1522   * The output string is NUL-terminated if possible.
1523   *
1524   * @param start offset of first character which will be copied
1525   * @param startLength the number of characters to extract
1526   * @param target the target buffer for extraction
1527   * @param targetLength the length of the target buffer.
1528   * If <TT>target</TT> is NULL, then the number of bytes required for
1529   * <TT>target</TT> is returned.
1530   * @return the output string length, not including the terminating NUL
1531   * @stable ICU 2.0
1532   */
1533  int32_t extract(int32_t start,
1534           int32_t startLength,
1535           char *target,
1536           uint32_t targetLength) const;
1537
1538#endif
1539
1540#if !UCONFIG_NO_CONVERSION
1541
1542  /**
1543   * Copy the characters in the range
1544   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1545   * in a specified codepage.
1546   * The output string is NUL-terminated.
1547   *
1548   * Recommendation: For invariant-character strings use
1549   * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1550   * because it avoids object code dependencies of UnicodeString on
1551   * the conversion code.
1552   *
1553   * @param start offset of first character which will be copied
1554   * @param startLength the number of characters to extract
1555   * @param target the target buffer for extraction
1556   * @param codepage the desired codepage for the characters.  0 has
1557   * the special meaning of the default codepage.
1558   * If <code>codepage</code> is an empty string (<code>""</code>),
1559   * then a simple conversion is performed on the codepage-invariant
1560   * subset ("invariant characters") of the platform encoding. See utypes.h.
1561   * If <TT>target</TT> is NULL, then the number of bytes required for
1562   * <TT>target</TT> is returned. It is assumed that the target is big enough
1563   * to fit all of the characters.
1564   * @return the output string length, not including the terminating NUL
1565   * @stable ICU 2.0
1566   */
1567  inline int32_t extract(int32_t start,
1568                 int32_t startLength,
1569                 char *target,
1570                 const char *codepage = 0) const;
1571
1572  /**
1573   * Copy the characters in the range
1574   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1575   * in a specified codepage.
1576   * This function does not write any more than <code>targetLength</code>
1577   * characters but returns the length of the entire output string
1578   * so that one can allocate a larger buffer and call the function again
1579   * if necessary.
1580   * The output string is NUL-terminated if possible.
1581   *
1582   * Recommendation: For invariant-character strings use
1583   * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
1584   * because it avoids object code dependencies of UnicodeString on
1585   * the conversion code.
1586   *
1587   * @param start offset of first character which will be copied
1588   * @param startLength the number of characters to extract
1589   * @param target the target buffer for extraction
1590   * @param targetLength the length of the target buffer
1591   * @param codepage the desired codepage for the characters.  0 has
1592   * the special meaning of the default codepage.
1593   * If <code>codepage</code> is an empty string (<code>""</code>),
1594   * then a simple conversion is performed on the codepage-invariant
1595   * subset ("invariant characters") of the platform encoding. See utypes.h.
1596   * If <TT>target</TT> is NULL, then the number of bytes required for
1597   * <TT>target</TT> is returned.
1598   * @return the output string length, not including the terminating NUL
1599   * @stable ICU 2.0
1600   */
1601  int32_t extract(int32_t start,
1602           int32_t startLength,
1603           char *target,
1604           uint32_t targetLength,
1605           const char *codepage) const;
1606
1607  /**
1608   * Convert the UnicodeString into a codepage string using an existing UConverter.
1609   * The output string is NUL-terminated if possible.
1610   *
1611   * This function avoids the overhead of opening and closing a converter if
1612   * multiple strings are extracted.
1613   *
1614   * @param dest destination string buffer, can be NULL if destCapacity==0
1615   * @param destCapacity the number of chars available at dest
1616   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1617   *        or NULL for the default converter
1618   * @param errorCode normal ICU error code
1619   * @return the length of the output string, not counting the terminating NUL;
1620   *         if the length is greater than destCapacity, then the string will not fit
1621   *         and a buffer of the indicated length would need to be passed in
1622   * @stable ICU 2.0
1623   */
1624  int32_t extract(char *dest, int32_t destCapacity,
1625                  UConverter *cnv,
1626                  UErrorCode &errorCode) const;
1627
1628#endif
1629
1630  /**
1631   * Create a temporary substring for the specified range.
1632   * Unlike the substring constructor and setTo() functions,
1633   * the object returned here will be a read-only alias (using getBuffer())
1634   * rather than copying the text.
1635   * As a result, this substring operation is much faster but requires
1636   * that the original string not be modified or deleted during the lifetime
1637   * of the returned substring object.
1638   * @param start offset of the first character visible in the substring
1639   * @param length length of the substring
1640   * @return a read-only alias UnicodeString object for the substring
1641   * @stable ICU 4.4
1642   */
1643  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1644
1645  /**
1646   * Create a temporary substring for the specified range.
1647   * Same as tempSubString(start, length) except that the substring range
1648   * is specified as a (start, limit) pair (with an exclusive limit index)
1649   * rather than a (start, length) pair.
1650   * @param start offset of the first character visible in the substring
1651   * @param limit offset immediately following the last character visible in the substring
1652   * @return a read-only alias UnicodeString object for the substring
1653   * @stable ICU 4.4
1654   */
1655  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1656
1657  /**
1658   * Convert the UnicodeString to UTF-8 and write the result
1659   * to a ByteSink. This is called by toUTF8String().
1660   * Unpaired surrogates are replaced with U+FFFD.
1661   * Calls u_strToUTF8WithSub().
1662   *
1663   * @param sink A ByteSink to which the UTF-8 version of the string is written.
1664   *             sink.Flush() is called at the end.
1665   * @stable ICU 4.2
1666   * @see toUTF8String
1667   */
1668  void toUTF8(ByteSink &sink) const;
1669
1670#if U_HAVE_STD_STRING
1671
1672  /**
1673   * Append the UTF-8 form of this UnicodeString to a standard string.
1674   * The conversion itself is delegated to toUTF8(), which is handed a
1675   * StringByteSink wrapped around <code>result</code>.
1676   * Unpaired surrogates are replaced with U+FFFD.
1677   *
1678   * @param result A standard string (or a compatible object)
1679   *        that receives the appended UTF-8 bytes.
1680   * @return The same object that was passed in as <code>result</code>.
1681   * @stable ICU 4.2
1682   * @see toUTF8
1683   */
1684  template<typename StringClass>
1685  StringClass &toUTF8String(StringClass &result) const {
1686    StringByteSink<StringClass> utf8Sink(&result);
1687    toUTF8(utf8Sink);
1688    return result;
1689  }
1690
1691#endif
1692
1693  /**
1694   * Convert the UnicodeString to UTF-32.
1695   * Unpaired surrogates are replaced with U+FFFD.
1696   * Calls u_strToUTF32WithSub().
1697   *
1698   * @param utf32 destination string buffer, can be NULL if capacity==0
1699   * @param capacity the number of UChar32s available at utf32
1700   * @param errorCode Standard ICU error code. Its input value must
1701   *                  pass the U_SUCCESS() test, or else the function returns
1702   *                  immediately. Check for U_FAILURE() on output or use with
1703   *                  function chaining. (See User Guide for details.)
1704   * @return The length of the UTF-32 string.
1705   * @see fromUTF32
1706   * @stable ICU 4.2
1707   */
1708  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1709
1710  /* Length operations */
1711
1712  /**
1713   * Return the length of the UnicodeString object.
1714   * The length is the number of UChar code units in the UnicodeString.
1715   * If you want the number of code points, please use countChar32().
1716   * @return the length of the UnicodeString object
1717   * @see countChar32
1718   * @stable ICU 2.0
1719   */
1720  inline int32_t length(void) const;
1721
1722  /**
1723   * Count Unicode code points in the length UChar code units of the string.
1724   * A code point may occupy either one or two UChar code units.
1725   * Counting code points involves reading all code units.
1726   *
1727   * This function is basically the inverse of moveIndex32().
1728   *
1729   * @param start the index of the first code unit to check
1730   * @param length the number of UChar code units to check
1731   * @return the number of code points in the specified code units
1732   * @see length
1733   * @stable ICU 2.0
1734   */
1735  int32_t
1736  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1737
1738  /**
1739   * Check if the length UChar code units of the string
1740   * contain more Unicode code points than a certain number.
1741   * This is more efficient than counting all code points in this part of the string
1742   * and comparing that number with a threshold.
1743   * This function may not need to scan the string at all if the length
1744   * falls within a certain range, and
1745   * never needs to count more than 'number+1' code points.
1746   * Logically equivalent to (countChar32(start, length)>number).
1747   * A Unicode code point may occupy either one or two UChar code units.
1748   *
1749   * @param start the index of the first code unit to check (0 for the entire string)
1750   * @param length the number of UChar code units to check
1751   *               (use INT32_MAX for the entire string; remember that start/length
1752   *                values are pinned)
1753   * @param number The number of code points in the (sub)string is compared against
1754   *               the 'number' parameter.
1755   * @return Boolean value for whether the string contains more Unicode code points
1756   *         than 'number'. Same as (u_countChar32(s, length)>number).
1757   * @see countChar32
1758   * @see u_strHasMoreChar32Than
1759   * @stable ICU 2.4
1760   */
1761  UBool
1762  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1763
1764  /**
1765   * Determine if this string is empty.
1766   * @return TRUE if this string contains 0 characters, FALSE otherwise.
1767   * @stable ICU 2.0
1768   */
1769  inline UBool isEmpty(void) const;
1770
1771  /**
1772   * Return the capacity of the internal buffer of the UnicodeString object.
1773   * This is useful together with the getBuffer functions.
1774   * See there for details.
1775   *
1776   * @return the number of UChars available in the internal buffer
1777   * @see getBuffer
1778   * @stable ICU 2.0
1779   */
1780  inline int32_t getCapacity(void) const;
1781
1782  /* Other operations */
1783
1784  /**
1785   * Generate a hash code for this object.
1786   * @return The hash code of this UnicodeString.
1787   * @stable ICU 2.0
1788   */
1789  inline int32_t hashCode(void) const;
1790
1791  /**
1792   * Determine whether this object is bogus, that is, contains no valid string.
1793   * A bogus string has no value. It is different from an empty string,
1794   * although in both cases isEmpty() returns TRUE and length() returns 0.
1795   * setToBogus() and isBogus() can be used to indicate that no string value is available.
1796   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1797   * length() returns 0.
1798   *
1799   * @return TRUE if the string is bogus (invalid), FALSE otherwise
1800   * @see setToBogus()
1801   * @stable ICU 2.0
1802   */
1803  inline UBool isBogus(void) const;
1804
1805
1806  //========================================
1807  // Write operations
1808  //========================================
1809
1810  /* Assignment operations */
1811
1812  /**
1813   * Assignment operator.  Replace the characters in this UnicodeString
1814   * with the characters from <TT>srcText</TT>.
1815   * @param srcText The text containing the characters to replace
1816   * @return a reference to this
1817   * @stable ICU 2.0
1818   */
1819  UnicodeString &operator=(const UnicodeString &srcText);
1820
1821  /**
1822   * Almost the same as the assignment operator.
1823   * Replace the characters in this UnicodeString
1824   * with the characters from <code>srcText</code>.
1825   *
1826   * This function works the same as the assignment operator
1827   * for all strings except for ones that are readonly aliases.
1828   *
1829   * Starting with ICU 2.4, the assignment operator and the copy constructor
1830   * allocate a new buffer and copy the buffer contents even for readonly aliases.
1831   * This function implements the old, more efficient but less safe behavior
1832   * of making this string also a readonly alias to the same buffer.
1833   *
1834   * The fastCopyFrom function must be used only if it is known that the lifetime of
1835   * this UnicodeString does not exceed the lifetime of the aliased buffer
1836   * including its contents, for example for strings from resource bundles
1837   * or aliases to string constants.
1838   *
1839   * @param src The text containing the characters to replace.
1840   * @return a reference to this
1841   * @stable ICU 2.4
1842   */
1843  UnicodeString &fastCopyFrom(const UnicodeString &src);
1844
1845  /**
1846   * Assignment operator.  Replace the characters in this UnicodeString
1847   * with the code unit <TT>ch</TT>.
1848   * @param ch the code unit to replace
1849   * @return a reference to this
1850   * @stable ICU 2.0
1851   */
1852  inline UnicodeString& operator= (UChar ch);
1853
1854  /**
1855   * Assignment operator.  Replace the characters in this UnicodeString
1856   * with the code point <TT>ch</TT>.
1857   * @param ch the code point to replace
1858   * @return a reference to this
1859   * @stable ICU 2.0
1860   */
1861  inline UnicodeString& operator= (UChar32 ch);
1862
1863  /**
1864   * Set the text in the UnicodeString object to the characters
1865   * in <TT>srcText</TT> in the range
1866   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1867   * <TT>srcText</TT> is not modified.
1868   * @param srcText the source for the new characters
1869   * @param srcStart the offset into <TT>srcText</TT> where new characters
1870   * will be obtained
1871   * @return a reference to this
1872   * @stable ICU 2.2
1873   */
1874  inline UnicodeString& setTo(const UnicodeString& srcText,
1875               int32_t srcStart);
1876
1877  /**
1878   * Set the text in the UnicodeString object to the characters
1879   * in <TT>srcText</TT> in the range
1880   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1881   * <TT>srcText</TT> is not modified.
1882   * @param srcText the source for the new characters
1883   * @param srcStart the offset into <TT>srcText</TT> where new characters
1884   * will be obtained
1885   * @param srcLength the number of characters in <TT>srcText</TT> in the
1886   * replace string.
1887   * @return a reference to this
1888   * @stable ICU 2.0
1889   */
1890  inline UnicodeString& setTo(const UnicodeString& srcText,
1891               int32_t srcStart,
1892               int32_t srcLength);
1893
1894  /**
1895   * Set the text in the UnicodeString object to the characters in
1896   * <TT>srcText</TT>.
1897   * <TT>srcText</TT> is not modified.
1898   * @param srcText the source for the new characters
1899   * @return a reference to this
1900   * @stable ICU 2.0
1901   */
1902  inline UnicodeString& setTo(const UnicodeString& srcText);
1903
1904  /**
1905   * Set the characters in the UnicodeString object to the characters
1906   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1907   * @param srcChars the source for the new characters
1908   * @param srcLength the number of Unicode characters in srcChars.
1909   * @return a reference to this
1910   * @stable ICU 2.0
1911   */
1912  inline UnicodeString& setTo(const UChar *srcChars,
1913               int32_t srcLength);
1914
1915  /**
1916   * Set the characters in the UnicodeString object to the code unit
1917   * <TT>srcChar</TT>.
1918   * @param srcChar the code unit which becomes the UnicodeString's character
1919   * content
1920   * @return a reference to this
1921   * @stable ICU 2.0
1922   */
1923  UnicodeString& setTo(UChar srcChar);
1924
1925  /**
1926   * Set the characters in the UnicodeString object to the code point
1927   * <TT>srcChar</TT>.
1928   * @param srcChar the code point which becomes the UnicodeString's character
1929   * content
1930   * @return a reference to this
1931   * @stable ICU 2.0
1932   */
1933  UnicodeString& setTo(UChar32 srcChar);
1934
1935  /**
1936   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1937   * The text will be used for the UnicodeString object, but
1938   * it will not be released when the UnicodeString is destroyed.
1939   * This has copy-on-write semantics:
1940   * When the string is modified, then the buffer is first copied into
1941   * newly allocated memory.
1942   * The aliased buffer is never modified.
1943   *
1944   * In an assignment to another UnicodeString, when using the copy constructor
1945   * or the assignment operator, the text will be copied.
1946   * When using fastCopyFrom(), the text will be aliased again,
1947   * so that both strings then alias the same readonly-text.
1948   *
1949   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1950   *                     This must be true if <code>textLength==-1</code>.
1951   * @param text The characters to alias for the UnicodeString.
1952   * @param textLength The number of Unicode characters in <code>text</code> to alias.
1953   *                   If -1, then this function will determine the length
1954   *                   by calling <code>u_strlen()</code>.
1955   * @return a reference to this
1956   * @stable ICU 2.0
1957   */
1958  UnicodeString &setTo(UBool isTerminated,
1959                       const UChar *text,
1960                       int32_t textLength);
1961
1962  /**
1963   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1964   * The text will be used for the UnicodeString object, but
1965   * it will not be released when the UnicodeString is destroyed.
1966   * This has write-through semantics:
1967   * For as long as the capacity of the buffer is sufficient, write operations
1968   * will directly affect the buffer. When more capacity is necessary, then
1969   * a new buffer will be allocated and the contents copied as with regularly
1970   * constructed strings.
1971   * In an assignment to another UnicodeString, the buffer will be copied.
1972   * The extract(UChar *dst) function detects whether the dst pointer is the same
1973   * as the string buffer itself and will in this case not copy the contents.
1974   *
1975   * @param buffer The characters to alias for the UnicodeString.
1976   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1977   * @param buffCapacity The size of <code>buffer</code> in UChars.
1978   * @return a reference to this
1979   * @stable ICU 2.0
1980   */
1981  UnicodeString &setTo(UChar *buffer,
1982                       int32_t buffLength,
1983                       int32_t buffCapacity);
1984
1985  /**
1986   * Make this UnicodeString object invalid.
1987   * The string will test TRUE with isBogus().
1988   *
1989   * A bogus string has no value. It is different from an empty string.
1990   * It can be used to indicate that no string value is available.
1991   * getBuffer() and getTerminatedBuffer() return NULL, and
1992   * length() returns 0.
1993   *
1994   * This utility function is used throughout the UnicodeString
1995   * implementation to indicate that a UnicodeString operation failed,
1996   * and may be used in other functions,
1997   * especially but not exclusively when such functions do not
1998   * take a UErrorCode for simplicity.
1999   *
2000   * The following methods, and no others, will clear a string object's bogus flag:
2001   * - remove()
2002   * - remove(0, INT32_MAX)
2003   * - truncate(0)
2004   * - operator=() (assignment operator)
2005   * - setTo(...)
2006   *
2007   * The simplest way to turn a bogus string into an empty one
2008   * is to use the remove() function.
2009   * Examples for other functions that are equivalent to "set to empty string":
2010   * \code
2011   * if(s.isBogus()) {
2012   *   s.remove();           // set to an empty string (remove all), or
2013   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2014   *   s.truncate(0);        // set to an empty string (complete truncation), or
2015   *   s=UnicodeString();    // assign an empty string, or
2016   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2017   *   static const UChar nul=0;
2018   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2019   * }
2020   * \endcode
2021   *
2022   * @see isBogus()
2023   * @stable ICU 2.0
2024   */
2025  void setToBogus();
2026
2027  /**
2028   * Set the character at the specified offset to the specified character.
2029   * @param offset A valid offset into the text of the character to set
2030   * @param ch The new character
2031   * @return A reference to this
2032   * @stable ICU 2.0
2033   */
2034  UnicodeString& setCharAt(int32_t offset,
2035               UChar ch);
2036
2037
2038  /* Append operations */
2039
2040  /**
2041   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2042   * object.
2043   * @param ch the code unit to be appended
2044   * @return a reference to this
2045   * @stable ICU 2.0
2046   */
2047 inline  UnicodeString& operator+= (UChar ch);
2048
2049  /**
2050   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2051   * object.
2052   * @param ch the code point to be appended
2053   * @return a reference to this
2054   * @stable ICU 2.0
2055   */
2056 inline  UnicodeString& operator+= (UChar32 ch);
2057
2058  /**
2059   * Append operator. Append the characters in <TT>srcText</TT> to the
2060   * UnicodeString object. <TT>srcText</TT> is not modified.
2061   * @param srcText the source for the new characters
2062   * @return a reference to this
2063   * @stable ICU 2.0
2064   */
2065  inline UnicodeString& operator+= (const UnicodeString& srcText);
2066
2067  /**
2068   * Append the characters
2069   * in <TT>srcText</TT> in the range
2070   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2071   * UnicodeString object. <TT>srcText</TT>
2072   * is not modified.
2073   * @param srcText the source for the new characters
2074   * @param srcStart the offset into <TT>srcText</TT> where new characters
2075   * will be obtained
2076   * @param srcLength the number of characters in <TT>srcText</TT> in
2077   * the append string
2078   * @return a reference to this
2079   * @stable ICU 2.0
2080   */
2081  inline UnicodeString& append(const UnicodeString& srcText,
2082            int32_t srcStart,
2083            int32_t srcLength);
2084
2085  /**
2086   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2087   * <TT>srcText</TT> is not modified.
2088   * @param srcText the source for the new characters
2089   * @return a reference to this
2090   * @stable ICU 2.0
2091   */
2092  inline UnicodeString& append(const UnicodeString& srcText);
2093
2094  /**
2095   * Append the characters in <TT>srcChars</TT> in the range
2096   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2097   * object.
2098   * <TT>srcChars</TT> is not modified.
2099   * @param srcChars the source for the new characters
2100   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2101   * will be obtained
2102   * @param srcLength the number of characters in <TT>srcChars</TT> in
2103   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2104   * @return a reference to this
2105   * @stable ICU 2.0
2106   */
2107  inline UnicodeString& append(const UChar *srcChars,
2108            int32_t srcStart,
2109            int32_t srcLength);
2110
2111  /**
2112   * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2113   * <TT>srcChars</TT> is not modified.
2114   * @param srcChars the source for the new characters
2115   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2116   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2117   * @return a reference to this
2118   * @stable ICU 2.0
2119   */
2120  inline UnicodeString& append(const UChar *srcChars,
2121            int32_t srcLength);
2122
2123  /**
2124   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2125   * @param srcChar the code unit to append
2126   * @return a reference to this
2127   * @stable ICU 2.0
2128   */
2129  inline UnicodeString& append(UChar srcChar);
2130
2131  /**
2132   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2133   * @param srcChar the code point to append
2134   * @return a reference to this
2135   * @stable ICU 2.0
2136   */
2137  UnicodeString& append(UChar32 srcChar);
2138
2139
2140  /* Insert operations */
2141
2142  /**
2143   * Insert the characters in <TT>srcText</TT> in the range
2144   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2145   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2146   * @param start the offset where the insertion begins
2147   * @param srcText the source for the new characters
2148   * @param srcStart the offset into <TT>srcText</TT> where new characters
2149   * will be obtained
2150   * @param srcLength the number of characters in <TT>srcText</TT> in
2151   * the insert string
2152   * @return a reference to this
2153   * @stable ICU 2.0
2154   */
2155  inline UnicodeString& insert(int32_t start,
2156            const UnicodeString& srcText,
2157            int32_t srcStart,
2158            int32_t srcLength);
2159
2160  /**
2161   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2162   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2163   * @param start the offset where the insertion begins
2164   * @param srcText the source for the new characters
2165   * @return a reference to this
2166   * @stable ICU 2.0
2167   */
2168  inline UnicodeString& insert(int32_t start,
2169            const UnicodeString& srcText);
2170
2171  /**
2172   * Insert the characters in <TT>srcChars</TT> in the range
2173   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2174   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2175   * @param start the offset at which the insertion begins
2176   * @param srcChars the source for the new characters
2177   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2178   * will be obtained
2179   * @param srcLength the number of characters in <TT>srcChars</TT>
2180   * in the insert string
2181   * @return a reference to this
2182   * @stable ICU 2.0
2183   */
2184  inline UnicodeString& insert(int32_t start,
2185            const UChar *srcChars,
2186            int32_t srcStart,
2187            int32_t srcLength);
2188
2189  /**
2190   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2191   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2192   * @param start the offset where the insertion begins
2193   * @param srcChars the source for the new characters
2194   * @param srcLength the number of Unicode characters in srcChars.
2195   * @return a reference to this
2196   * @stable ICU 2.0
2197   */
2198  inline UnicodeString& insert(int32_t start,
2199            const UChar *srcChars,
2200            int32_t srcLength);
2201
2202  /**
2203   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2204   * offset <TT>start</TT>.
2205   * @param start the offset at which the insertion occurs
2206   * @param srcChar the code unit to insert
2207   * @return a reference to this
2208   * @stable ICU 2.0
2209   */
2210  inline UnicodeString& insert(int32_t start,
2211            UChar srcChar);
2212
2213  /**
2214   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2215   * offset <TT>start</TT>.
2216   * @param start the offset at which the insertion occurs
2217   * @param srcChar the code point to insert
2218   * @return a reference to this
2219   * @stable ICU 2.0
2220   */
2221  inline UnicodeString& insert(int32_t start,
2222            UChar32 srcChar);
2223
2224
2225  /* Replace operations */
2226
2227  /**
2228   * Replace the characters in the range
2229   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2230   * <TT>srcText</TT> in the range
2231   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2232   * <TT>srcText</TT> is not modified.
2233   * @param start the offset at which the replace operation begins
2234   * @param length the number of characters to replace. The character at
2235   * <TT>start + length</TT> is not modified.
2236   * @param srcText the source for the new characters
2237   * @param srcStart the offset into <TT>srcText</TT> where new characters
2238   * will be obtained
2239   * @param srcLength the number of characters in <TT>srcText</TT> in
2240   * the replace string
2241   * @return a reference to this
2242   * @stable ICU 2.0
2243   */
2244  UnicodeString& replace(int32_t start,
2245             int32_t length,
2246             const UnicodeString& srcText,
2247             int32_t srcStart,
2248             int32_t srcLength);
2249
2250  /**
2251   * Replace the characters in the range
2252   * [<TT>start</TT>, <TT>start + length</TT>)
2253   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2254   *  not modified.
2255   * @param start the offset at which the replace operation begins
2256   * @param length the number of characters to replace. The character at
2257   * <TT>start + length</TT> is not modified.
2258   * @param srcText the source for the new characters
2259   * @return a reference to this
2260   * @stable ICU 2.0
2261   */
2262  UnicodeString& replace(int32_t start,
2263             int32_t length,
2264             const UnicodeString& srcText);
2265
2266  /**
2267   * Replace the characters in the range
2268   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2269   * <TT>srcChars</TT> in the range
2270   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2271   * is not modified.
2272   * @param start the offset at which the replace operation begins
2273   * @param length the number of characters to replace.  The character at
2274   * <TT>start + length</TT> is not modified.
2275   * @param srcChars the source for the new characters
2276   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2277   * will be obtained
2278   * @param srcLength the number of characters in <TT>srcChars</TT>
2279   * in the replace string
2280   * @return a reference to this
2281   * @stable ICU 2.0
2282   */
2283  UnicodeString& replace(int32_t start,
2284             int32_t length,
2285             const UChar *srcChars,
2286             int32_t srcStart,
2287             int32_t srcLength);
2288
2289  /**
2290   * Replace the characters in the range
2291   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2292   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2293   * @param start the offset at which the replace operation begins
2294   * @param length number of characters to replace.  The character at
2295   * <TT>start + length</TT> is not modified.
2296   * @param srcChars the source for the new characters
2297   * @param srcLength the number of Unicode characters in srcChars
2298   * @return a reference to this
2299   * @stable ICU 2.0
2300   */
2301  inline UnicodeString& replace(int32_t start,
2302             int32_t length,
2303             const UChar *srcChars,
2304             int32_t srcLength);
2305
2306  /**
2307   * Replace the characters in the range
2308   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2309   * <TT>srcChar</TT>.
2310   * @param start the offset at which the replace operation begins
2311   * @param length the number of characters to replace.  The character at
2312   * <TT>start + length</TT> is not modified.
2313   * @param srcChar the new code unit
2314   * @return a reference to this
2315   * @stable ICU 2.0
2316   */
2317  inline UnicodeString& replace(int32_t start,
2318             int32_t length,
2319             UChar srcChar);
2320
2321  /**
2322   * Replace the characters in the range
2323   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2324   * <TT>srcChar</TT>.
2325   * @param start the offset at which the replace operation begins
2326   * @param length the number of characters to replace.  The character at
2327   * <TT>start + length</TT> is not modified.
2328   * @param srcChar the new code point
2329   * @return a reference to this
2330   * @stable ICU 2.0
2331   */
2332  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2333
2334  /**
2335   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2336   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2337   * @param start the offset at which the replace operation begins
2338   * @param limit the offset immediately following the replace range
2339   * @param srcText the source for the new characters
2340   * @return a reference to this
2341   * @stable ICU 2.0
2342   */
2343  inline UnicodeString& replaceBetween(int32_t start,
2344                int32_t limit,
2345                const UnicodeString& srcText);
2346
2347  /**
2348   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2349   * with the characters in <TT>srcText</TT> in the range
2350   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2351   * @param start the offset at which the replace operation begins
2352   * @param limit the offset immediately following the replace range
2353   * @param srcText the source for the new characters
2354   * @param srcStart the offset into <TT>srcText</TT> where new characters
2355   * will be obtained
2356   * @param srcLimit the offset immediately following the range to copy
2357   * in <TT>srcText</TT>
2358   * @return a reference to this
2359   * @stable ICU 2.0
2360   */
2361  inline UnicodeString& replaceBetween(int32_t start,
2362                int32_t limit,
2363                const UnicodeString& srcText,
2364                int32_t srcStart,
2365                int32_t srcLimit);
2366
2367  /**
2368   * Replace a substring of this object with the given text.
2369   * @param start the beginning index, inclusive; <code>0 <= start
2370   * <= limit</code>.
2371   * @param limit the ending index, exclusive; <code>start <= limit
2372   * <= length()</code>.
2373   * @param text the text to replace characters <code>start</code>
2374   * to <code>limit - 1</code>
2375   * @stable ICU 2.0
2376   */
2377  virtual void handleReplaceBetween(int32_t start,
2378                                    int32_t limit,
2379                                    const UnicodeString& text);
2380
2381  /**
2382   * Replaceable API
2383   * @return TRUE if it has MetaData
2384   * @stable ICU 2.4
2385   */
2386  virtual UBool hasMetaData() const;
2387
2388  /**
2389   * Copy a substring of this object, retaining attribute (out-of-band)
2390   * information.  This method is used to duplicate or reorder substrings.
2391   * The destination index must not overlap the source range.
2392   *
2393   * @param start the beginning index, inclusive; <code>0 <= start <=
2394   * limit</code>.
2395   * @param limit the ending index, exclusive; <code>start <= limit <=
2396   * length()</code>.
2397   * @param dest the destination index.  The characters from
2398   * <code>start..limit-1</code> will be copied to <code>dest</code>.
2399   * Implementations of this method may assume that <code>dest <= start ||
2400   * dest >= limit</code>.
2401   * @stable ICU 2.0
2402   */
2403  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2404
2405  /* Search and replace operations */
2406
2407  /**
2408   * Replace all occurrences of characters in oldText with the characters
2409   * in newText
2410   * @param oldText the text containing the search text
2411   * @param newText the text containing the replacement text
2412   * @return a reference to this
2413   * @stable ICU 2.0
2414   */
2415  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2416                const UnicodeString& newText);
2417
2418  /**
2419   * Replace all occurrences of characters in oldText with characters
2420   * in newText
2421   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2422   * @param start the start of the range in which replace will be performed
2423   * @param length the length of the range in which replace will be performed
2424   * @param oldText the text containing the search text
2425   * @param newText the text containing the replacement text
2426   * @return a reference to this
2427   * @stable ICU 2.0
2428   */
2429  inline UnicodeString& findAndReplace(int32_t start,
2430                int32_t length,
2431                const UnicodeString& oldText,
2432                const UnicodeString& newText);
2433
2434  /**
2435   * Replace all occurrences of characters in oldText in the range
2436   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2437   * in newText in the range
2438   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2439   * in the range [<TT>start</TT>, <TT>start + length</TT>).
2440   * @param start the start of the range in which replace will be performed
2441   * @param length the length of the range in which replace will be performed
2442   * @param oldText the text containing the search text
2443   * @param oldStart the start of the search range in <TT>oldText</TT>
2444   * @param oldLength the length of the search range in <TT>oldText</TT>
2445   * @param newText the text containing the replacement text
2446   * @param newStart the start of the replacement range in <TT>newText</TT>
2447   * @param newLength the length of the replacement range in <TT>newText</TT>
2448   * @return a reference to this
2449   * @stable ICU 2.0
2450   */
2451  UnicodeString& findAndReplace(int32_t start,
2452                int32_t length,
2453                const UnicodeString& oldText,
2454                int32_t oldStart,
2455                int32_t oldLength,
2456                const UnicodeString& newText,
2457                int32_t newStart,
2458                int32_t newLength);
2459
2460
2461  /* Remove operations */
2462
2463  /**
2464   * Remove all characters from the UnicodeString object.
2465   * @return a reference to this
2466   * @stable ICU 2.0
2467   */
2468  inline UnicodeString& remove(void);
2469
2470  /**
2471   * Remove the characters in the range
2472   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2473   * @param start the offset of the first character to remove
2474   * @param length the number of characters to remove
2475   * @return a reference to this
2476   * @stable ICU 2.0
2477   */
2478  inline UnicodeString& remove(int32_t start,
2479                               int32_t length = (int32_t)INT32_MAX);
2480
2481  /**
2482   * Remove the characters in the range
2483   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2484   * @param start the offset of the first character to remove
2485   * @param limit the offset immediately following the range to remove
2486   * @return a reference to this
2487   * @stable ICU 2.0
2488   */
2489  inline UnicodeString& removeBetween(int32_t start,
2490                                      int32_t limit = (int32_t)INT32_MAX);
2491
2492  /**
2493   * Retain only the characters in the range
2494   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2495   * Removes characters before <code>start</code> and at and after <code>limit</code>.
2496   * @param start the offset of the first character to retain
2497   * @param limit the offset immediately following the range to retain
2498   * @return a reference to this
2499   * @stable ICU 4.4
2500   */
2501  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2502
2503  /* Length operations */
2504
2505  /**
2506   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2507   * If the length of this UnicodeString is less than targetLength,
2508   * targetLength - length() copies of padChar will be added to the
2509   * beginning of this UnicodeString.
2510   * @param targetLength the desired length of the string
2511   * @param padChar the character to use for padding. Defaults to
2512   * space (U+0020)
2513   * @return TRUE if the text was padded, FALSE otherwise.
2514   * @stable ICU 2.0
2515   */
2516  UBool padLeading(int32_t targetLength,
2517                    UChar padChar = 0x0020);
2518
2519  /**
2520   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2521   * If the length of this UnicodeString is less than targetLength,
2522   * targetLength - length() copies of padChar will be added to the
2523   * end of this UnicodeString.
2524   * @param targetLength the desired length of the string
2525   * @param padChar the character to use for padding. Defaults to
2526   * space (U+0020)
2527   * @return TRUE if the text was padded, FALSE otherwise.
2528   * @stable ICU 2.0
2529   */
2530  UBool padTrailing(int32_t targetLength,
2531                     UChar padChar = 0x0020);
2532
2533  /**
2534   * Truncate this UnicodeString to the <TT>targetLength</TT>.
2535   * @param targetLength the desired length of this UnicodeString.
2536   * @return TRUE if the text was truncated, FALSE otherwise
2537   * @stable ICU 2.0
2538   */
2539  inline UBool truncate(int32_t targetLength);
2540
2541  /**
2542   * Trims leading and trailing whitespace from this UnicodeString.
2543   * @return a reference to this
2544   * @stable ICU 2.0
2545   */
2546  UnicodeString& trim(void);
2547
2548
2549  /* Miscellaneous operations */
2550
2551  /**
2552   * Reverse this UnicodeString in place.
2553   * @return a reference to this
2554   * @stable ICU 2.0
2555   */
2556  inline UnicodeString& reverse(void);
2557
2558  /**
2559   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2560   * this UnicodeString.
2561   * @param start the start of the range to reverse
2562   * @param length the number of characters to reverse
2563   * @return a reference to this
2564   * @stable ICU 2.0
2565   */
2566  inline UnicodeString& reverse(int32_t start,
2567             int32_t length);
2568
2569  /**
2570   * Convert the characters in this to UPPER CASE following the conventions of
2571   * the default locale.
2572   * @return A reference to this.
2573   * @stable ICU 2.0
2574   */
2575  UnicodeString& toUpper(void);
2576
2577  /**
2578   * Convert the characters in this to UPPER CASE following the conventions of
2579   * a specific locale.
2580   * @param locale The locale containing the conventions to use.
2581   * @return A reference to this.
2582   * @stable ICU 2.0
2583   */
2584  UnicodeString& toUpper(const Locale& locale);
2585
2586  /**
2587   * Convert the characters in this to lower case following the conventions of
2588   * the default locale.
2589   * @return A reference to this.
2590   * @stable ICU 2.0
2591   */
2592  UnicodeString& toLower(void);
2593
2594  /**
2595   * Convert the characters in this to lower case following the conventions of
2596   * a specific locale.
2597   * @param locale The locale containing the conventions to use.
2598   * @return A reference to this.
2599   * @stable ICU 2.0
2600   */
2601  UnicodeString& toLower(const Locale& locale);
2602
2603#if !UCONFIG_NO_BREAK_ITERATION
2604
2605  /**
2606   * Titlecase this string, convenience function using the default locale.
2607   *
2608   * Casing is locale-dependent and context-sensitive.
2609   * Titlecasing uses a break iterator to find the first characters of words
2610   * that are to be titlecased. It titlecases those characters and lowercases
2611   * all others.
2612   *
2613   * The titlecase break iterator can be provided to customize for arbitrary
2614   * styles, using rules and dictionaries beyond the standard iterators.
2615   * It may be more efficient to always provide an iterator to avoid
2616   * opening and closing one for each string.
2617   * The standard titlecase iterator for the root locale implements the
2618   * algorithm of Unicode TR 21.
2619   *
2620   * This function uses only the setText(), first() and next() methods of the
2621   * provided break iterator.
2622   *
2623   * @param titleIter A break iterator to find the first characters of words
2624   *                  that are to be titlecased.
2625   *                  If none is provided (0), then a standard titlecase
2626   *                  break iterator is opened.
2627   *                  Otherwise the provided iterator is set to the string's text.
2628   * @return A reference to this.
2629   * @stable ICU 2.1
2630   */
2631  UnicodeString &toTitle(BreakIterator *titleIter);
2632
2633  /**
2634   * Titlecase this string.
2635   *
2636   * Casing is locale-dependent and context-sensitive.
2637   * Titlecasing uses a break iterator to find the first characters of words
2638   * that are to be titlecased. It titlecases those characters and lowercases
2639   * all others.
2640   *
2641   * The titlecase break iterator can be provided to customize for arbitrary
2642   * styles, using rules and dictionaries beyond the standard iterators.
2643   * It may be more efficient to always provide an iterator to avoid
2644   * opening and closing one for each string.
2645   * The standard titlecase iterator for the root locale implements the
2646   * algorithm of Unicode TR 21.
2647   *
2648   * This function uses only the setText(), first() and next() methods of the
2649   * provided break iterator.
2650   *
2651   * @param titleIter A break iterator to find the first characters of words
2652   *                  that are to be titlecased.
2653   *                  If none is provided (0), then a standard titlecase
2654   *                  break iterator is opened.
2655   *                  Otherwise the provided iterator is set to the string's text.
2656   * @param locale    The locale to consider.
2657   * @return A reference to this.
2658   * @stable ICU 2.1
2659   */
2660  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2661
2662  /**
2663   * Titlecase this string, with options.
2664   *
2665   * Casing is locale-dependent and context-sensitive.
2666   * Titlecasing uses a break iterator to find the first characters of words
2667   * that are to be titlecased. It titlecases those characters and lowercases
2668   * all others. (This can be modified with options.)
2669   *
2670   * The titlecase break iterator can be provided to customize for arbitrary
2671   * styles, using rules and dictionaries beyond the standard iterators.
2672   * It may be more efficient to always provide an iterator to avoid
2673   * opening and closing one for each string.
2674   * The standard titlecase iterator for the root locale implements the
2675   * algorithm of Unicode TR 21.
2676   *
2677   * This function uses only the setText(), first() and next() methods of the
2678   * provided break iterator.
2679   *
2680   * @param titleIter A break iterator to find the first characters of words
2681   *                  that are to be titlecased.
2682   *                  If none is provided (0), then a standard titlecase
2683   *                  break iterator is opened.
2684   *                  Otherwise the provided iterator is set to the string's text.
2685   * @param locale    The locale to consider.
2686   * @param options Options bit set, see ucasemap_open().
2687   * @return A reference to this.
2688   * @see U_TITLECASE_NO_LOWERCASE
2689   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2690   * @see ucasemap_open
2691   * @stable ICU 3.8
2692   */
2693  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2694
2695#endif
2696
2697  /**
2698   * Case-folds the characters in this string.
2699   *
2700   * Case-folding is locale-independent and not context-sensitive,
2701   * but there is an option for whether to include or exclude mappings for dotted I
2702   * and dotless i that are marked with 'T' in CaseFolding.txt.
2703   *
2704   * The result may be longer or shorter than the original.
2705   *
2706   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2707   * @return A reference to this.
2708   * @stable ICU 2.0
2709   */
2710  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2711
2712  //========================================
2713  // Access to the internal buffer
2714  //========================================
2715
2716  /**
2717   * Get a read/write pointer to the internal buffer.
2718   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2719   * writable, and is still owned by the UnicodeString object.
2720   * Calls to getBuffer(minCapacity) must not be nested, and
2721   * must be matched with calls to releaseBuffer(newLength).
2722   * If the string buffer was read-only or shared,
2723   * then it will be reallocated and copied.
2724   *
2725   * An attempted nested call will return 0, and will not further modify the
2726   * state of the UnicodeString object.
2727   * It also returns 0 if the string is bogus.
2728   *
2729   * The actual capacity of the string buffer may be larger than minCapacity.
2730   * getCapacity() returns the actual capacity.
2731   * For many operations, the full capacity should be used to avoid reallocations.
2732   *
2733   * While the buffer is "open" between getBuffer(minCapacity)
2734   * and releaseBuffer(newLength), the following applies:
2735   * - The string length is set to 0.
2736   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2737   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2738   * - You can read from and write to the returned buffer.
2739   * - The previous string contents will still be in the buffer;
2740   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
2741   *   If the length() was greater than minCapacity, then any contents after minCapacity
2742   *   may be lost.
2743   *   The buffer contents is not NUL-terminated by getBuffer().
2744   *   If length()<getCapacity() then you can terminate it by writing a NUL
2745   *   at index length().
2746   * - You must call releaseBuffer(newLength) in order to
2747   *   return to normal UnicodeString operation.
2748   *
2749   * @param minCapacity the minimum number of UChars that are to be available
2750   *        in the buffer, starting at the returned pointer;
2751   *        defaults to the current string capacity if minCapacity==-1
2752   * @return a writable pointer to the internal string buffer,
2753   *         or 0 if an error occurs (nested calls, bogus string, out of memory)
2754   *
2755   * @see releaseBuffer
2756   * @see getTerminatedBuffer()
2757   * @stable ICU 2.0
2758   */
2759  UChar *getBuffer(int32_t minCapacity);
2760
2761  /**
2762   * Release a read/write buffer on a UnicodeString object with an
2763   * "open" getBuffer(minCapacity).
2764   * This function must be called in a matched pair with getBuffer(minCapacity).
2765   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2766   *
2767   * It will set the string length to newLength, at most to the current capacity.
2768   * If newLength==-1 then it will set the length according to the
2769   * first NUL in the buffer, or to the capacity if there is no NUL.
2770   *
2771   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2772   *
2773   * @param newLength the new length of the UnicodeString object;
2774   *        defaults to the current capacity if newLength is greater than that;
2775   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2776   *        the current capacity of the string
2777   *
2778   * @see getBuffer(int32_t minCapacity)
2779   * @stable ICU 2.0
2780   */
2781  void releaseBuffer(int32_t newLength=-1);
2782
2783  /**
2784   * Get a read-only pointer to the internal buffer.
2785   * This can be called at any time on a valid UnicodeString.
2786   *
2787   * It returns 0 if the string is bogus, or
2788   * during an "open" getBuffer(minCapacity).
2789   *
2790   * It can be called as many times as desired.
2791   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2792   * at which time the pointer is semantically invalidated and must not be used any more.
2793   *
2794   * The capacity of the buffer can be determined with getCapacity().
2795   * The part after length() may or may not be initialized and valid,
2796   * depending on the history of the UnicodeString object.
2797   *
2798   * The buffer contents is (probably) not NUL-terminated.
2799   * You can check if it is with
2800   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2801   * (See getTerminatedBuffer().)
2802   *
2803   * The buffer may reside in read-only memory. Its contents must not
2804   * be modified.
2805   *
2806   * @return a read-only pointer to the internal string buffer,
2807   *         or 0 if the string is empty or bogus
2808   *
2809   * @see getBuffer(int32_t minCapacity)
2810   * @see getTerminatedBuffer()
2811   * @stable ICU 2.0
2812   */
2813  inline const UChar *getBuffer() const;
2814
2815  /**
2816   * Get a read-only pointer to the internal buffer,
2817   * making sure that it is NUL-terminated.
2818   * This can be called at any time on a valid UnicodeString.
2819   *
2820   * It returns 0 if the string is bogus, or
2821   * during an "open" getBuffer(minCapacity), or if the buffer cannot
2822   * be NUL-terminated (because memory allocation failed).
2823   *
2824   * It can be called as many times as desired.
2825   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2826   * at which time the pointer is semantically invalidated and must not be used any more.
2827   *
2828   * The capacity of the buffer can be determined with getCapacity().
2829   * The part after length()+1 may or may not be initialized and valid,
2830   * depending on the history of the UnicodeString object.
2831   *
2832   * The buffer contents is guaranteed to be NUL-terminated.
2833   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2834   * is written.
2835   * For this reason, this function is not const, unlike getBuffer().
2836   * Note that a UnicodeString may also contain NUL characters as part of its contents.
2837   *
2838   * The buffer may reside in read-only memory. Its contents must not
2839   * be modified.
2840   *
2841   * @return a read-only pointer to the internal string buffer,
2842   *         or 0 if the string is empty or bogus
2843   *
2844   * @see getBuffer(int32_t minCapacity)
2845   * @see getBuffer()
2846   * @stable ICU 2.2
2847   */
2848  inline const UChar *getTerminatedBuffer();
2849
2850  //========================================
2851  // Constructors
2852  //========================================
2853
2854  /** Construct an empty UnicodeString.
2855   * @stable ICU 2.0
2856   */
2857  inline UnicodeString();
2858
2859  /**
2860   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2861   * @param capacity the number of UChars this UnicodeString should hold
2862   * before a resize is necessary; if count is greater than 0 and count
2863   * code points c take up more space than capacity, then capacity is adjusted
2864   * accordingly.
2865   * @param c is used to initially fill the string
2866   * @param count specifies how many code points c are to be written in the
2867   *              string
2868   * @stable ICU 2.0
2869   */
2870  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2871
2872  /**
2873   * Single UChar (code unit) constructor.
2874   *
2875   * It is recommended to mark this constructor "explicit" by
2876   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2877   * on the compiler command line or similar.
2878   * @param ch the character to place in the UnicodeString
2879   * @stable ICU 2.0
2880   */
2881  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
2882
2883  /**
2884   * Single UChar32 (code point) constructor.
2885   *
2886   * It is recommended to mark this constructor "explicit" by
2887   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2888   * on the compiler command line or similar.
2889   * @param ch the character to place in the UnicodeString
2890   * @stable ICU 2.0
2891   */
2892  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2893
2894  /**
2895   * UChar* constructor.
2896   *
2897   * It is recommended to mark this constructor "explicit" by
2898   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2899   * on the compiler command line or similar.
2900   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2901   * must be NUL (U+0000) terminated.
2902   * @stable ICU 2.0
2903   */
2904  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
2905
2906  /**
2907   * UChar* constructor.
2908   * @param text The characters to place in the UnicodeString.
2909   * @param textLength The number of Unicode characters in <TT>text</TT>
2910   * to copy.
2911   * @stable ICU 2.0
2912   */
2913  UnicodeString(const UChar *text,
2914        int32_t textLength);
2915
2916  /**
2917   * Readonly-aliasing UChar* constructor.
2918   * The text will be used for the UnicodeString object, but
2919   * it will not be released when the UnicodeString is destroyed.
2920   * This has copy-on-write semantics:
2921   * When the string is modified, then the buffer is first copied into
2922   * newly allocated memory.
2923   * The aliased buffer is never modified.
2924   *
2925   * In an assignment to another UnicodeString, when using the copy constructor
2926   * or the assignment operator, the text will be copied.
2927   * When using fastCopyFrom(), the text will be aliased again,
2928   * so that both strings then alias the same readonly-text.
2929   *
2930   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2931   *                     This must be true if <code>textLength==-1</code>.
2932   * @param text The characters to alias for the UnicodeString.
2933   * @param textLength The number of Unicode characters in <code>text</code> to alias.
2934   *                   If -1, then this constructor will determine the length
2935   *                   by calling <code>u_strlen()</code>.
2936   * @stable ICU 2.0
2937   */
2938  UnicodeString(UBool isTerminated,
2939                const UChar *text,
2940                int32_t textLength);
2941
2942  /**
2943   * Writable-aliasing UChar* constructor.
2944   * The text will be used for the UnicodeString object, but
2945   * it will not be released when the UnicodeString is destroyed.
2946   * This has write-through semantics:
2947   * For as long as the capacity of the buffer is sufficient, write operations
2948   * will directly affect the buffer. When more capacity is necessary, then
2949   * a new buffer will be allocated and the contents copied as with regularly
2950   * constructed strings.
2951   * In an assignment to another UnicodeString, the buffer will be copied.
2952   * The extract(UChar *dst) function detects whether the dst pointer is the same
2953   * as the string buffer itself and will in this case not copy the contents.
2954   *
2955   * @param buffer The characters to alias for the UnicodeString.
2956   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2957   * @param buffCapacity The size of <code>buffer</code> in UChars.
2958   * @stable ICU 2.0
2959   */
2960  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2961
2962#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2963
2964  /**
2965   * char* constructor.
2966   * Uses the default converter (and thus depends on the ICU conversion code)
2967   * unless U_CHARSET_IS_UTF8 is set to 1.
2968   *
2969   * For ASCII (really "invariant character") strings it is more efficient to use
2970   * the constructor that takes a US_INV (for its enum EInvariant).
2971   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
2972   * UNICODE_STRING_SIMPLE.
2973   *
2974   * It is recommended to mark this constructor "explicit" by
2975   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2976   * on the compiler command line or similar.
2977   * @param codepageData an array of bytes, null-terminated,
2978   *                     in the platform's default codepage.
2979   * @stable ICU 2.0
2980   * @see UNICODE_STRING
2981   * @see UNICODE_STRING_SIMPLE
2982   */
2983  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2984
2985  /**
2986   * char* constructor.
2987   * Uses the default converter (and thus depends on the ICU conversion code)
2988   * unless U_CHARSET_IS_UTF8 is set to 1.
2989   * @param codepageData an array of bytes in the platform's default codepage.
2990   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2991   * @stable ICU 2.0
2992   */
2993  UnicodeString(const char *codepageData, int32_t dataLength);
2994
2995#endif
2996
2997#if !UCONFIG_NO_CONVERSION
2998
2999  /**
3000   * char* constructor.
3001   * @param codepageData an array of bytes, null-terminated
3002   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3003   * value 0 for <TT>codepage</TT> indicates that the text is in the
3004   * platform's default codepage.
3005   *
3006   * If <code>codepage</code> is an empty string (<code>""</code>),
3007   * then a simple conversion is performed on the codepage-invariant
3008   * subset ("invariant characters") of the platform encoding. See utypes.h.
3009   * Recommendation: For invariant-character strings use the constructor
3010   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3011   * because it avoids object code dependencies of UnicodeString on
3012   * the conversion code.
3013   *
3014   * @stable ICU 2.0
3015   */
3016  UnicodeString(const char *codepageData, const char *codepage);
3017
3018  /**
3019   * char* constructor.
3020   * @param codepageData an array of bytes.
3021   * @param dataLength The number of bytes in <TT>codepageData</TT>.
3022   * @param codepage the encoding of <TT>codepageData</TT>.  The special
3023   * value 0 for <TT>codepage</TT> indicates that the text is in the
3024   * platform's default codepage.
3025   * If <code>codepage</code> is an empty string (<code>""</code>),
3026   * then a simple conversion is performed on the codepage-invariant
3027   * subset ("invariant characters") of the platform encoding. See utypes.h.
3028   * Recommendation: For invariant-character strings use the constructor
3029   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3030   * because it avoids object code dependencies of UnicodeString on
3031   * the conversion code.
3032   *
3033   * @stable ICU 2.0
3034   */
3035  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3036
3037  /**
3038   * char * / UConverter constructor.
3039   * This constructor uses an existing UConverter object to
3040   * convert the codepage string to Unicode and construct a UnicodeString
3041   * from that.
3042   *
3043   * The converter is reset at first.
3044   * If the error code indicates a failure before this constructor is called,
3045   * or if an error occurs during conversion or construction,
3046   * then the string will be bogus.
3047   *
3048   * This function avoids the overhead of opening and closing a converter if
3049   * multiple strings are constructed.
3050   *
3051   * @param src input codepage string
3052   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3053   * @param cnv converter object (ucnv_resetToUnicode() will be called),
3054   *        can be NULL for the default converter
3055   * @param errorCode normal ICU error code
3056   * @stable ICU 2.0
3057   */
3058  UnicodeString(
3059        const char *src, int32_t srcLength,
3060        UConverter *cnv,
3061        UErrorCode &errorCode);
3062
3063#endif
3064
3065  /**
3066   * Constructs a Unicode string from an invariant-character char * string.
3067   * About invariant characters see utypes.h.
3068   * This constructor has no runtime dependency on conversion code and is
3069   * therefore recommended over ones taking a charset name string
3070   * (where the empty string "" indicates invariant-character conversion).
3071   *
3072   * Use the macro US_INV as the third, signature-distinguishing parameter.
3073   *
3074   * For example:
3075   * \code
3076   * void fn(const char *s) {
3077   *   UnicodeString ustr(s, -1, US_INV);
3078   *   // use ustr ...
3079   * }
3080   * \endcode
3081   *
3082   * @param src String using only invariant characters.
3083   * @param length Length of src, or -1 if NUL-terminated.
3084   * @param inv Signature-distinguishing paramater, use US_INV.
3085   *
3086   * @see US_INV
3087   * @stable ICU 3.2
3088   */
3089  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3090
3091
3092  /**
3093   * Copy constructor.
3094   * @param that The UnicodeString object to copy.
3095   * @stable ICU 2.0
3096   */
3097  UnicodeString(const UnicodeString& that);
3098
3099  /**
3100   * 'Substring' constructor from tail of source string.
3101   * @param src The UnicodeString object to copy.
3102   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3103   * @stable ICU 2.2
3104   */
3105  UnicodeString(const UnicodeString& src, int32_t srcStart);
3106
3107  /**
3108   * 'Substring' constructor from subrange of source string.
3109   * @param src The UnicodeString object to copy.
3110   * @param srcStart The offset into <tt>src</tt> at which to start copying.
3111   * @param srcLength The number of characters from <tt>src</tt> to copy.
3112   * @stable ICU 2.2
3113   */
3114  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3115
3116  /**
3117   * Clone this object, an instance of a subclass of Replaceable.
3118   * Clones can be used concurrently in multiple threads.
3119   * If a subclass does not implement clone(), or if an error occurs,
3120   * then NULL is returned.
3121   * The clone functions in all subclasses return a pointer to a Replaceable
3122   * because some compilers do not support covariant (same-as-this)
3123   * return types; cast to the appropriate subclass if necessary.
3124   * The caller must delete the clone.
3125   *
3126   * @return a clone of this object
3127   *
3128   * @see Replaceable::clone
3129   * @see getDynamicClassID
3130   * @stable ICU 2.6
3131   */
3132  virtual Replaceable *clone() const;
3133
3134  /** Destructor.
3135   * @stable ICU 2.0
3136   */
3137  virtual ~UnicodeString();
3138
3139  /**
3140   * Create a UnicodeString from a UTF-8 string.
3141   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3142   * Calls u_strFromUTF8WithSub().
3143   *
3144   * @param utf8 UTF-8 input string.
3145   *             Note that a StringPiece can be implicitly constructed
3146   *             from a std::string or a NUL-terminated const char * string.
3147   * @return A UnicodeString with equivalent UTF-16 contents.
3148   * @see toUTF8
3149   * @see toUTF8String
3150   * @stable ICU 4.2
3151   */
3152  static UnicodeString fromUTF8(const StringPiece &utf8);
3153
3154  /**
3155   * Create a UnicodeString from a UTF-32 string.
3156   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3157   * Calls u_strFromUTF32WithSub().
3158   *
3159   * @param utf32 UTF-32 input string. Must not be NULL.
3160   * @param length Length of the input string, or -1 if NUL-terminated.
3161   * @return A UnicodeString with equivalent UTF-16 contents.
3162   * @see toUTF32
3163   * @stable ICU 4.2
3164   */
3165  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3166
3167  /* Miscellaneous operations */
3168
3169  /**
3170   * Unescape a string of characters and return a string containing
3171   * the result.  The following escape sequences are recognized:
3172   *
3173   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3174   * \\Uhhhhhhhh   8 hex digits
3175   * \\xhh         1-2 hex digits
3176   * \\ooo         1-3 octal digits; o in [0-7]
3177   * \\cX          control-X; X is masked with 0x1F
3178   *
3179   * as well as the standard ANSI C escapes:
3180   *
3181   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3182   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3183   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3184   *
3185   * Anything else following a backslash is generically escaped.  For
3186   * example, "[a\\-z]" returns "[a-z]".
3187   *
3188   * If an escape sequence is ill-formed, this method returns an empty
3189   * string.  An example of an ill-formed sequence is "\\u" followed by
3190   * fewer than 4 hex digits.
3191   *
3192   * This function is similar to u_unescape() but not identical to it.
3193   * The latter takes a source char*, so it does escape recognition
3194   * and also invariant conversion.
3195   *
3196   * @return a string with backslash escapes interpreted, or an
3197   * empty string on error.
3198   * @see UnicodeString#unescapeAt()
3199   * @see u_unescape()
3200   * @see u_unescapeAt()
3201   * @stable ICU 2.0
3202   */
3203  UnicodeString unescape() const;
3204
3205  /**
3206   * Unescape a single escape sequence and return the represented
3207   * character.  See unescape() for a listing of the recognized escape
3208   * sequences.  The character at offset-1 is assumed (without
3209   * checking) to be a backslash.  If the escape sequence is
3210   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3211   * returned.
3212   *
3213   * @param offset an input output parameter.  On input, it is the
3214   * offset into this string where the escape sequence is located,
3215   * after the initial backslash.  On output, it is advanced after the
3216   * last character parsed.  On error, it is not advanced at all.
3217   * @return the character represented by the escape sequence at
3218   * offset, or U_SENTINEL=-1 on error.
3219   * @see UnicodeString#unescape()
3220   * @see u_unescape()
3221   * @see u_unescapeAt()
3222   * @stable ICU 2.0
3223   */
3224  UChar32 unescapeAt(int32_t &offset) const;
3225
3226  /**
3227   * ICU "poor man's RTTI", returns a UClassID for this class.
3228   *
3229   * @stable ICU 2.2
3230   */
3231  static UClassID U_EXPORT2 getStaticClassID();
3232
3233  /**
3234   * ICU "poor man's RTTI", returns a UClassID for the actual class.
3235   *
3236   * @stable ICU 2.2
3237   */
3238  virtual UClassID getDynamicClassID() const;
3239
3240  //========================================
3241  // Implementation methods
3242  //========================================
3243
3244protected:
3245  /**
3246   * Implement Replaceable::getLength() (see jitterbug 1027).
3247   * @stable ICU 2.4
3248   */
3249  virtual int32_t getLength() const;
3250
3251  /**
3252   * The change in Replaceable to use virtual getCharAt() allows
3253   * UnicodeString::charAt() to be inline again (see jitterbug 709).
3254   * @stable ICU 2.4
3255   */
3256  virtual UChar getCharAt(int32_t offset) const;
3257
3258  /**
3259   * The change in Replaceable to use virtual getChar32At() allows
3260   * UnicodeString::char32At() to be inline again (see jitterbug 709).
3261   * @stable ICU 2.4
3262   */
3263  virtual UChar32 getChar32At(int32_t offset) const;
3264
3265private:
3266  // For char* constructors. Could be made public.
3267  UnicodeString &setToUTF8(const StringPiece &utf8);
3268  // For extract(char*).
3269  // We could make a toUTF8(target, capacity, errorCode) public but not
3270  // this version: New API will be cleaner if we make callers create substrings
3271  // rather than having start+length on every method,
3272  // and it should take a UErrorCode&.
3273  int32_t
3274  toUTF8(int32_t start, int32_t len,
3275         char *target, int32_t capacity) const;
3276
3277  /**
3278   * Internal string contents comparison, called by operator==.
3279   * Requires: this & text not bogus and have same lengths.
3280   */
3281  UBool doEquals(const UnicodeString &text, int32_t len) const;
3282
3283  inline int8_t
3284  doCompare(int32_t start,
3285           int32_t length,
3286           const UnicodeString& srcText,
3287           int32_t srcStart,
3288           int32_t srcLength) const;
3289
3290  int8_t doCompare(int32_t start,
3291           int32_t length,
3292           const UChar *srcChars,
3293           int32_t srcStart,
3294           int32_t srcLength) const;
3295
3296  inline int8_t
3297  doCompareCodePointOrder(int32_t start,
3298                          int32_t length,
3299                          const UnicodeString& srcText,
3300                          int32_t srcStart,
3301                          int32_t srcLength) const;
3302
3303  int8_t doCompareCodePointOrder(int32_t start,
3304                                 int32_t length,
3305                                 const UChar *srcChars,
3306                                 int32_t srcStart,
3307                                 int32_t srcLength) const;
3308
3309  inline int8_t
3310  doCaseCompare(int32_t start,
3311                int32_t length,
3312                const UnicodeString &srcText,
3313                int32_t srcStart,
3314                int32_t srcLength,
3315                uint32_t options) const;
3316
3317  int8_t
3318  doCaseCompare(int32_t start,
3319                int32_t length,
3320                const UChar *srcChars,
3321                int32_t srcStart,
3322                int32_t srcLength,
3323                uint32_t options) const;
3324
3325  int32_t doIndexOf(UChar c,
3326            int32_t start,
3327            int32_t length) const;
3328
3329  int32_t doIndexOf(UChar32 c,
3330                        int32_t start,
3331                        int32_t length) const;
3332
3333  int32_t doLastIndexOf(UChar c,
3334                int32_t start,
3335                int32_t length) const;
3336
3337  int32_t doLastIndexOf(UChar32 c,
3338                            int32_t start,
3339                            int32_t length) const;
3340
3341  void doExtract(int32_t start,
3342         int32_t length,
3343         UChar *dst,
3344         int32_t dstStart) const;
3345
3346  inline void doExtract(int32_t start,
3347         int32_t length,
3348         UnicodeString& target) const;
3349
3350  inline UChar doCharAt(int32_t offset)  const;
3351
3352  UnicodeString& doReplace(int32_t start,
3353               int32_t length,
3354               const UnicodeString& srcText,
3355               int32_t srcStart,
3356               int32_t srcLength);
3357
3358  UnicodeString& doReplace(int32_t start,
3359               int32_t length,
3360               const UChar *srcChars,
3361               int32_t srcStart,
3362               int32_t srcLength);
3363
3364  UnicodeString& doReverse(int32_t start,
3365               int32_t length);
3366
3367  // calculate hash code
3368  int32_t doHashCode(void) const;
3369
3370  // get pointer to start of array
3371  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3372  inline UChar* getArrayStart(void);
3373  inline const UChar* getArrayStart(void) const;
3374
3375  // A UnicodeString object (not necessarily its current buffer)
3376  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3377  inline UBool isWritable() const;
3378
3379  // Is the current buffer writable?
3380  inline UBool isBufferWritable() const;
3381
3382  // None of the following does releaseArray().
3383  inline void setLength(int32_t len);        // sets only fShortLength and fLength
3384  inline void setToEmpty();                  // sets fFlags=kShortString
3385  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3386
3387  // allocate the array; result may be fStackBuffer
3388  // sets refCount to 1 if appropriate
3389  // sets fArray, fCapacity, and fFlags
3390  // returns boolean for success or failure
3391  UBool allocate(int32_t capacity);
3392
// Releases the array, if this string owns it.
void
releaseArray(void);
3395
// Turns a bogus string into an empty one.
void
unBogus();
3398
3399  // implements assigment operator, copy constructor, and fastCopyFrom()
3400  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3401
3402  // Pin start and limit to acceptable values.
3403  inline void pinIndex(int32_t& start) const;
3404  inline void pinIndices(int32_t& start,
3405                         int32_t& length) const;
3406
3407#if !UCONFIG_NO_CONVERSION
3408
3409  /* Internal extract() using UConverter. */
3410  int32_t doExtract(int32_t start, int32_t length,
3411                    char *dest, int32_t destCapacity,
3412                    UConverter *cnv,
3413                    UErrorCode &errorCode) const;
3414
3415  /*
3416   * Real constructor for converting from codepage data.
3417   * It assumes that it is called with !fRefCounted.
3418   *
3419   * If <code>codepage==0</code>, then the default converter
3420   * is used for the platform encoding.
3421   * If <code>codepage</code> is an empty string (<code>""</code>),
3422   * then a simple conversion is performed on the codepage-invariant
3423   * subset ("invariant characters") of the platform encoding. See utypes.h.
3424   */
3425  void doCodepageCreate(const char *codepageData,
3426                        int32_t dataLength,
3427                        const char *codepage);
3428
3429  /*
3430   * Worker function for creating a UnicodeString from
3431   * a codepage string using a UConverter.
3432   */
3433  void
3434  doCodepageCreate(const char *codepageData,
3435                   int32_t dataLength,
3436                   UConverter *converter,
3437                   UErrorCode &status);
3438
3439#endif
3440
3441  /*
3442   * This function is called when write access to the array
3443   * is necessary.
3444   *
3445   * We need to make a copy of the array if
3446   * the buffer is read-only, or
3447   * the buffer is refCounted (shared), and refCount>1, or
3448   * the buffer is too small.
3449   *
3450   * Return FALSE if memory could not be allocated.
3451   */
3452  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3453                            int32_t growCapacity = -1,
3454                            UBool doCopyArray = TRUE,
3455                            int32_t **pBufferToDelete = 0,
3456                            UBool forceClone = FALSE);
3457
3458  /**
3459   * Common function for UnicodeString case mappings.
3460   * The stringCaseMapper has the same type UStringCaseMapper
3461   * as in ustr_imp.h for ustrcase_map().
3462   */
3463  UnicodeString &
3464  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3465
3466  // ref counting
3467  void addRef(void);
3468  int32_t removeRef(void);
3469  int32_t refCount(void) const;
3470
// Class-private constants.
enum {
  // Size of the stack buffer for short strings.
  // Chosen so that sizeof(UnicodeString) is, naturally (without padding),
  // a multiple of sizeof(pointer).
  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15,
  kInvalidUChar=0xffff, // invalid UChar index
  kGrowSize=128,        // grow size for this buffer
  kInvalidHashCode=0,   // invalid hash code
  kEmptyHashCode=1,     // hash code for empty string

  // Bit flag values stored in fFlags:
  kIsBogus=1,           // this string is bogus, i.e., not valid or NULL
  kUsingStackBuffer=2,  // fUnion.fStackBuffer is in use rather than fUnion.fFields
  kRefCounted=4,        // a refCount field precedes the characters in fArray
  kBufferIsReadonly=8,  // this buffer must not be written to
  kOpenGetBuffer=16,    // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called

  // Convenience combinations of the flags above:
  kShortString=kUsingStackBuffer,
  kLongString=kRefCounted,
  kReadonlyAlias=kBufferIsReadonly,
  kWritableAlias=0
};
3495
3496  friend class StringThreadTest;
3497  friend class UnicodeStringAppendable;
3498
3499  union StackBufferOrFields;        // forward declaration necessary before friend declaration
3500  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3501
3502  /*
3503   * The following are all the class fields that are stored
3504   * in each UnicodeString object.
3505   * Note that UnicodeString has virtual functions,
3506   * therefore there is an implicit vtable pointer
3507   * as the first real field.
3508   * The fields should be aligned such that no padding is necessary.
3509   * On 32-bit machines, the size should be 32 bytes,
3510   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3511   *
3512   * We use a hack to achieve this.
3513   *
3514   * With at least some compilers, each of the following is forced to
3515   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3516   * rounded up with additional padding if the fields do not already fit that requirement:
3517   * - sizeof(class UnicodeString)
3518   * - offsetof(UnicodeString, fUnion)
3519   * - sizeof(fUnion)
3520   * - sizeof(fFields)
3521   *
3522   * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3523   * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3524   * (Padding at the end of fFields is ok:
3525   * As long as there is no padding after fStackBuffer, it is not wasted space.)
3526   *
3527   * We further assume that the compiler does not reorder the fields,
3528   * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3529   * with at most some padding (but no other field) in between.
3530   * (Padding there would be wasted space, but functionally harmless.)
3531   *
3532   * We use a few more sizeof(pointer)'s chunks of space with
3533   * fRestOfStackBuffer, fShortLength and fFlags,
3534   * to get up exactly to the intended sizeof(UnicodeString).
3535   */
3536  // (implicit) *vtable;
3537  union StackBufferOrFields {
3538    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3539    // else fFields is used
3540    UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
3541    struct {
3542      UChar   *fArray;    // the Unicode data
3543      int32_t fCapacity;  // capacity of fArray (in UChars)
3544      int32_t fLength;    // number of characters in fArray if >127; else undefined
3545    } fFields;
3546  } fUnion;
3547  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3548  int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
3549  uint8_t fFlags;       // bit flags: see constants above
3550};
3551
3552/**
3553 * Create a new UnicodeString with the concatenation of two others.
3554 *
3555 * @param s1 The first string to be copied to the new one.
3556 * @param s2 The second string to be copied to the new one, after s1.
3557 * @return UnicodeString(s1).append(s2)
3558 * @stable ICU 2.8
3559 */
3560U_COMMON_API UnicodeString U_EXPORT2
3561operator+ (const UnicodeString &s1, const UnicodeString &s2);
3562
3563//========================================
3564// Inline members
3565//========================================
3566
3567//========================================
3568// Privates
3569//========================================
3570
3571inline void
3572UnicodeString::pinIndex(int32_t& start) const
3573{
3574  // pin index
3575  if(start < 0) {
3576    start = 0;
3577  } else if(start > length()) {
3578    start = length();
3579  }
3580}
3581
3582inline void
3583UnicodeString::pinIndices(int32_t& start,
3584                          int32_t& _length) const
3585{
3586  // pin indices
3587  int32_t len = length();
3588  if(start < 0) {
3589    start = 0;
3590  } else if(start > len) {
3591    start = len;
3592  }
3593  if(_length < 0) {
3594    _length = 0;
3595  } else if(_length > (len - start)) {
3596    _length = (len - start);
3597  }
3598}
3599
3600inline UChar*
3601UnicodeString::getArrayStart()
3602{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3603
3604inline const UChar*
3605UnicodeString::getArrayStart() const
3606{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3607
3608//========================================
3609// Default constructor
3610//========================================
3611
3612inline
3613UnicodeString::UnicodeString()
3614  : fShortLength(0),
3615    fFlags(kShortString)
3616{}
3617
3618//========================================
3619// Read-only implementation methods
3620//========================================
3621inline int32_t
3622UnicodeString::length() const
3623{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3624
3625inline int32_t
3626UnicodeString::getCapacity() const
3627{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3628
3629inline int32_t
3630UnicodeString::hashCode() const
3631{ return doHashCode(); }
3632
3633inline UBool
3634UnicodeString::isBogus() const
3635{ return (UBool)(fFlags & kIsBogus); }
3636
3637inline UBool
3638UnicodeString::isWritable() const
3639{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3640
3641inline UBool
3642UnicodeString::isBufferWritable() const
3643{
3644  return (UBool)(
3645      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3646      (!(fFlags&kRefCounted) || refCount()==1));
3647}
3648
3649inline const UChar *
3650UnicodeString::getBuffer() const {
3651  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3652    return 0;
3653  } else if(fFlags&kUsingStackBuffer) {
3654    return fUnion.fStackBuffer;
3655  } else {
3656    return fUnion.fFields.fArray;
3657  }
3658}
3659
3660//========================================
3661// Read-only alias methods
3662//========================================
3663inline int8_t
3664UnicodeString::doCompare(int32_t start,
3665              int32_t thisLength,
3666              const UnicodeString& srcText,
3667              int32_t srcStart,
3668              int32_t srcLength) const
3669{
3670  if(srcText.isBogus()) {
3671    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3672  } else {
3673    srcText.pinIndices(srcStart, srcLength);
3674    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3675  }
3676}
3677
3678inline UBool
3679UnicodeString::operator== (const UnicodeString& text) const
3680{
3681  if(isBogus()) {
3682    return text.isBogus();
3683  } else {
3684    int32_t len = length(), textLength = text.length();
3685    return !text.isBogus() && len == textLength && doEquals(text, len);
3686  }
3687}
3688
3689inline UBool
3690UnicodeString::operator!= (const UnicodeString& text) const
3691{ return (! operator==(text)); }
3692
3693inline UBool
3694UnicodeString::operator> (const UnicodeString& text) const
3695{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3696
3697inline UBool
3698UnicodeString::operator< (const UnicodeString& text) const
3699{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3700
3701inline UBool
3702UnicodeString::operator>= (const UnicodeString& text) const
3703{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3704
3705inline UBool
3706UnicodeString::operator<= (const UnicodeString& text) const
3707{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3708
3709inline int8_t
3710UnicodeString::compare(const UnicodeString& text) const
3711{ return doCompare(0, length(), text, 0, text.length()); }
3712
3713inline int8_t
3714UnicodeString::compare(int32_t start,
3715               int32_t _length,
3716               const UnicodeString& srcText) const
3717{ return doCompare(start, _length, srcText, 0, srcText.length()); }
3718
3719inline int8_t
3720UnicodeString::compare(const UChar *srcChars,
3721               int32_t srcLength) const
3722{ return doCompare(0, length(), srcChars, 0, srcLength); }
3723
3724inline int8_t
3725UnicodeString::compare(int32_t start,
3726               int32_t _length,
3727               const UnicodeString& srcText,
3728               int32_t srcStart,
3729               int32_t srcLength) const
3730{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3731
3732inline int8_t
3733UnicodeString::compare(int32_t start,
3734               int32_t _length,
3735               const UChar *srcChars) const
3736{ return doCompare(start, _length, srcChars, 0, _length); }
3737
3738inline int8_t
3739UnicodeString::compare(int32_t start,
3740               int32_t _length,
3741               const UChar *srcChars,
3742               int32_t srcStart,
3743               int32_t srcLength) const
3744{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3745
3746inline int8_t
3747UnicodeString::compareBetween(int32_t start,
3748                  int32_t limit,
3749                  const UnicodeString& srcText,
3750                  int32_t srcStart,
3751                  int32_t srcLimit) const
3752{ return doCompare(start, limit - start,
3753           srcText, srcStart, srcLimit - srcStart); }
3754
3755inline int8_t
3756UnicodeString::doCompareCodePointOrder(int32_t start,
3757                                       int32_t thisLength,
3758                                       const UnicodeString& srcText,
3759                                       int32_t srcStart,
3760                                       int32_t srcLength) const
3761{
3762  if(srcText.isBogus()) {
3763    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3764  } else {
3765    srcText.pinIndices(srcStart, srcLength);
3766    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3767  }
3768}
3769
3770inline int8_t
3771UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3772{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3773
3774inline int8_t
3775UnicodeString::compareCodePointOrder(int32_t start,
3776                                     int32_t _length,
3777                                     const UnicodeString& srcText) const
3778{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3779
3780inline int8_t
3781UnicodeString::compareCodePointOrder(const UChar *srcChars,
3782                                     int32_t srcLength) const
3783{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3784
3785inline int8_t
3786UnicodeString::compareCodePointOrder(int32_t start,
3787                                     int32_t _length,
3788                                     const UnicodeString& srcText,
3789                                     int32_t srcStart,
3790                                     int32_t srcLength) const
3791{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3792
3793inline int8_t
3794UnicodeString::compareCodePointOrder(int32_t start,
3795                                     int32_t _length,
3796                                     const UChar *srcChars) const
3797{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3798
3799inline int8_t
3800UnicodeString::compareCodePointOrder(int32_t start,
3801                                     int32_t _length,
3802                                     const UChar *srcChars,
3803                                     int32_t srcStart,
3804                                     int32_t srcLength) const
3805{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3806
3807inline int8_t
3808UnicodeString::compareCodePointOrderBetween(int32_t start,
3809                                            int32_t limit,
3810                                            const UnicodeString& srcText,
3811                                            int32_t srcStart,
3812                                            int32_t srcLimit) const
3813{ return doCompareCodePointOrder(start, limit - start,
3814           srcText, srcStart, srcLimit - srcStart); }
3815
3816inline int8_t
3817UnicodeString::doCaseCompare(int32_t start,
3818                             int32_t thisLength,
3819                             const UnicodeString &srcText,
3820                             int32_t srcStart,
3821                             int32_t srcLength,
3822                             uint32_t options) const
3823{
3824  if(srcText.isBogus()) {
3825    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3826  } else {
3827    srcText.pinIndices(srcStart, srcLength);
3828    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3829  }
3830}
3831
3832inline int8_t
3833UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3834  return doCaseCompare(0, length(), text, 0, text.length(), options);
3835}
3836
3837inline int8_t
3838UnicodeString::caseCompare(int32_t start,
3839                           int32_t _length,
3840                           const UnicodeString &srcText,
3841                           uint32_t options) const {
3842  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3843}
3844
3845inline int8_t
3846UnicodeString::caseCompare(const UChar *srcChars,
3847                           int32_t srcLength,
3848                           uint32_t options) const {
3849  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3850}
3851
3852inline int8_t
3853UnicodeString::caseCompare(int32_t start,
3854                           int32_t _length,
3855                           const UnicodeString &srcText,
3856                           int32_t srcStart,
3857                           int32_t srcLength,
3858                           uint32_t options) const {
3859  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3860}
3861
3862inline int8_t
3863UnicodeString::caseCompare(int32_t start,
3864                           int32_t _length,
3865                           const UChar *srcChars,
3866                           uint32_t options) const {
3867  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3868}
3869
3870inline int8_t
3871UnicodeString::caseCompare(int32_t start,
3872                           int32_t _length,
3873                           const UChar *srcChars,
3874                           int32_t srcStart,
3875                           int32_t srcLength,
3876                           uint32_t options) const {
3877  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3878}
3879
3880inline int8_t
3881UnicodeString::caseCompareBetween(int32_t start,
3882                                  int32_t limit,
3883                                  const UnicodeString &srcText,
3884                                  int32_t srcStart,
3885                                  int32_t srcLimit,
3886                                  uint32_t options) const {
3887  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3888}
3889
3890inline int32_t
3891UnicodeString::indexOf(const UnicodeString& srcText,
3892               int32_t srcStart,
3893               int32_t srcLength,
3894               int32_t start,
3895               int32_t _length) const
3896{
3897  if(!srcText.isBogus()) {
3898    srcText.pinIndices(srcStart, srcLength);
3899    if(srcLength > 0) {
3900      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3901    }
3902  }
3903  return -1;
3904}
3905
3906inline int32_t
3907UnicodeString::indexOf(const UnicodeString& text) const
3908{ return indexOf(text, 0, text.length(), 0, length()); }
3909
3910inline int32_t
3911UnicodeString::indexOf(const UnicodeString& text,
3912               int32_t start) const {
3913  pinIndex(start);
3914  return indexOf(text, 0, text.length(), start, length() - start);
3915}
3916
3917inline int32_t
3918UnicodeString::indexOf(const UnicodeString& text,
3919               int32_t start,
3920               int32_t _length) const
3921{ return indexOf(text, 0, text.length(), start, _length); }
3922
3923inline int32_t
3924UnicodeString::indexOf(const UChar *srcChars,
3925               int32_t srcLength,
3926               int32_t start) const {
3927  pinIndex(start);
3928  return indexOf(srcChars, 0, srcLength, start, length() - start);
3929}
3930
3931inline int32_t
3932UnicodeString::indexOf(const UChar *srcChars,
3933               int32_t srcLength,
3934               int32_t start,
3935               int32_t _length) const
3936{ return indexOf(srcChars, 0, srcLength, start, _length); }
3937
3938inline int32_t
3939UnicodeString::indexOf(UChar c,
3940               int32_t start,
3941               int32_t _length) const
3942{ return doIndexOf(c, start, _length); }
3943
3944inline int32_t
3945UnicodeString::indexOf(UChar32 c,
3946               int32_t start,
3947               int32_t _length) const
3948{ return doIndexOf(c, start, _length); }
3949
3950inline int32_t
3951UnicodeString::indexOf(UChar c) const
3952{ return doIndexOf(c, 0, length()); }
3953
3954inline int32_t
3955UnicodeString::indexOf(UChar32 c) const
3956{ return indexOf(c, 0, length()); }
3957
3958inline int32_t
3959UnicodeString::indexOf(UChar c,
3960               int32_t start) const {
3961  pinIndex(start);
3962  return doIndexOf(c, start, length() - start);
3963}
3964
3965inline int32_t
3966UnicodeString::indexOf(UChar32 c,
3967               int32_t start) const {
3968  pinIndex(start);
3969  return indexOf(c, start, length() - start);
3970}
3971
3972inline int32_t
3973UnicodeString::lastIndexOf(const UChar *srcChars,
3974               int32_t srcLength,
3975               int32_t start,
3976               int32_t _length) const
3977{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3978
3979inline int32_t
3980UnicodeString::lastIndexOf(const UChar *srcChars,
3981               int32_t srcLength,
3982               int32_t start) const {
3983  pinIndex(start);
3984  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3985}
3986
3987inline int32_t
3988UnicodeString::lastIndexOf(const UnicodeString& srcText,
3989               int32_t srcStart,
3990               int32_t srcLength,
3991               int32_t start,
3992               int32_t _length) const
3993{
3994  if(!srcText.isBogus()) {
3995    srcText.pinIndices(srcStart, srcLength);
3996    if(srcLength > 0) {
3997      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3998    }
3999  }
4000  return -1;
4001}
4002
4003inline int32_t
4004UnicodeString::lastIndexOf(const UnicodeString& text,
4005               int32_t start,
4006               int32_t _length) const
4007{ return lastIndexOf(text, 0, text.length(), start, _length); }
4008
4009inline int32_t
4010UnicodeString::lastIndexOf(const UnicodeString& text,
4011               int32_t start) const {
4012  pinIndex(start);
4013  return lastIndexOf(text, 0, text.length(), start, length() - start);
4014}
4015
4016inline int32_t
4017UnicodeString::lastIndexOf(const UnicodeString& text) const
4018{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4019
4020inline int32_t
4021UnicodeString::lastIndexOf(UChar c,
4022               int32_t start,
4023               int32_t _length) const
4024{ return doLastIndexOf(c, start, _length); }
4025
4026inline int32_t
4027UnicodeString::lastIndexOf(UChar32 c,
4028               int32_t start,
4029               int32_t _length) const {
4030  return doLastIndexOf(c, start, _length);
4031}
4032
4033inline int32_t
4034UnicodeString::lastIndexOf(UChar c) const
4035{ return doLastIndexOf(c, 0, length()); }
4036
4037inline int32_t
4038UnicodeString::lastIndexOf(UChar32 c) const {
4039  return lastIndexOf(c, 0, length());
4040}
4041
4042inline int32_t
4043UnicodeString::lastIndexOf(UChar c,
4044               int32_t start) const {
4045  pinIndex(start);
4046  return doLastIndexOf(c, start, length() - start);
4047}
4048
4049inline int32_t
4050UnicodeString::lastIndexOf(UChar32 c,
4051               int32_t start) const {
4052  pinIndex(start);
4053  return lastIndexOf(c, start, length() - start);
4054}
4055
4056inline UBool
4057UnicodeString::startsWith(const UnicodeString& text) const
4058{ return compare(0, text.length(), text, 0, text.length()) == 0; }
4059
4060inline UBool
4061UnicodeString::startsWith(const UnicodeString& srcText,
4062              int32_t srcStart,
4063              int32_t srcLength) const
4064{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4065
4066inline UBool
4067UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4068  if(srcLength < 0) {
4069    srcLength = u_strlen(srcChars);
4070  }
4071  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4072}
4073
4074inline UBool
4075UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4076  if(srcLength < 0) {
4077    srcLength = u_strlen(srcChars);
4078  }
4079  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4080}
4081
4082inline UBool
4083UnicodeString::endsWith(const UnicodeString& text) const
4084{ return doCompare(length() - text.length(), text.length(),
4085           text, 0, text.length()) == 0; }
4086
4087inline UBool
4088UnicodeString::endsWith(const UnicodeString& srcText,
4089            int32_t srcStart,
4090            int32_t srcLength) const {
4091  srcText.pinIndices(srcStart, srcLength);
4092  return doCompare(length() - srcLength, srcLength,
4093                   srcText, srcStart, srcLength) == 0;
4094}
4095
4096inline UBool
4097UnicodeString::endsWith(const UChar *srcChars,
4098            int32_t srcLength) const {
4099  if(srcLength < 0) {
4100    srcLength = u_strlen(srcChars);
4101  }
4102  return doCompare(length() - srcLength, srcLength,
4103                   srcChars, 0, srcLength) == 0;
4104}
4105
4106inline UBool
4107UnicodeString::endsWith(const UChar *srcChars,
4108            int32_t srcStart,
4109            int32_t srcLength) const {
4110  if(srcLength < 0) {
4111    srcLength = u_strlen(srcChars + srcStart);
4112  }
4113  return doCompare(length() - srcLength, srcLength,
4114                   srcChars, srcStart, srcLength) == 0;
4115}
4116
4117//========================================
4118// replace
4119//========================================
4120inline UnicodeString&
4121UnicodeString::replace(int32_t start,
4122               int32_t _length,
4123               const UnicodeString& srcText)
4124{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4125
4126inline UnicodeString&
4127UnicodeString::replace(int32_t start,
4128               int32_t _length,
4129               const UnicodeString& srcText,
4130               int32_t srcStart,
4131               int32_t srcLength)
4132{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4133
4134inline UnicodeString&
4135UnicodeString::replace(int32_t start,
4136               int32_t _length,
4137               const UChar *srcChars,
4138               int32_t srcLength)
4139{ return doReplace(start, _length, srcChars, 0, srcLength); }
4140
4141inline UnicodeString&
4142UnicodeString::replace(int32_t start,
4143               int32_t _length,
4144               const UChar *srcChars,
4145               int32_t srcStart,
4146               int32_t srcLength)
4147{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4148
4149inline UnicodeString&
4150UnicodeString::replace(int32_t start,
4151               int32_t _length,
4152               UChar srcChar)
4153{ return doReplace(start, _length, &srcChar, 0, 1); }
4154
4155inline UnicodeString&
4156UnicodeString::replaceBetween(int32_t start,
4157                  int32_t limit,
4158                  const UnicodeString& srcText)
4159{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4160
4161inline UnicodeString&
4162UnicodeString::replaceBetween(int32_t start,
4163                  int32_t limit,
4164                  const UnicodeString& srcText,
4165                  int32_t srcStart,
4166                  int32_t srcLimit)
4167{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4168
4169inline UnicodeString&
4170UnicodeString::findAndReplace(const UnicodeString& oldText,
4171                  const UnicodeString& newText)
4172{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4173            newText, 0, newText.length()); }
4174
4175inline UnicodeString&
4176UnicodeString::findAndReplace(int32_t start,
4177                  int32_t _length,
4178                  const UnicodeString& oldText,
4179                  const UnicodeString& newText)
4180{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4181            newText, 0, newText.length()); }
4182
4183// ============================
4184// extract
4185// ============================
4186inline void
4187UnicodeString::doExtract(int32_t start,
4188             int32_t _length,
4189             UnicodeString& target) const
4190{ target.replace(0, target.length(), *this, start, _length); }
4191
4192inline void
4193UnicodeString::extract(int32_t start,
4194               int32_t _length,
4195               UChar *target,
4196               int32_t targetStart) const
4197{ doExtract(start, _length, target, targetStart); }
4198
4199inline void
4200UnicodeString::extract(int32_t start,
4201               int32_t _length,
4202               UnicodeString& target) const
4203{ doExtract(start, _length, target); }
4204
4205#if !UCONFIG_NO_CONVERSION
4206
4207inline int32_t
4208UnicodeString::extract(int32_t start,
4209               int32_t _length,
4210               char *dst,
4211               const char *codepage) const
4212
4213{
4214  // This dstSize value will be checked explicitly
4215  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4216}
4217
4218#endif
4219
4220inline void
4221UnicodeString::extractBetween(int32_t start,
4222                  int32_t limit,
4223                  UChar *dst,
4224                  int32_t dstStart) const {
4225  pinIndex(start);
4226  pinIndex(limit);
4227  doExtract(start, limit - start, dst, dstStart);
4228}
4229
4230inline UnicodeString
4231UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4232    return tempSubString(start, limit - start);
4233}
4234
4235inline UChar
4236UnicodeString::doCharAt(int32_t offset) const
4237{
4238  if((uint32_t)offset < (uint32_t)length()) {
4239    return getArrayStart()[offset];
4240  } else {
4241    return kInvalidUChar;
4242  }
4243}
4244
4245inline UChar
4246UnicodeString::charAt(int32_t offset) const
4247{ return doCharAt(offset); }
4248
4249inline UChar
4250UnicodeString::operator[] (int32_t offset) const
4251{ return doCharAt(offset); }
4252
4253inline UBool
4254UnicodeString::isEmpty() const {
4255  return fShortLength == 0;
4256}
4257
4258//========================================
4259// Write implementation methods
4260//========================================
4261inline void
4262UnicodeString::setLength(int32_t len) {
4263  if(len <= 127) {
4264    fShortLength = (int8_t)len;
4265  } else {
4266    fShortLength = (int8_t)-1;
4267    fUnion.fFields.fLength = len;
4268  }
4269}
4270
4271inline void
4272UnicodeString::setToEmpty() {
4273  fShortLength = 0;
4274  fFlags = kShortString;
4275}
4276
4277inline void
4278UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4279  setLength(len);
4280  fUnion.fFields.fArray = array;
4281  fUnion.fFields.fCapacity = capacity;
4282}
4283
4284inline const UChar *
4285UnicodeString::getTerminatedBuffer() {
4286  if(!isWritable()) {
4287    return 0;
4288  } else {
4289    UChar *array = getArrayStart();
4290    int32_t len = length();
4291    if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4292      /*
4293       * kRefCounted: Do not write the NUL if the buffer is shared.
4294       * That is mostly safe, except when the length of one copy was modified
4295       * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4296       * Then the NUL would be written into the middle of another copy's string.
4297       */
4298      if(!(fFlags&kBufferIsReadonly)) {
4299        /*
4300         * We must not write to a readonly buffer, but it is known to be
4301         * NUL-terminated if len<capacity.
4302         * A shared, allocated buffer (refCount()>1) must not have its contents
4303         * modified, but the NUL at [len] is beyond the string contents,
4304         * and multiple string objects and threads writing the same NUL into the
4305         * same location is harmless.
4306         * In all other cases, the buffer is fully writable and it is anyway safe
4307         * to write the NUL.
4308         *
4309         * Note: An earlier version of this code tested whether there is a NUL
4310         * at [len] already, but, while safe, it generated lots of warnings from
4311         * tools like valgrind and Purify.
4312         */
4313        array[len] = 0;
4314      }
4315      return array;
4316    } else if(cloneArrayIfNeeded(len+1)) {
4317      array = getArrayStart();
4318      array[len] = 0;
4319      return array;
4320    } else {
4321      return 0;
4322    }
4323  }
4324}
4325
4326inline UnicodeString&
4327UnicodeString::operator= (UChar ch)
4328{ return doReplace(0, length(), &ch, 0, 1); }
4329
4330inline UnicodeString&
4331UnicodeString::operator= (UChar32 ch)
4332{ return replace(0, length(), ch); }
4333
4334inline UnicodeString&
4335UnicodeString::setTo(const UnicodeString& srcText,
4336             int32_t srcStart,
4337             int32_t srcLength)
4338{
4339  unBogus();
4340  return doReplace(0, length(), srcText, srcStart, srcLength);
4341}
4342
4343inline UnicodeString&
4344UnicodeString::setTo(const UnicodeString& srcText,
4345             int32_t srcStart)
4346{
4347  unBogus();
4348  srcText.pinIndex(srcStart);
4349  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4350}
4351
4352inline UnicodeString&
4353UnicodeString::setTo(const UnicodeString& srcText)
4354{
4355  return copyFrom(srcText);
4356}
4357
4358inline UnicodeString&
4359UnicodeString::setTo(const UChar *srcChars,
4360             int32_t srcLength)
4361{
4362  unBogus();
4363  return doReplace(0, length(), srcChars, 0, srcLength);
4364}
4365
4366inline UnicodeString&
4367UnicodeString::setTo(UChar srcChar)
4368{
4369  unBogus();
4370  return doReplace(0, length(), &srcChar, 0, 1);
4371}
4372
4373inline UnicodeString&
4374UnicodeString::setTo(UChar32 srcChar)
4375{
4376  unBogus();
4377  return replace(0, length(), srcChar);
4378}
4379
4380inline UnicodeString&
4381UnicodeString::append(const UnicodeString& srcText,
4382              int32_t srcStart,
4383              int32_t srcLength)
4384{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
4385
4386inline UnicodeString&
4387UnicodeString::append(const UnicodeString& srcText)
4388{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4389
4390inline UnicodeString&
4391UnicodeString::append(const UChar *srcChars,
4392              int32_t srcStart,
4393              int32_t srcLength)
4394{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4395
4396inline UnicodeString&
4397UnicodeString::append(const UChar *srcChars,
4398              int32_t srcLength)
4399{ return doReplace(length(), 0, srcChars, 0, srcLength); }
4400
4401inline UnicodeString&
4402UnicodeString::append(UChar srcChar)
4403{ return doReplace(length(), 0, &srcChar, 0, 1); }
4404
4405inline UnicodeString&
4406UnicodeString::operator+= (UChar ch)
4407{ return doReplace(length(), 0, &ch, 0, 1); }
4408
4409inline UnicodeString&
4410UnicodeString::operator+= (UChar32 ch) {
4411  return append(ch);
4412}
4413
4414inline UnicodeString&
4415UnicodeString::operator+= (const UnicodeString& srcText)
4416{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4417
4418inline UnicodeString&
4419UnicodeString::insert(int32_t start,
4420              const UnicodeString& srcText,
4421              int32_t srcStart,
4422              int32_t srcLength)
4423{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4424
4425inline UnicodeString&
4426UnicodeString::insert(int32_t start,
4427              const UnicodeString& srcText)
4428{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4429
4430inline UnicodeString&
4431UnicodeString::insert(int32_t start,
4432              const UChar *srcChars,
4433              int32_t srcStart,
4434              int32_t srcLength)
4435{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4436
4437inline UnicodeString&
4438UnicodeString::insert(int32_t start,
4439              const UChar *srcChars,
4440              int32_t srcLength)
4441{ return doReplace(start, 0, srcChars, 0, srcLength); }
4442
4443inline UnicodeString&
4444UnicodeString::insert(int32_t start,
4445              UChar srcChar)
4446{ return doReplace(start, 0, &srcChar, 0, 1); }
4447
4448inline UnicodeString&
4449UnicodeString::insert(int32_t start,
4450              UChar32 srcChar)
4451{ return replace(start, 0, srcChar); }
4452
4453
4454inline UnicodeString&
4455UnicodeString::remove()
4456{
4457  // remove() of a bogus string makes the string empty and non-bogus
4458  // we also un-alias a read-only alias to deal with NUL-termination
4459  // issues with getTerminatedBuffer()
4460  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4461    setToEmpty();
4462  } else {
4463    fShortLength = 0;
4464  }
4465  return *this;
4466}
4467
4468inline UnicodeString&
4469UnicodeString::remove(int32_t start,
4470             int32_t _length)
4471{
4472    if(start <= 0 && _length == INT32_MAX) {
4473        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4474        return remove();
4475    }
4476    return doReplace(start, _length, NULL, 0, 0);
4477}
4478
4479inline UnicodeString&
4480UnicodeString::removeBetween(int32_t start,
4481                int32_t limit)
4482{ return doReplace(start, limit - start, NULL, 0, 0); }
4483
4484inline UnicodeString &
4485UnicodeString::retainBetween(int32_t start, int32_t limit) {
4486  truncate(limit);
4487  return doReplace(0, start, NULL, 0, 0);
4488}
4489
4490inline UBool
4491UnicodeString::truncate(int32_t targetLength)
4492{
4493  if(isBogus() && targetLength == 0) {
4494    // truncate(0) of a bogus string makes the string empty and non-bogus
4495    unBogus();
4496    return FALSE;
4497  } else if((uint32_t)targetLength < (uint32_t)length()) {
4498    setLength(targetLength);
4499    if(fFlags&kBufferIsReadonly) {
4500      fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
4501    }
4502    return TRUE;
4503  } else {
4504    return FALSE;
4505  }
4506}
4507
4508inline UnicodeString&
4509UnicodeString::reverse()
4510{ return doReverse(0, length()); }
4511
4512inline UnicodeString&
4513UnicodeString::reverse(int32_t start,
4514               int32_t _length)
4515{ return doReverse(start, _length); }
4516
4517U_NAMESPACE_END
4518
4519#endif
4520