1/*
2**********************************************************************
3*   Copyright (C) 1998-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   11/11/98    stephen     Changed per 11/9 code review.
14*   04/20/99    stephen     Overhauled per 4/16 code review.
15*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16*                           handleReplaceBetween(); other methods unchanged.
17*   06/25/01    grhoten     Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
29#include "unicode/utypes.h"
30#include "unicode/rep.h"
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
34
35struct UConverter;          // unicode/ucnv.h
36class  StringThreadTest;    // forward declaration only; NOTE(review): presumably a test class given access to UnicodeString internals -- confirm further down the file
37
38#ifndef U_COMPARE_CODE_POINT_ORDER
39/* Defined here only if not already defined; see also ustring.h and unorm.h */
40/**
41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
42 * Compare strings in code point order instead of code unit order.
43 * @stable ICU 2.2
44 */
45#define U_COMPARE_CODE_POINT_ORDER  0x8000
46#endif
47
48#ifndef USTRING_H
49/**
   * Forward declaration of u_strlen(), emitted only when ustring.h has not
   * been included yet (USTRING_H is not defined).
   * NOTE(review): presumably returns the length of the NUL-terminated string s
   * in UChar code units; see ustring.h for the authoritative documentation.
   *
50 * \ingroup ustring_ustrlen
51 */
52U_STABLE int32_t U_EXPORT2
53u_strlen(const UChar *s);
54#endif
55
56U_NAMESPACE_BEGIN
57
58class BreakIterator;        // unicode/brkiter.h
59class Locale;               // unicode/locid.h
60class StringCharacterIterator;  // NOTE(review): presumably unicode/schriter.h -- confirm
61class UnicodeStringAppendable;  // unicode/appendable.h
62
63/* The <iostream> include has been moved to unicode/ustream.h */
64
65/**
66 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
67 * which constructs a Unicode string from an invariant-character char * string.
68 * About invariant characters see utypes.h.
69 * This constructor has no runtime dependency on conversion code and is
70 * therefore recommended over ones taking a charset name string
71 * (where the empty string "" indicates invariant-character conversion).
72 *
   * The macro expands to UnicodeString::kInvariant prefixed with
   * U_NAMESPACE_QUALIFIER.
   *
73 * @stable ICU 3.2
74 */
75#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
76
77/**
78 * Unicode String literals in C++.
79 * Dependent on the platform properties, different UnicodeString
80 * constructors should be used to create a UnicodeString object from
81 * a string literal.
82 * The macros are defined for maximum performance.
83 * They work only for strings that contain "invariant characters", i.e.,
84 * only Latin letters, digits, and some punctuation.
85 * See utypes.h for details.
86 *
87 * The string parameter must be a C string literal.
88 * The length of the string, not including the terminating
89 * <code>NUL</code>, must be specified as a constant.
90 * The U_STRING_DECL macro should be invoked exactly once for one
91 * such string variable before it is used.
   *
   * Implementation selection (the first matching case is used):
   * - U_DECLARE_UTF16 is defined: the literal is passed through U_DECLARE_UTF16.
   * - wchar_t and UChar have the same size, and either the charset family is
   *   ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: the wide (L-prefixed)
   *   form of the literal is used.
   * - UChar is 1 byte and the charset family is ASCII: the char literal is used as is.
   * - Otherwise: the (cs, _length, US_INV) invariant-character constructor is used.
   *
   * In the first three cases the literal is cast to const UChar * and passed to the
   * UnicodeString(TRUE, const UChar *, length) constructor.
   * NOTE(review): presumably the read-only aliasing constructor -- confirm against
   * its declaration.
   *
92 * @stable ICU 2.0
93 */
94#if defined(U_DECLARE_UTF16)
95#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
96#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
97#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
98#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
99#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
100#else
101#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
102#endif
103
104/**
105 * Unicode String literals in C++.
106 * Dependent on the platform properties, different UnicodeString
107 * constructors should be used to create a UnicodeString object from
108 * a string literal.
109 * The macros are defined for improved performance.
110 * They work only for strings that contain "invariant characters", i.e.,
111 * only Latin letters, digits, and some punctuation.
112 * See utypes.h for details.
113 *
114 * The string parameter must be a C string literal.
    *
    * This is UNICODE_STRING(cs, -1), i.e. no explicit length is supplied.
    * NOTE(review): presumably -1 makes the constructor determine the length from
    * the NUL terminator -- confirm against the constructor documentation.
    *
115 * @stable ICU 2.0
116 */
117#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
118
119/**
120 * UnicodeString is a string class that stores Unicode characters directly and provides
121 * functionality similar to that of the Java String and StringBuffer classes.
122 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
123 *
124 * The UnicodeString class is not suitable for subclassing.
125 *
126 * <p>For an overview of Unicode strings in C and C++ see the
127 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
128 *
129 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
130 * A Unicode character may be stored with either one code unit
131 * (the most common case) or with a matched pair of special code units
132 * ("surrogates"). The data type for code units is UChar.
133 * For single-character handling, a Unicode character code <em>point</em> is a value
134 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
135 *
136 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
137 * This is the same as with multi-byte char* strings in traditional string handling.
138 * Operations on partial strings typically do not test for code point boundaries.
139 * If necessary, the user needs to take care of such boundaries by testing for the code unit
140 * values or by using functions like
141 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
142 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
143 *
144 * UnicodeString methods are more lenient with regard to input parameter values
145 * than other ICU APIs. In particular:
146 * - If indexes are out of bounds for a UnicodeString object
147 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
148 * - If primitive string pointer values (e.g., const UChar * or char *)
149 *   for input strings are NULL, then those input string parameters are treated
150 *   as if they pointed to an empty string.
151 *   However, this is <em>not</em> the case for char * parameters for charset names
152 *   or other IDs.
153 * - Most UnicodeString methods do not take a UErrorCode parameter because
154 *   there are usually very few opportunities for failure other than a shortage
155 *   of memory, error codes in low-level C++ string methods would be inconvenient,
156 *   and the error code as the last parameter (ICU convention) would prevent
157 *   the use of default parameter values.
158 *   Instead, such methods set the UnicodeString into a "bogus" state
159 *   (see isBogus()) if an error occurs.
160 *
161 * In string comparisons, two UnicodeString objects that are both "bogus"
162 * compare equal (to be transitive and prevent endless loops in sorting),
163 * and a "bogus" string compares less than any non-"bogus" one.
164 *
165 * Const UnicodeString methods are thread-safe. Multiple threads can use
166 * const methods on the same UnicodeString object simultaneously,
167 * but non-const methods must not be called concurrently (in multiple threads)
168 * with any other (const or non-const) methods.
169 *
170 * Similarly, const UnicodeString & parameters are thread-safe.
171 * One object may be passed in as such a parameter concurrently in multiple threads.
172 * This includes the const UnicodeString & parameters for
173 * copy construction, assignment, and cloning.
174 *
175 * <p>UnicodeString uses several storage methods.
176 * String contents can be stored inside the UnicodeString object itself,
177 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
178 * Most of this is done transparently, but careful aliasing in particular provides
179 * significant performance improvements.
180 * Also, the internal buffer is accessible via special functions.
181 * For details see the
182 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
183 *
184 * @see utf.h
185 * @see CharacterIterator
186 * @stable ICU 2.0
187 */
188class U_COMMON_API UnicodeString : public Replaceable
189{
190public:
191
192  /**
193   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
194   * which constructs a Unicode string from an invariant-character char * string.
195   * Use the macro US_INV instead of the full qualification for this value.
      * The enum has a single enumerator, kInvariant.
196   *
197   * @see US_INV
198   * @stable ICU 3.2
199   */
200  enum EInvariant {
201    /**
        * The value that US_INV expands to.
202     * @see EInvariant
203     * @stable ICU 3.2
204     */
205    kInvariant
206  };
207
208  //========================================
209  // Read-only operations
210  //========================================
211
212  /* Comparison - bitwise only - for international comparison use collation */
213
214  /**
215   * Equality operator. Performs only bitwise comparison.
      * Per the class description, two "bogus" strings compare equal.
216   * @param text The UnicodeString to compare to this one.
217   * @return TRUE if <TT>text</TT> contains the same characters as this one,
218   * FALSE otherwise.
219   * @stable ICU 2.0
220   */
221  inline UBool operator== (const UnicodeString& text) const;
222
223  /**
224   * Inequality operator. Performs only bitwise comparison.
      * Per the class description, two "bogus" strings compare equal
      * (i.e., they are not considered different).
225   * @param text The UnicodeString to compare to this one.
226   * @return FALSE if <TT>text</TT> contains the same characters as this one,
227   * TRUE otherwise.
228   * @stable ICU 2.0
229   */
230  inline UBool operator!= (const UnicodeString& text) const;
231
232  /**
233   * Greater than operator. Performs only bitwise comparison.
      * Bitwise order is code unit order, which can differ from code point order
      * when supplementary characters are present; see compareCodePointOrder().
234   * @param text The UnicodeString to compare to this one.
235   * @return TRUE if the characters in this are bitwise
236   * greater than the characters in <code>text</code>, FALSE otherwise
237   * @stable ICU 2.0
238   */
239  inline UBool operator> (const UnicodeString& text) const;
240
241  /**
242   * Less than operator. Performs only bitwise comparison.
      * Bitwise order is code unit order, which can differ from code point order
      * when supplementary characters are present; see compareCodePointOrder().
      * Per the class description, a "bogus" string compares less than any
      * non-"bogus" one.
243   * @param text The UnicodeString to compare to this one.
244   * @return TRUE if the characters in this are bitwise
245   * less than the characters in <code>text</code>, FALSE otherwise
246   * @stable ICU 2.0
247   */
248  inline UBool operator< (const UnicodeString& text) const;
249
250  /**
251   * Greater than or equal operator. Performs only bitwise comparison.
      * Bitwise order is code unit order, which can differ from code point order
      * when supplementary characters are present; see compareCodePointOrder().
252   * @param text The UnicodeString to compare to this one.
253   * @return TRUE if the characters in this are bitwise
254   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
255   * @stable ICU 2.0
256   */
257  inline UBool operator>= (const UnicodeString& text) const;
258
259  /**
260   * Less than or equal operator. Performs only bitwise comparison.
      * Bitwise order is code unit order, which can differ from code point order
      * when supplementary characters are present; see compareCodePointOrder().
261   * @param text The UnicodeString to compare to this one.
262   * @return TRUE if the characters in this are bitwise
263   * less than or equal to the characters in <code>text</code>, FALSE otherwise
264   * @stable ICU 2.0
265   */
266  inline UBool operator<= (const UnicodeString& text) const;
267
268  /**
269   * Compare the characters bitwise in this UnicodeString to
270   * the characters in <code>text</code>.
      * Bitwise order is code unit order, which can differ from code point order
      * when supplementary characters are present; see compareCodePointOrder().
271   * @param text The UnicodeString to compare to this one.
272   * @return The result of bitwise character comparison: 0 if this
273   * contains the same characters as <code>text</code>, -1 if the characters in
274   * this are bitwise less than the characters in <code>text</code>, +1 if the
275   * characters in this are bitwise greater than the characters
276   * in <code>text</code>.
277   * @stable ICU 2.0
278   */
279  inline int8_t compare(const UnicodeString& text) const;
280
281  /**
282   * Compare the characters bitwise in the range
283   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
284   * in <TT>text</TT>
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
285   * @param start the offset at which the compare operation begins
286   * @param length the number of code units of this string to compare.
287   * @param text the other text to be compared against this string.
288   * @return The result of bitwise character comparison: 0 if this
289   * contains the same characters as <code>text</code>, -1 if the characters in
290   * this are bitwise less than the characters in <code>text</code>, +1 if the
291   * characters in this are bitwise greater than the characters
292   * in <code>text</code>.
293   * @stable ICU 2.0
294   */
295  inline int8_t compare(int32_t start,
296         int32_t length,
297         const UnicodeString& text) const;
298
299  /**
300   * Compare the characters bitwise in the range
301   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
302   * in <TT>srcText</TT> in the range
303   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
304   * @param start the offset at which the compare operation begins
305   * @param length the number of code units in this to compare.
306   * @param srcText the text to be compared
307   * @param srcStart the offset into <TT>srcText</TT> to start comparison
308   * @param srcLength the number of code units in <TT>srcText</TT> to compare
309   * @return The result of bitwise character comparison: 0 if this
310   * contains the same characters as <code>srcText</code>, -1 if the characters in
311   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
312   * characters in this are bitwise greater than the characters
313   * in <code>srcText</code>.
314   * @stable ICU 2.0
315   */
316   inline int8_t compare(int32_t start,
317         int32_t length,
318         const UnicodeString& srcText,
319         int32_t srcStart,
320         int32_t srcLength) const;
321
322  /**
323   * Compare the characters bitwise in this UnicodeString with the first
324   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
325   * @param srcChars The characters to compare to this UnicodeString.
      * Per the class description, a NULL pointer is treated as an empty string.
326   * @param srcLength the number of code units in <TT>srcChars</TT> to compare
327   * @return The result of bitwise character comparison: 0 if this
328   * contains the same characters as <code>srcChars</code>, -1 if the characters in
329   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
330   * characters in this are bitwise greater than the characters
331   * in <code>srcChars</code>.
332   * @stable ICU 2.0
333   */
334  inline int8_t compare(const UChar *srcChars,
335         int32_t srcLength) const;
336
337  /**
338   * Compare the characters bitwise in the range
339   * [<TT>start</TT>, <TT>start + length</TT>) with the first
340   * <TT>length</TT> characters in <TT>srcChars</TT>
341   * @param start the offset at which the compare operation begins
342   * @param length the number of code units to compare.
343   * @param srcChars the characters to be compared
      * Per the class description, a NULL pointer is treated as an empty string.
344   * @return The result of bitwise character comparison: 0 if this
345   * contains the same characters as <code>srcChars</code>, -1 if the characters in
346   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
347   * characters in this are bitwise greater than the characters
348   * in <code>srcChars</code>.
349   * @stable ICU 2.0
350   */
351  inline int8_t compare(int32_t start,
352         int32_t length,
353         const UChar *srcChars) const;
354
355  /**
356   * Compare the characters bitwise in the range
357   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
358   * in <TT>srcChars</TT> in the range
359   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
360   * @param start the offset at which the compare operation begins
361   * @param length the number of code units in this to compare
362   * @param srcChars the characters to be compared
      * Per the class description, a NULL pointer is treated as an empty string.
363   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
364   * @param srcLength the number of code units in <TT>srcChars</TT> to compare
365   * @return The result of bitwise character comparison: 0 if this
366   * contains the same characters as <code>srcChars</code>, -1 if the characters in
367   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
368   * characters in this are bitwise greater than the characters
369   * in <code>srcChars</code>.
370   * @stable ICU 2.0
371   */
372  inline int8_t compare(int32_t start,
373         int32_t length,
374         const UChar *srcChars,
375         int32_t srcStart,
376         int32_t srcLength) const;
377
378  /**
379   * Compare the characters bitwise in the range
380   * [<TT>start</TT>, <TT>limit</TT>) with the characters
381   * in <TT>srcText</TT> in the range
382   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
      * Unlike compare(), the ranges are given as start/limit offsets rather than
      * start/length. Per the class description, out-of-bounds indexes are pinned
      * to the nearest boundary.
383   * @param start the offset at which the compare operation begins
384   * @param limit the offset immediately following the last code unit of this string to compare
385   * @param srcText the text to be compared
386   * @param srcStart the offset into <TT>srcText</TT> to start comparison
387   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
388   * @return The result of bitwise character comparison: 0 if this
389   * contains the same characters as <code>srcText</code>, -1 if the characters in
390   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
391   * characters in this are bitwise greater than the characters
392   * in <code>srcText</code>.
393   * @stable ICU 2.0
394   */
395  inline int8_t compareBetween(int32_t start,
396            int32_t limit,
397            const UnicodeString& srcText,
398            int32_t srcStart,
399            int32_t srcLimit) const;
400
401  /**
402   * Compare two Unicode strings in code point order.
403   * The result may be different from the results of compare(), operator<, etc.
404   * if supplementary characters are present:
405   *
406   * In UTF-16, supplementary characters (with code points U+10000 and above) are
407   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
408   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
409   * This function compares Unicode strings in code point order.
410   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
411   *
412   * @param text Another string to compare this one to.
413   * @return a negative/zero/positive integer corresponding to whether
414   * this string is less than/equal to/greater than the second one
415   * in code point order
416   * @stable ICU 2.0
417   */
418  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
419
420  /**
421   * Compare two Unicode strings in code point order.
422   * The result may be different from the results of compare(), operator<, etc.
423   * if supplementary characters are present:
424   *
425   * In UTF-16, supplementary characters (with code points U+10000 and above) are
426   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
427   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
428   * This function compares Unicode strings in code point order.
429   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
430   *
431   * @param start The start offset in this string at which the compare operation begins.
432   * @param length The number of code units from this string to compare.
433   * @param srcText Another string to compare this one to.
434   * @return a negative/zero/positive integer corresponding to whether
435   * this string is less than/equal to/greater than the second one
436   * in code point order
437   * @stable ICU 2.0
438   */
439  inline int8_t compareCodePointOrder(int32_t start,
440                                      int32_t length,
441                                      const UnicodeString& srcText) const;
442
443  /**
444   * Compare two Unicode strings in code point order.
445   * The result may be different from the results of compare(), operator<, etc.
446   * if supplementary characters are present:
447   *
448   * In UTF-16, supplementary characters (with code points U+10000 and above) are
449   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
450   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
451   * This function compares Unicode strings in code point order.
452   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
453   *
454   * @param start The start offset in this string at which the compare operation begins.
455   * @param length The number of code units from this string to compare.
456   * @param srcText Another string to compare this one to.
457   * @param srcStart The start offset in that string at which the compare operation begins.
458   * @param srcLength The number of code units from that string to compare.
459   * @return a negative/zero/positive integer corresponding to whether
460   * this string is less than/equal to/greater than the second one
461   * in code point order
462   * @stable ICU 2.0
463   */
464   inline int8_t compareCodePointOrder(int32_t start,
465                                       int32_t length,
466                                       const UnicodeString& srcText,
467                                       int32_t srcStart,
468                                       int32_t srcLength) const;
469
470  /**
471   * Compare two Unicode strings in code point order.
472   * The result may be different from the results of compare(), operator<, etc.
473   * if supplementary characters are present:
474   *
475   * In UTF-16, supplementary characters (with code points U+10000 and above) are
476   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
477   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
478   * This function compares Unicode strings in code point order.
479   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
480   *
481   * @param srcChars A pointer to another string to compare this one to.
      * Per the class description, a NULL pointer is treated as an empty string.
482   * @param srcLength The number of code units from that string to compare.
483   * @return a negative/zero/positive integer corresponding to whether
484   * this string is less than/equal to/greater than the second one
485   * in code point order
486   * @stable ICU 2.0
487   */
488  inline int8_t compareCodePointOrder(const UChar *srcChars,
489                                      int32_t srcLength) const;
490
491  /**
492   * Compare two Unicode strings in code point order.
493   * The result may be different from the results of compare(), operator<, etc.
494   * if supplementary characters are present:
495   *
496   * In UTF-16, supplementary characters (with code points U+10000 and above) are
497   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
498   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
499   * This function compares Unicode strings in code point order.
500   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
501   *
502   * @param start The start offset in this string at which the compare operation begins.
503   * @param length The number of code units from this string to compare.
504   * @param srcChars A pointer to another string to compare this one to.
      * Per the class description, a NULL pointer is treated as an empty string.
      * NOTE(review): the number of code units read from srcChars is not stated here;
      * presumably <TT>length</TT>, as documented for compare(start, length, srcChars) -- confirm.
505   * @return a negative/zero/positive integer corresponding to whether
506   * this string is less than/equal to/greater than the second one
507   * in code point order
508   * @stable ICU 2.0
509   */
510  inline int8_t compareCodePointOrder(int32_t start,
511                                      int32_t length,
512                                      const UChar *srcChars) const;
513
514  /**
515   * Compare two Unicode strings in code point order.
516   * The result may be different from the results of compare(), operator<, etc.
517   * if supplementary characters are present:
518   *
519   * In UTF-16, supplementary characters (with code points U+10000 and above) are
520   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
521   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
522   * This function compares Unicode strings in code point order.
523   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
524   *
525   * @param start The start offset in this string at which the compare operation begins.
526   * @param length The number of code units from this string to compare.
527   * @param srcChars A pointer to another string to compare this one to.
      * Per the class description, a NULL pointer is treated as an empty string.
528   * @param srcStart The start offset in that string at which the compare operation begins.
529   * @param srcLength The number of code units from that string to compare.
530   * @return a negative/zero/positive integer corresponding to whether
531   * this string is less than/equal to/greater than the second one
532   * in code point order
533   * @stable ICU 2.0
534   */
535  inline int8_t compareCodePointOrder(int32_t start,
536                                      int32_t length,
537                                      const UChar *srcChars,
538                                      int32_t srcStart,
539                                      int32_t srcLength) const;
540
541  /**
542   * Compare two Unicode strings in code point order.
543   * The result may be different from the results of compare(), operator<, etc.
544   * if supplementary characters are present:
545   *
546   * In UTF-16, supplementary characters (with code points U+10000 and above) are
547   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
548   * which means that, in code unit order, they compare as less than some other BMP characters like U+feff.
549   * This function compares Unicode strings in code point order.
550   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates),
      * then the result is not defined.
      * Unlike compareCodePointOrder(), the ranges are given as start/limit offsets
      * rather than start/length.
551   *
552   * @param start The start offset in this string at which the compare operation begins.
553   * @param limit The offset after the last code unit from this string to compare.
554   * @param srcText Another string to compare this one to.
555   * @param srcStart The start offset in that string at which the compare operation begins.
556   * @param srcLimit The offset after the last code unit from that string to compare.
557   * @return a negative/zero/positive integer corresponding to whether
558   * this string is less than/equal to/greater than the second one
559   * in code point order
560   * @stable ICU 2.0
561   */
562  inline int8_t compareCodePointOrderBetween(int32_t start,
563                                             int32_t limit,
564                                             const UnicodeString& srcText,
565                                             int32_t srcStart,
566                                             int32_t srcLimit) const;
567
568  /**
569   * Compare two strings case-insensitively using full case folding.
570   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
571   *
572   * @param text Another string to compare this one to.
573   * @param options A bit set of options:
574   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
575   *     Comparison in code unit order with default case folding.
576   *
577   *   - U_COMPARE_CODE_POINT_ORDER
578   *     Set to choose code point order instead of code unit order
579   *     (see u_strCompare for details).
580   *
581   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
      *     (not described here; NOTE(review): presumably documented with its
      *     definition in unicode/uchar.h -- confirm)
582   *
583   * @return A negative, zero, or positive integer indicating the comparison result.
584   * @stable ICU 2.0
585   */
586  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
587
588  /**
589   * Compare two strings case-insensitively using full case folding.
590   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
591   *
592   * @param start The start offset in this string at which the compare operation begins.
593   * @param length The number of code units from this string to compare.
594   * @param srcText Another string to compare this one to.
595   * @param options A bit set of options:
596   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
597   *     Comparison in code unit order with default case folding.
598   *
599   *   - U_COMPARE_CODE_POINT_ORDER
600   *     Set to choose code point order instead of code unit order
601   *     (see u_strCompare for details).
602   *
603   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
      *     (not described here; NOTE(review): presumably documented with its
      *     definition in unicode/uchar.h -- confirm)
604   *
605   * @return A negative, zero, or positive integer indicating the comparison result.
606   * @stable ICU 2.0
607   */
608  inline int8_t caseCompare(int32_t start,
609         int32_t length,
610         const UnicodeString& srcText,
611         uint32_t options) const;
612
613  /**
614   * Compare two strings case-insensitively using full case folding.
615   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
      * Per the class description, out-of-bounds indexes are pinned to the
      * nearest boundary.
616   *
617   * @param start The start offset in this string at which the compare operation begins.
618   * @param length The number of code units from this string to compare.
619   * @param srcText Another string to compare this one to.
620   * @param srcStart The start offset in that string at which the compare operation begins.
621   * @param srcLength The number of code units from that string to compare.
622   * @param options A bit set of options:
623   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
624   *     Comparison in code unit order with default case folding.
625   *
626   *   - U_COMPARE_CODE_POINT_ORDER
627   *     Set to choose code point order instead of code unit order
628   *     (see u_strCompare for details).
629   *
630   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
      *     (not described here; NOTE(review): presumably documented with its
      *     definition in unicode/uchar.h -- confirm)
631   *
632   * @return A negative, zero, or positive integer indicating the comparison result.
633   * @stable ICU 2.0
634   */
635  inline int8_t caseCompare(int32_t start,
636         int32_t length,
637         const UnicodeString& srcText,
638         int32_t srcStart,
639         int32_t srcLength,
640         uint32_t options) const;
641
642  /**
643   * Compare two strings case-insensitively using full case folding.
644   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
      * (The expression is illustrative rather than literal C++: srcChars is a
      * const UChar * here, not a UnicodeString.)
645   *
646   * @param srcChars A pointer to another string to compare this one to.
      * Per the class description, a NULL pointer is treated as an empty string.
647   * @param srcLength The number of code units from that string to compare.
648   * @param options A bit set of options:
649   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
650   *     Comparison in code unit order with default case folding.
651   *
652   *   - U_COMPARE_CODE_POINT_ORDER
653   *     Set to choose code point order instead of code unit order
654   *     (see u_strCompare for details).
655   *
656   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
      *     (not described here; NOTE(review): presumably documented with its
      *     definition in unicode/uchar.h -- confirm)
657   *
658   * @return A negative, zero, or positive integer indicating the comparison result.
659   * @stable ICU 2.0
660   */
661  inline int8_t caseCompare(const UChar *srcChars,
662         int32_t srcLength,
663         uint32_t options) const;
664
665  /**
666   * Compare two strings case-insensitively using full case folding.
667   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
      * (The expression is illustrative rather than literal C++: srcChars is a
      * const UChar * here, not a UnicodeString.)
668   *
669   * @param start The start offset in this string at which the compare operation begins.
670   * @param length The number of code units from this string to compare.
671   * @param srcChars A pointer to another string to compare this one to.
      * Per the class description, a NULL pointer is treated as an empty string.
      * NOTE(review): the number of code units read from srcChars is not stated here;
      * presumably <TT>length</TT>, as documented for compare(start, length, srcChars) -- confirm.
672   * @param options A bit set of options:
673   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
674   *     Comparison in code unit order with default case folding.
675   *
676   *   - U_COMPARE_CODE_POINT_ORDER
677   *     Set to choose code point order instead of code unit order
678   *     (see u_strCompare for details).
679   *
680   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
      *     (not described here; NOTE(review): presumably documented with its
      *     definition in unicode/uchar.h -- confirm)
681   *
682   * @return A negative, zero, or positive integer indicating the comparison result.
683   * @stable ICU 2.0
684   */
685  inline int8_t caseCompare(int32_t start,
686         int32_t length,
687         const UChar *srcChars,
688         uint32_t options) const;
689
690  /**
691   * Compare two strings case-insensitively using full case folding.
692   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
693   *
694   * @param start The start offset in this string at which the compare operation begins.
695   * @param length The number of code units from this string to compare.
696   * @param srcChars A pointer to another string to compare this one to.
697   * @param srcStart The start offset in that string at which the compare operation begins.
698   * @param srcLength The number of code units from that string to compare.
699   * @param options A bit set of options:
700   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
701   *     Comparison in code unit order with default case folding.
702   *
703   *   - U_COMPARE_CODE_POINT_ORDER
704   *     Set to choose code point order instead of code unit order
705   *     (see u_strCompare for details).
706   *
707   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
708   *
709   * @return A negative, zero, or positive integer indicating the comparison result.
710   * @stable ICU 2.0
711   */
712  inline int8_t caseCompare(int32_t start,
713         int32_t length,
714         const UChar *srcChars,
715         int32_t srcStart,
716         int32_t srcLength,
717         uint32_t options) const;
718
719  /**
720   * Compare two strings case-insensitively using full case folding.
721   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
722   *
723   * @param start The start offset in this string at which the compare operation begins.
724   * @param limit The offset after the last code unit from this string to compare.
725   * @param srcText Another string to compare this one to.
726   * @param srcStart The start offset in that string at which the compare operation begins.
727   * @param srcLimit The offset after the last code unit from that string to compare.
728   * @param options A bit set of options:
729   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
730   *     Comparison in code unit order with default case folding.
731   *
732   *   - U_COMPARE_CODE_POINT_ORDER
733   *     Set to choose code point order instead of code unit order
734   *     (see u_strCompare for details).
735   *
736   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
737   *
738   * @return A negative, zero, or positive integer indicating the comparison result.
739   * @stable ICU 2.0
740   */
741  inline int8_t caseCompareBetween(int32_t start,
742            int32_t limit,
743            const UnicodeString& srcText,
744            int32_t srcStart,
745            int32_t srcLimit,
746            uint32_t options) const;
747
748  /**
749   * Determine if this starts with the characters in <TT>text</TT>
750   * @param text The text to match.
751   * @return TRUE if this starts with the characters in <TT>text</TT>,
752   * FALSE otherwise
753   * @stable ICU 2.0
754   */
755  inline UBool startsWith(const UnicodeString& text) const;
756
757  /**
758   * Determine if this starts with the characters in <TT>srcText</TT>
759   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
760   * @param srcText The text to match.
761   * @param srcStart the offset into <TT>srcText</TT> to start matching
762   * @param srcLength the number of characters in <TT>srcText</TT> to match
763   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
764   * FALSE otherwise
765   * @stable ICU 2.0
766   */
767  inline UBool startsWith(const UnicodeString& srcText,
768            int32_t srcStart,
769            int32_t srcLength) const;
770
771  /**
772   * Determine if this starts with the characters in <TT>srcChars</TT>
773   * @param srcChars The characters to match.
774   * @param srcLength the number of characters in <TT>srcChars</TT>
775   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
776   * FALSE otherwise
777   * @stable ICU 2.0
778   */
779  inline UBool startsWith(const UChar *srcChars,
780            int32_t srcLength) const;
781
782  /**
783   * Determine if this starts with the characters in <TT>srcChars</TT>
784   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
785   * @param srcChars The characters to match.
786   * @param srcStart the offset into <TT>srcChars</TT> to start matching
787   * @param srcLength the number of characters in <TT>srcChars</TT> to match
788   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
789   * @stable ICU 2.0
790   */
791  inline UBool startsWith(const UChar *srcChars,
792            int32_t srcStart,
793            int32_t srcLength) const;
794
795  /**
796   * Determine if this ends with the characters in <TT>text</TT>
797   * @param text The text to match.
798   * @return TRUE if this ends with the characters in <TT>text</TT>,
799   * FALSE otherwise
800   * @stable ICU 2.0
801   */
802  inline UBool endsWith(const UnicodeString& text) const;
803
804  /**
805   * Determine if this ends with the characters in <TT>srcText</TT>
806   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
807   * @param srcText The text to match.
808   * @param srcStart the offset into <TT>srcText</TT> to start matching
809   * @param srcLength the number of characters in <TT>srcText</TT> to match
810   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
811   * FALSE otherwise
812   * @stable ICU 2.0
813   */
814  inline UBool endsWith(const UnicodeString& srcText,
815          int32_t srcStart,
816          int32_t srcLength) const;
817
818  /**
819   * Determine if this ends with the characters in <TT>srcChars</TT>
820   * @param srcChars The characters to match.
821   * @param srcLength the number of characters in <TT>srcChars</TT>
822   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
823   * FALSE otherwise
824   * @stable ICU 2.0
825   */
826  inline UBool endsWith(const UChar *srcChars,
827          int32_t srcLength) const;
828
829  /**
830   * Determine if this ends with the characters in <TT>srcChars</TT>
831   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
832   * @param srcChars The characters to match.
833   * @param srcStart the offset into <TT>srcChars</TT> to start matching
834   * @param srcLength the number of characters in <TT>srcChars</TT> to match
835   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
836   * FALSE otherwise
837   * @stable ICU 2.0
838   */
839  inline UBool endsWith(const UChar *srcChars,
840          int32_t srcStart,
841          int32_t srcLength) const;
842
843
844  /* Searching - bitwise only */
845
846  /**
847   * Locate in this the first occurrence of the characters in <TT>text</TT>,
848   * using bitwise comparison.
849   * @param text The text to search for.
850   * @return The offset into this of the start of <TT>text</TT>,
851   * or -1 if not found.
852   * @stable ICU 2.0
853   */
854  inline int32_t indexOf(const UnicodeString& text) const;
855
856  /**
857   * Locate in this the first occurrence of the characters in <TT>text</TT>
858   * starting at offset <TT>start</TT>, using bitwise comparison.
859   * @param text The text to search for.
860   * @param start The offset at which searching will start.
861   * @return The offset into this of the start of <TT>text</TT>,
862   * or -1 if not found.
863   * @stable ICU 2.0
864   */
865  inline int32_t indexOf(const UnicodeString& text,
866              int32_t start) const;
867
868  /**
869   * Locate in this the first occurrence in the range
870   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
871   * in <TT>text</TT>, using bitwise comparison.
872   * @param text The text to search for.
873   * @param start The offset at which searching will start.
874   * @param length The number of characters to search
875   * @return The offset into this of the start of <TT>text</TT>,
876   * or -1 if not found.
877   * @stable ICU 2.0
878   */
879  inline int32_t indexOf(const UnicodeString& text,
880              int32_t start,
881              int32_t length) const;
882
883  /**
884   * Locate in this the first occurrence in the range
885   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
886   *  in <TT>srcText</TT> in the range
887   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
888   * using bitwise comparison.
889   * @param srcText The text to search for.
890   * @param srcStart the offset into <TT>srcText</TT> at which
891   * to start matching
892   * @param srcLength the number of characters in <TT>srcText</TT> to match
893   * @param start the offset into this at which to start matching
894   * @param length the number of characters in this to search
895   * @return The offset into this of the start of <TT>srcText</TT>,
896   * or -1 if not found.
897   * @stable ICU 2.0
898   */
899  inline int32_t indexOf(const UnicodeString& srcText,
900              int32_t srcStart,
901              int32_t srcLength,
902              int32_t start,
903              int32_t length) const;
904
905  /**
906   * Locate in this the first occurrence of the characters in
907   * <TT>srcChars</TT>
908   * starting at offset <TT>start</TT>, using bitwise comparison.
909   * @param srcChars The text to search for.
910   * @param srcLength the number of characters in <TT>srcChars</TT> to match
911   * @param start the offset into this at which to start matching
912   * @return The offset into this of the start of <TT>srcChars</TT>,
913   * or -1 if not found.
914   * @stable ICU 2.0
915   */
916  inline int32_t indexOf(const UChar *srcChars,
917              int32_t srcLength,
918              int32_t start) const;
919
920  /**
921   * Locate in this the first occurrence in the range
922   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
923   * in <TT>srcChars</TT>, using bitwise comparison.
924   * @param srcChars The text to search for.
925   * @param srcLength the number of characters in <TT>srcChars</TT>
926   * @param start The offset at which searching will start.
927   * @param length The number of characters to search
928   * @return The offset into this of the start of <TT>srcChars</TT>,
929   * or -1 if not found.
930   * @stable ICU 2.0
931   */
932  inline int32_t indexOf(const UChar *srcChars,
933              int32_t srcLength,
934              int32_t start,
935              int32_t length) const;
936
937  /**
938   * Locate in this the first occurrence in the range
939   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
940   * in <TT>srcChars</TT> in the range
941   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
942   * using bitwise comparison.
943   * @param srcChars The text to search for.
944   * @param srcStart the offset into <TT>srcChars</TT> at which
945   * to start matching
946   * @param srcLength the number of characters in <TT>srcChars</TT> to match
947   * @param start the offset into this at which to start matching
948   * @param length the number of characters in this to search
949   * @return The offset into this of the start of <TT>srcChars</TT>,
950   * or -1 if not found.
951   * @stable ICU 2.0
952   */
953  int32_t indexOf(const UChar *srcChars,
954              int32_t srcStart,
955              int32_t srcLength,
956              int32_t start,
957              int32_t length) const;
958
959  /**
960   * Locate in this the first occurrence of the BMP code point <code>c</code>,
961   * using bitwise comparison.
962   * @param c The code unit to search for.
963   * @return The offset into this of <TT>c</TT>, or -1 if not found.
964   * @stable ICU 2.0
965   */
966  inline int32_t indexOf(UChar c) const;
967
968  /**
969   * Locate in this the first occurrence of the code point <TT>c</TT>,
970   * using bitwise comparison.
971   *
972   * @param c The code point to search for.
973   * @return The offset into this of <TT>c</TT>, or -1 if not found.
974   * @stable ICU 2.0
975   */
976  inline int32_t indexOf(UChar32 c) const;
977
978  /**
979   * Locate in this the first occurrence of the BMP code point <code>c</code>,
980   * starting at offset <TT>start</TT>, using bitwise comparison.
981   * @param c The code unit to search for.
982   * @param start The offset at which searching will start.
983   * @return The offset into this of <TT>c</TT>, or -1 if not found.
984   * @stable ICU 2.0
985   */
986  inline int32_t indexOf(UChar c,
987              int32_t start) const;
988
989  /**
990   * Locate in this the first occurrence of the code point <TT>c</TT>
991   * starting at offset <TT>start</TT>, using bitwise comparison.
992   *
993   * @param c The code point to search for.
994   * @param start The offset at which searching will start.
995   * @return The offset into this of <TT>c</TT>, or -1 if not found.
996   * @stable ICU 2.0
997   */
998  inline int32_t indexOf(UChar32 c,
999              int32_t start) const;
1000
1001  /**
1002   * Locate in this the first occurrence of the BMP code point <code>c</code>
1003   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1004   * using bitwise comparison.
1005   * @param c The code unit to search for.
1006   * @param start the offset into this at which to start matching
1007   * @param length the number of characters in this to search
1008   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1009   * @stable ICU 2.0
1010   */
1011  inline int32_t indexOf(UChar c,
1012              int32_t start,
1013              int32_t length) const;
1014
1015  /**
1016   * Locate in this the first occurrence of the code point <TT>c</TT>
1017   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1018   * using bitwise comparison.
1019   *
1020   * @param c The code point to search for.
1021   * @param start the offset into this at which to start matching
1022   * @param length the number of characters in this to search
1023   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1024   * @stable ICU 2.0
1025   */
1026  inline int32_t indexOf(UChar32 c,
1027              int32_t start,
1028              int32_t length) const;
1029
1030  /**
1031   * Locate in this the last occurrence of the characters in <TT>text</TT>,
1032   * using bitwise comparison.
1033   * @param text The text to search for.
1034   * @return The offset into this of the start of <TT>text</TT>,
1035   * or -1 if not found.
1036   * @stable ICU 2.0
1037   */
1038  inline int32_t lastIndexOf(const UnicodeString& text) const;
1039
1040  /**
1041   * Locate in this the last occurrence of the characters in <TT>text</TT>
1042   * starting at offset <TT>start</TT>, using bitwise comparison.
1043   * @param text The text to search for.
1044   * @param start The offset at which searching will start.
1045   * @return The offset into this of the start of <TT>text</TT>,
1046   * or -1 if not found.
1047   * @stable ICU 2.0
1048   */
1049  inline int32_t lastIndexOf(const UnicodeString& text,
1050              int32_t start) const;
1051
1052  /**
1053   * Locate in this the last occurrence in the range
1054   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1055   * in <TT>text</TT>, using bitwise comparison.
1056   * @param text The text to search for.
1057   * @param start The offset at which searching will start.
1058   * @param length The number of characters to search
1059   * @return The offset into this of the start of <TT>text</TT>,
1060   * or -1 if not found.
1061   * @stable ICU 2.0
1062   */
1063  inline int32_t lastIndexOf(const UnicodeString& text,
1064              int32_t start,
1065              int32_t length) const;
1066
1067  /**
1068   * Locate in this the last occurrence in the range
1069   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1070   * in <TT>srcText</TT> in the range
1071   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1072   * using bitwise comparison.
1073   * @param srcText The text to search for.
1074   * @param srcStart the offset into <TT>srcText</TT> at which
1075   * to start matching
1076   * @param srcLength the number of characters in <TT>srcText</TT> to match
1077   * @param start the offset into this at which to start matching
1078   * @param length the number of characters in this to search
1079   * @return The offset into this of the start of <TT>srcText</TT>,
1080   * or -1 if not found.
1081   * @stable ICU 2.0
1082   */
1083  inline int32_t lastIndexOf(const UnicodeString& srcText,
1084              int32_t srcStart,
1085              int32_t srcLength,
1086              int32_t start,
1087              int32_t length) const;
1088
1089  /**
1090   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1091   * starting at offset <TT>start</TT>, using bitwise comparison.
1092   * @param srcChars The text to search for.
1093   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1094   * @param start the offset into this at which to start matching
1095   * @return The offset into this of the start of <TT>srcChars</TT>,
1096   * or -1 if not found.
1097   * @stable ICU 2.0
1098   */
1099  inline int32_t lastIndexOf(const UChar *srcChars,
1100              int32_t srcLength,
1101              int32_t start) const;
1102
1103  /**
1104   * Locate in this the last occurrence in the range
1105   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1106   * in <TT>srcChars</TT>, using bitwise comparison.
1107   * @param srcChars The text to search for.
1108   * @param srcLength the number of characters in <TT>srcChars</TT>
1109   * @param start The offset at which searching will start.
1110   * @param length The number of characters to search
1111   * @return The offset into this of the start of <TT>srcChars</TT>,
1112   * or -1 if not found.
1113   * @stable ICU 2.0
1114   */
1115  inline int32_t lastIndexOf(const UChar *srcChars,
1116              int32_t srcLength,
1117              int32_t start,
1118              int32_t length) const;
1119
1120  /**
1121   * Locate in this the last occurrence in the range
1122   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1123   * in <TT>srcChars</TT> in the range
1124   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1125   * using bitwise comparison.
1126   * @param srcChars The text to search for.
1127   * @param srcStart the offset into <TT>srcChars</TT> at which
1128   * to start matching
1129   * @param srcLength the number of characters in <TT>srcChars</TT> to match
1130   * @param start the offset into this at which to start matching
1131   * @param length the number of characters in this to search
1132   * @return The offset into this of the start of <TT>srcChars</TT>,
1133   * or -1 if not found.
1134   * @stable ICU 2.0
1135   */
1136  int32_t lastIndexOf(const UChar *srcChars,
1137              int32_t srcStart,
1138              int32_t srcLength,
1139              int32_t start,
1140              int32_t length) const;
1141
1142  /**
1143   * Locate in this the last occurrence of the BMP code point <code>c</code>,
1144   * using bitwise comparison.
1145   * @param c The code unit to search for.
1146   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1147   * @stable ICU 2.0
1148   */
1149  inline int32_t lastIndexOf(UChar c) const;
1150
1151  /**
1152   * Locate in this the last occurrence of the code point <TT>c</TT>,
1153   * using bitwise comparison.
1154   *
1155   * @param c The code point to search for.
1156   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1157   * @stable ICU 2.0
1158   */
1159  inline int32_t lastIndexOf(UChar32 c) const;
1160
1161  /**
1162   * Locate in this the last occurrence of the BMP code point <code>c</code>
1163   * starting at offset <TT>start</TT>, using bitwise comparison.
1164   * @param c The code unit to search for.
1165   * @param start The offset at which searching will start.
1166   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1167   * @stable ICU 2.0
1168   */
1169  inline int32_t lastIndexOf(UChar c,
1170              int32_t start) const;
1171
1172  /**
1173   * Locate in this the last occurrence of the code point <TT>c</TT>
1174   * starting at offset <TT>start</TT>, using bitwise comparison.
1175   *
1176   * @param c The code point to search for.
1177   * @param start The offset at which searching will start.
1178   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1179   * @stable ICU 2.0
1180   */
1181  inline int32_t lastIndexOf(UChar32 c,
1182              int32_t start) const;
1183
1184  /**
1185   * Locate in this the last occurrence of the BMP code point <code>c</code>
1186   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1187   * using bitwise comparison.
1188   * @param c The code unit to search for.
1189   * @param start the offset into this at which to start matching
1190   * @param length the number of characters in this to search
1191   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1192   * @stable ICU 2.0
1193   */
1194  inline int32_t lastIndexOf(UChar c,
1195              int32_t start,
1196              int32_t length) const;
1197
1198  /**
1199   * Locate in this the last occurrence of the code point <TT>c</TT>
1200   * in the range [<TT>start</TT>, <TT>start + length</TT>),
1201   * using bitwise comparison.
1202   *
1203   * @param c The code point to search for.
1204   * @param start the offset into this at which to start matching
1205   * @param length the number of characters in this to search
1206   * @return The offset into this of <TT>c</TT>, or -1 if not found.
1207   * @stable ICU 2.0
1208   */
1209  inline int32_t lastIndexOf(UChar32 c,
1210              int32_t start,
1211              int32_t length) const;
1212
1213
1214  /* Character access */
1215
1216  /**
1217   * Return the code unit at offset <tt>offset</tt>.
1218   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1219   * @param offset a valid offset into the text
1220   * @return the code unit at offset <tt>offset</tt>
1221   *         or 0xffff if the offset is not valid for this string
1222   * @stable ICU 2.0
1223   */
1224  inline UChar charAt(int32_t offset) const;
1225
1226  /**
1227   * Return the code unit at offset <tt>offset</tt>.
1228   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1229   * @param offset a valid offset into the text
1230   * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid
1231   * @stable ICU 2.0
1232   */
1233  inline UChar operator[] (int32_t offset) const;
1234
1235  /**
1236   * Return the code point that contains the code unit
1237   * at offset <tt>offset</tt>.
1238   * If the offset is not valid (0..length()-1) then U+ffff is returned.
1239   * @param offset a valid offset into the text
1240   * that indicates the text offset of any of the code units
1241   * that will be assembled into a code point (21-bit value) and returned
1242   * @return the code point of text at <tt>offset</tt>
1243   *         or 0xffff if the offset is not valid for this string
1244   * @stable ICU 2.0
1245   */
1246  inline UChar32 char32At(int32_t offset) const;
1247
1248  /**
1249   * Adjust a random-access offset so that
1250   * it points to the beginning of a Unicode character.
1251   * The offset that is passed in points to
1252   * any code unit of a code point,
1253   * while the returned offset will point to the first code unit
1254   * of the same code point.
1255   * In UTF-16, if the input offset points to a second surrogate
1256   * of a surrogate pair, then the returned offset will point
1257   * to the first surrogate.
1258   * @param offset a valid offset into one code point of the text
1259   * @return offset of the first code unit of the same code point
1260   * @see U16_SET_CP_START
1261   * @stable ICU 2.0
1262   */
1263  inline int32_t getChar32Start(int32_t offset) const;
1264
1265  /**
1266   * Adjust a random-access offset so that
1267   * it points behind a Unicode character.
1268   * The offset that is passed in points behind
1269   * any code unit of a code point,
1270   * while the returned offset will point behind the last code unit
1271   * of the same code point.
1272   * In UTF-16, if the input offset points behind the first surrogate
1273   * (i.e., to the second surrogate)
1274   * of a surrogate pair, then the returned offset will point
1275   * behind the second surrogate (i.e., to the first code unit after the pair).
1276   * @param offset a valid offset after any code unit of a code point of the text
1277   * @return offset of the first code unit after the same code point
1278   * @see U16_SET_CP_LIMIT
1279   * @stable ICU 2.0
1280   */
1281  inline int32_t getChar32Limit(int32_t offset) const;
1282
1283  /**
1284   * Move the code unit index along the string by delta code points.
1285   * Interpret the input index as a code unit-based offset into the string,
1286   * move the index forward or backward by delta code points, and
1287   * return the resulting index.
1288   * The input index should point to the first code unit of a code point,
1289   * if there is more than one.
1290   *
1291   * Both input and output indexes are code unit-based as for all
1292   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1293   * If delta<0 then the index is moved backward (toward the start of the string).
1294   * If delta>0 then the index is moved forward (toward the end of the string).
1295   *
1296   * This behaves like CharacterIterator::move32(delta, kCurrent).
1297   *
1298   * Behavior for out-of-bounds indexes:
1299   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1300   * if the input index<0 then it is pinned to 0;
1301   * if it is index>length() then it is pinned to length().
1302   * Afterwards, the index is moved by <code>delta</code> code points
1303   * forward or backward,
1304   * but no further backward than to 0 and no further forward than to length().
1305   * The resulting index return value will be in between 0 and length(), inclusively.
1306   *
1307   * Examples:
1308   * <pre>
1309   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1310   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1311   *
1312   * // initial index: position of U+10000
1313   * int32_t index=1;
1314   *
1315   * // the following examples will all result in index==4, position of U+10ffff
1316   *
1317   * // skip 2 code points from some position in the string
1318   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1319   *
1320   * // go to the 3rd code point from the start of s (0-based)
1321   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1322   *
1323   * // go to the next-to-last code point of s
1324   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1325   * </pre>
1326   *
1327   * @param index input code unit index
1328   * @param delta (signed) code point count to move the index forward or backward
1329   *        in the string
1330   * @return the resulting code unit index
1331   * @stable ICU 2.0
1332   */
1333  int32_t moveIndex32(int32_t index, int32_t delta) const;
1334
1335  /* Substring extraction */
1336
1337  /**
1338   * Copy the characters in the range
1339   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1340   * beginning at <tt>dstStart</tt>.
1341   * If the string aliases to <code>dst</code> itself as an external buffer,
1342   * then extract() will not copy the contents.
1343   *
1344   * @param start offset of first character which will be copied into the array
1345   * @param length the number of characters to extract
1346   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1347   * must be at least (<tt>dstStart + length</tt>).
1348   * @param dstStart the offset in <TT>dst</TT> where the first character
1349   * will be extracted
1350   * @stable ICU 2.0
1351   */
1352  inline void extract(int32_t start,
1353           int32_t length,
1354           UChar *dst,
1355           int32_t dstStart = 0) const;
1356
1357  /**
1358   * Copy the contents of the string into dest.
1359   * This is a convenience function that
1360   * checks if there is enough space in dest,
1361   * extracts the entire string if possible,
1362   * and NUL-terminates dest if possible.
1363   *
1364   * If the string fits into dest but cannot be NUL-terminated
1365   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1366   * If the string itself does not fit into dest
1367   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1368   *
1369   * If the string aliases to <code>dest</code> itself as an external buffer,
1370   * then extract() will not copy the contents.
1371   *
1372   * @param dest Destination string buffer.
1373   * @param destCapacity Number of UChars available at dest.
1374   * @param errorCode ICU error code.
1375   * @return length()
1376   * @stable ICU 2.0
1377   */
1378  int32_t
1379  extract(UChar *dest, int32_t destCapacity,
1380          UErrorCode &errorCode) const;
1381
1382  /**
1383   * Copy the characters in the range
1384   * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1385   * <tt>target</tt>.
1386   * @param start offset of first character which will be copied
1387   * @param length the number of characters to extract
1388   * @param target UnicodeString into which to copy characters.
1389   * The result is delivered only through <TT>target</TT>; nothing is returned.
1390   * @stable ICU 2.0
1391   */
1392  inline void extract(int32_t start,
1393           int32_t length,
1394           UnicodeString& target) const;
1395
1396  /**
1397   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1398   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1399   * @param start offset of first character which will be copied into the array
1400   * @param limit offset immediately following the last character to be copied
1401   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1402   * must be at least (<tt>dstStart + (limit - start)</tt>).
1403   * @param dstStart the offset in <TT>dst</TT> where the first character
1404   * will be extracted
1405   * @stable ICU 2.0
1406   */
1407  inline void extractBetween(int32_t start,
1408              int32_t limit,
1409              UChar *dst,
1410              int32_t dstStart = 0) const;
1411
1412  /**
1413   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1414   * into the UnicodeString <tt>target</tt>.  Replaceable API.
1415   * @param start offset of first character which will be copied
1416   * @param limit offset immediately following the last character to be copied
1417   * @param target UnicodeString into which to copy characters.
1418   * The result is delivered only through <TT>target</TT>; nothing is returned.
1419   * @stable ICU 2.0
1420   */
1421  virtual void extractBetween(int32_t start,
1422              int32_t limit,
1423              UnicodeString& target) const;
1424
1425  /**
1426   * Copy the characters in the range
1427   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1428   * All characters must be invariant (see utypes.h).
1429   * Use US_INV as the last, signature-distinguishing parameter.
1430   *
1431   * This function does not write any more than <code>targetCapacity</code>
1432   * characters but returns the length of the entire output string
1433   * so that one can allocate a larger buffer and call the function again
1434   * if necessary.
1435   * The output string is NUL-terminated if possible.
1436   *
1437   * @param start offset of first character which will be copied
1438   * @param startLength the number of characters to extract
1439   * @param target the target buffer for extraction, can be NULL
1440   *               if targetCapacity is 0
1441   * @param targetCapacity the length of the target buffer
1442   * @param inv Signature-distinguishing parameter, use US_INV.
1443   * @return the output string length, not including the terminating NUL
1444   * @stable ICU 3.2
1445   */
1446  int32_t extract(int32_t start,
1447           int32_t startLength,
1448           char *target,
1449           int32_t targetCapacity,
1450           enum EInvariant inv) const;
1451
1452#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1453
1454  /**
1455   * Copy the characters in the range
1456   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1457   * in the platform's default codepage.
1458   * This function does not write any more than <code>targetLength</code>
1459   * characters but returns the length of the entire output string
1460   * so that one can allocate a larger buffer and call the function again
1461   * if necessary.
1462   * The output string is NUL-terminated if possible.
1463   *
1464   * @param start offset of first character which will be copied
1465   * @param startLength the number of characters to extract
1466   * @param target the target buffer for extraction
1467   * @param targetLength the length of the target buffer
1468   * If <TT>target</TT> is NULL, then the number of bytes required for
1469   * <TT>target</TT> is returned.
1470   * @return the output string length, not including the terminating NUL
1471   * @stable ICU 2.0
1472   */
1473  int32_t extract(int32_t start,
1474           int32_t startLength,
1475           char *target,
1476           uint32_t targetLength) const;
1477
1478#endif
1479
1480#if !UCONFIG_NO_CONVERSION
1481
1482  /**
1483   * Copy the characters in the range
1484   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1485   * in a specified codepage.
1486   * The output string is NUL-terminated.
1487   *
1488   * Recommendation: For invariant-character strings use
1489   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1490   * because it avoids object code dependencies of UnicodeString on
1491   * the conversion code.
1492   *
1493   * @param start offset of first character which will be copied
1494   * @param startLength the number of characters to extract
1495   * @param target the target buffer for extraction
1496   * @param codepage the desired codepage for the characters.  0 has
1497   * the special meaning of the default codepage
1498   * If <code>codepage</code> is an empty string (<code>""</code>),
1499   * then a simple conversion is performed on the codepage-invariant
1500   * subset ("invariant characters") of the platform encoding. See utypes.h.
1501   * If <TT>target</TT> is NULL, then the number of bytes required for
1502   * <TT>target</TT> is returned. It is assumed that the target is big enough
1503   * to fit all of the characters.
1504   * @return the output string length, not including the terminating NUL
1505   * @stable ICU 2.0
1506   */
1507  inline int32_t extract(int32_t start,
1508                 int32_t startLength,
1509                 char *target,
1510                 const char *codepage = 0) const;
1511
1512  /**
1513   * Copy the characters in the range
1514   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1515   * in a specified codepage.
1516   * This function does not write any more than <code>targetLength</code>
1517   * characters but returns the length of the entire output string
1518   * so that one can allocate a larger buffer and call the function again
1519   * if necessary.
1520   * The output string is NUL-terminated if possible.
1521   *
1522   * Recommendation: For invariant-character strings use
1523   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1524   * because it avoids object code dependencies of UnicodeString on
1525   * the conversion code.
1526   *
1527   * @param start offset of first character which will be copied
1528   * @param startLength the number of characters to extract
1529   * @param target the target buffer for extraction
1530   * @param targetLength the length of the target buffer
1531   * @param codepage the desired codepage for the characters.  0 has
1532   * the special meaning of the default codepage
1533   * If <code>codepage</code> is an empty string (<code>""</code>),
1534   * then a simple conversion is performed on the codepage-invariant
1535   * subset ("invariant characters") of the platform encoding. See utypes.h.
1536   * If <TT>target</TT> is NULL, then the number of bytes required for
1537   * <TT>target</TT> is returned.
1538   * @return the output string length, not including the terminating NUL
1539   * @stable ICU 2.0
1540   */
1541  int32_t extract(int32_t start,
1542           int32_t startLength,
1543           char *target,
1544           uint32_t targetLength,
1545           const char *codepage) const;
1546
1547  /**
1548   * Convert the UnicodeString into a codepage string using an existing UConverter.
1549   * The output string is NUL-terminated if possible.
1550   *
1551   * This function avoids the overhead of opening and closing a converter if
1552   * multiple strings are extracted.
1553   *
1554   * @param dest destination string buffer, can be NULL if destCapacity==0
1555   * @param destCapacity the number of chars available at dest
1556   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1557   *        or NULL for the default converter
1558   * @param errorCode normal ICU error code
1559   * @return the length of the output string, not counting the terminating NUL;
1560   *         if the length is greater than destCapacity, then the string will not fit
1561   *         and a buffer of the indicated length would need to be passed in
1562   * @stable ICU 2.0
1563   */
1564  int32_t extract(char *dest, int32_t destCapacity,
1565                  UConverter *cnv,
1566                  UErrorCode &errorCode) const;
1567
1568#endif
1569
1570  /**
1571   * Create a temporary substring for the specified range.
1572   * Unlike the substring constructor and setTo() functions,
1573   * the object returned here will be a read-only alias (using getBuffer())
1574   * rather than copying the text.
1575   * As a result, this substring operation is much faster but requires
1576   * that the original string not be modified or deleted during the lifetime
1577   * of the returned substring object.
1578   * @param start offset of the first character visible in the substring
1579   * @param length length of the substring
1580   * @return a read-only alias UnicodeString object for the substring
1581   * @stable ICU 4.4
1582   */
1583  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1584
1585  /**
1586   * Create a temporary substring for the specified range.
1587   * Same as tempSubString(start, length) except that the substring range
1588   * is specified as a (start, limit) pair (with an exclusive limit index)
1589   * rather than a (start, length) pair.
1590   * @param start offset of the first character visible in the substring
1591   * @param limit offset immediately following the last character visible in the substring
1592   * @return a read-only alias UnicodeString object for the substring
1593   * @stable ICU 4.4
1594   */
1595  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1596
1597  /**
1598   * Convert the UnicodeString to UTF-8 and write the result
1599   * to a ByteSink. This is called by toUTF8String().
1600   * Unpaired surrogates are replaced with U+FFFD.
1601   * Calls u_strToUTF8WithSub().
1602   *
1603   * @param sink A ByteSink to which the UTF-8 version of the string is written.
1604   *             sink.Flush() is called at the end.
1605   * @stable ICU 4.2
1606   * @see toUTF8String
1607   */
1608  void toUTF8(ByteSink &sink) const;
1609
1610#if U_HAVE_STD_STRING
1611
1612  /**
1613   * Convert the UnicodeString to UTF-8 and append the result
1614   * to a standard string.
1615   * Unpaired surrogates are replaced with U+FFFD.
1616   * Calls toUTF8().
1617   *
1618   * @param result A standard string (or a compatible object)
1619   *        to which the UTF-8 version of the string is appended.
1620   * @return The string object.
1621   * @stable ICU 4.2
1622   * @see toUTF8
1623   */
1624  template<typename StringClass>
1625  StringClass &toUTF8String(StringClass &result) const {
1626    StringByteSink<StringClass> utf8Appender(&result);  // sink adapter wrapping the caller's string
1627    this->toUTF8(utf8Appender);
1628    return result;
1629  }
1630
1631#endif
1632
1633  /**
1634   * Convert the UnicodeString to UTF-32.
1635   * Unpaired surrogates are replaced with U+FFFD.
1636   * Calls u_strToUTF32WithSub().
1637   *
1638   * @param utf32 destination string buffer, can be NULL if capacity==0
1639   * @param capacity the number of UChar32s available at utf32
1640   * @param errorCode Standard ICU error code. Its input value must
1641   *                  pass the U_SUCCESS() test, or else the function returns
1642   *                  immediately. Check for U_FAILURE() on output or use with
1643   *                  function chaining. (See User Guide for details.)
1644   * @return The length of the UTF-32 string.
1645   * @see fromUTF32
1646   * @stable ICU 4.2
1647   */
1648  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1649
1650  /* Length operations */
1651
1652  /**
1653   * Return the length of the UnicodeString object.
1654   * The length is the number of UChar code units that are in the UnicodeString.
1655   * If you want the number of code points, please use countChar32().
1656   * @return the length of the UnicodeString object
1657   * @see countChar32
1658   * @stable ICU 2.0
1659   */
1660  inline int32_t length(void) const;
1661
1662  /**
1663   * Count Unicode code points in the length UChar code units of the string.
1664   * A code point may occupy either one or two UChar code units.
1665   * Counting code points involves reading all code units.
1666   *
1667   * This function is basically the inverse of moveIndex32().
1668   *
1669   * @param start the index of the first code unit to check
1670   * @param length the number of UChar code units to check
1671   * @return the number of code points in the specified code units
1672   * @see length
1673   * @stable ICU 2.0
1674   */
1675  int32_t
1676  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1677
1678  /**
1679   * Check if the length UChar code units of the string
1680   * contain more Unicode code points than a certain number.
1681   * This is more efficient than counting all code points in this part of the string
1682   * and comparing that number with a threshold.
1683   * This function may not need to scan the string at all if the length
1684   * falls within a certain range, and
1685   * never needs to count more than 'number+1' code points.
1686   * Logically equivalent to (countChar32(start, length)>number).
1687   * A Unicode code point may occupy either one or two UChar code units.
1688   *
1689   * @param start the index of the first code unit to check (0 for the entire string)
1690   * @param length the number of UChar code units to check
1691   *               (use INT32_MAX for the entire string; remember that start/length
1692   *                values are pinned)
1693   * @param number The number of code points in the (sub)string is compared against
1694   *               the 'number' parameter.
1695   * @return Boolean value for whether the string contains more Unicode code points
1696   *         than 'number'. Same as (u_countChar32(s, length)>number).
1697   * @see countChar32
1698   * @see u_strHasMoreChar32Than
1699   * @stable ICU 2.4
1700   */
1701  UBool
1702  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1703
1704  /**
1705   * Determine if this string is empty.
1706   * @return TRUE if this string contains 0 characters, FALSE otherwise.
1707   * @stable ICU 2.0
1708   */
1709  inline UBool isEmpty(void) const;
1710
1711  /**
1712   * Return the capacity of the internal buffer of the UnicodeString object.
1713   * This is useful together with the getBuffer functions.
1714   * See there for details.
1715   *
1716   * @return the number of UChars available in the internal buffer
1717   * @see getBuffer
1718   * @stable ICU 2.0
1719   */
1720  inline int32_t getCapacity(void) const;
1721
1722  /* Other operations */
1723
1724  /**
1725   * Generate a hash code for this object.
1726   * @return The hash code of this UnicodeString.
1727   * @stable ICU 2.0
1728   */
1729  inline int32_t hashCode(void) const;
1730
1731  /**
1732   * Determine if this object contains a valid string.
1733   * A bogus string has no value. It is different from an empty string,
1734   * although in both cases isEmpty() returns TRUE and length() returns 0.
1735   * setToBogus() and isBogus() can be used to indicate that no string value is available.
1736   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1737   * length() returns 0.
1738   *
1739   * @return TRUE if the string is bogus (has no valid value), FALSE otherwise
1740   * @see setToBogus()
1741   * @stable ICU 2.0
1742   */
1743  inline UBool isBogus(void) const;
1744
1745
1746  //========================================
1747  // Write operations
1748  //========================================
1749
1750  /* Assignment operations */
1751
1752  /**
1753   * Assignment operator.  Replace the characters in this UnicodeString
1754   * with the characters from <TT>srcText</TT>.
1755   * @param srcText The text containing the characters to replace
1756   * @return a reference to this
1757   * @stable ICU 2.0
1758   */
1759  UnicodeString &operator=(const UnicodeString &srcText);
1760
1761  /**
1762   * Almost the same as the assignment operator.
1763   * Replace the characters in this UnicodeString
1764   * with the characters from <code>srcText</code>.
1765   *
1766   * This function works the same for all strings except for ones that
1767   * are readonly aliases.
1768   * Starting with ICU 2.4, the assignment operator and the copy constructor
1769   * allocate a new buffer and copy the buffer contents even for readonly aliases.
1770   * This function implements the old, more efficient but less safe behavior
1771   * of making this string also a readonly alias to the same buffer.
1772   * The fastCopyFrom function must be used only if it is known that the lifetime of
1773   * this UnicodeString is at least as long as the lifetime of the aliased buffer
1774   * including its contents, for example for strings from resource bundles
1775   * or aliases to string contents.
1776   *
1777   * @param src The text containing the characters to replace.
1778   * @return a reference to this
1779   * @stable ICU 2.4
1780   */
1781  UnicodeString &fastCopyFrom(const UnicodeString &src);
1782
1783  /**
1784   * Assignment operator.  Replace the characters in this UnicodeString
1785   * with the code unit <TT>ch</TT>.
1786   * @param ch the code unit to replace
1787   * @return a reference to this
1788   * @stable ICU 2.0
1789   */
1790  inline UnicodeString& operator= (UChar ch);
1791
1792  /**
1793   * Assignment operator.  Replace the characters in this UnicodeString
1794   * with the code point <TT>ch</TT>.
1795   * @param ch the code point to replace
1796   * @return a reference to this
1797   * @stable ICU 2.0
1798   */
1799  inline UnicodeString& operator= (UChar32 ch);
1800
1801  /**
1802   * Set the text in the UnicodeString object to the characters
1803   * in <TT>srcText</TT> in the range
1804   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1805   * <TT>srcText</TT> is not modified.
1806   * @param srcText the source for the new characters
1807   * @param srcStart the offset into <TT>srcText</TT> where new characters
1808   * will be obtained
1809   * @return a reference to this
1810   * @stable ICU 2.2
1811   */
1812  inline UnicodeString& setTo(const UnicodeString& srcText,
1813               int32_t srcStart);
1814
1815  /**
1816   * Set the text in the UnicodeString object to the characters
1817   * in <TT>srcText</TT> in the range
1818   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1819   * <TT>srcText</TT> is not modified.
1820   * @param srcText the source for the new characters
1821   * @param srcStart the offset into <TT>srcText</TT> where new characters
1822   * will be obtained
1823   * @param srcLength the number of characters in <TT>srcText</TT> in the
1824   * replace string.
1825   * @return a reference to this
1826   * @stable ICU 2.0
1827   */
1828  inline UnicodeString& setTo(const UnicodeString& srcText,
1829               int32_t srcStart,
1830               int32_t srcLength);
1831
1832  /**
1833   * Set the text in the UnicodeString object to the characters in
1834   * <TT>srcText</TT>.
1835   * <TT>srcText</TT> is not modified.
1836   * @param srcText the source for the new characters
1837   * @return a reference to this
1838   * @stable ICU 2.0
1839   */
1840  inline UnicodeString& setTo(const UnicodeString& srcText);
1841
1842  /**
1843   * Set the characters in the UnicodeString object to the characters
1844   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1845   * @param srcChars the source for the new characters
1846   * @param srcLength the number of Unicode characters in srcChars.
1847   * @return a reference to this
1848   * @stable ICU 2.0
1849   */
1850  inline UnicodeString& setTo(const UChar *srcChars,
1851               int32_t srcLength);
1852
1853  /**
1854   * Set the characters in the UnicodeString object to the code unit
1855   * <TT>srcChar</TT>.
1856   * @param srcChar the code unit which becomes the UnicodeString's character
1857   * content
1858   * @return a reference to this
1859   * @stable ICU 2.0
1860   */
1861  UnicodeString& setTo(UChar srcChar);
1862
1863  /**
1864   * Set the characters in the UnicodeString object to the code point
1865   * <TT>srcChar</TT>.
1866   * @param srcChar the code point which becomes the UnicodeString's character
1867   * content
1868   * @return a reference to this
1869   * @stable ICU 2.0
1870   */
1871  UnicodeString& setTo(UChar32 srcChar);
1872
1873  /**
1874   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1875   * The text will be used for the UnicodeString object, but
1876   * it will not be released when the UnicodeString is destroyed.
1877   * This has copy-on-write semantics:
1878   * When the string is modified, then the buffer is first copied into
1879   * newly allocated memory.
1880   * The aliased buffer is never modified.
1881   * In an assignment to another UnicodeString, the text will be copied (since ICU 2.4);
1882   * only fastCopyFrom() makes the other string alias the same readonly text.
1883   *
1884   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1885   *                     This must be true if <code>textLength==-1</code>.
1886   * @param text The characters to alias for the UnicodeString.
1887   * @param textLength The number of Unicode characters in <code>text</code> to alias.
1888   *                   If -1, then this function will determine the length
1889   *                   by calling <code>u_strlen()</code>.
1890   * @return a reference to this
1891   * @stable ICU 2.0
1892   */
1893  UnicodeString &setTo(UBool isTerminated,
1894                       const UChar *text,
1895                       int32_t textLength);
1896
1897  /**
1898   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1899   * The text will be used for the UnicodeString object, but
1900   * it will not be released when the UnicodeString is destroyed.
1901   * This has write-through semantics:
1902   * For as long as the capacity of the buffer is sufficient, write operations
1903   * will directly affect the buffer. When more capacity is necessary, then
1904   * a new buffer will be allocated and the contents copied as with regularly
1905   * constructed strings.
1906   * In an assignment to another UnicodeString, the buffer will be copied.
1907   * The extract(UChar *dst) function detects whether the dst pointer is the same
1908   * as the string buffer itself and will in this case not copy the contents.
1909   *
1910   * @param buffer The characters to alias for the UnicodeString.
1911   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1912   * @param buffCapacity The size of <code>buffer</code> in UChars.
1913   * @return a reference to this
1914   * @stable ICU 2.0
1915   */
1916  UnicodeString &setTo(UChar *buffer,
1917                       int32_t buffLength,
1918                       int32_t buffCapacity);
1919
1920  /**
1921   * Make this UnicodeString object invalid.
1922   * The string will test TRUE with isBogus().
1923   *
1924   * A bogus string has no value. It is different from an empty string.
1925   * It can be used to indicate that no string value is available.
1926   * getBuffer() and getTerminatedBuffer() return NULL, and
1927   * length() returns 0.
1928   *
1929   * This utility function is used throughout the UnicodeString
1930   * implementation to indicate that a UnicodeString operation failed,
1931   * and may be used in other functions,
1932   * especially but not exclusively when such functions do not
1933   * take a UErrorCode for simplicity.
1934   *
1935   * The following methods, and no others, will clear a string object's bogus flag:
1936   * - remove()
1937   * - remove(0, INT32_MAX)
1938   * - truncate(0)
1939   * - operator=() (assignment operator)
1940   * - setTo(...)
1941   *
1942   * The simplest way to turn a bogus string into an empty one
1943   * is to use the remove() function.
1944   * Examples for other functions that are equivalent to "set to empty string":
1945   * \code
1946   * if(s.isBogus()) {
1947   *   s.remove();           // set to an empty string (remove all), or
1948   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1949   *   s.truncate(0);        // set to an empty string (complete truncation), or
1950   *   s=UnicodeString();    // assign an empty string, or
1951   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1952   *   static const UChar nul=0;
1953   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
1954   * }
1955   * \endcode
1956   *
1957   * @see isBogus()
1958   * @stable ICU 2.0
1959   */
1960  void setToBogus();
1961
1962  /**
1963   * Set the character at the specified offset to the specified character.
1964   * @param offset A valid offset into the text of the character to set
1965   * @param ch The new character
1966   * @return A reference to this
1967   * @stable ICU 2.0
1968   */
1969  UnicodeString& setCharAt(int32_t offset,
1970               UChar ch);
1971
1972
1973  /* Append operations */
1974
1975  /**
1976   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1977   * object.
1978   * @param ch the code unit to be appended
1979   * @return a reference to this
1980   * @stable ICU 2.0
1981   */
1982 inline  UnicodeString& operator+= (UChar ch);
1983
1984  /**
1985   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1986   * object.
1987   * @param ch the code point to be appended
1988   * @return a reference to this
1989   * @stable ICU 2.0
1990   */
1991 inline  UnicodeString& operator+= (UChar32 ch);
1992
1993  /**
1994   * Append operator. Append the characters in <TT>srcText</TT> to the
1995   * UnicodeString object. <TT>srcText</TT> is not modified.
1996   * @param srcText the source for the new characters
1997   * @return a reference to this
1998   * @stable ICU 2.0
1999   */
2000  inline UnicodeString& operator+= (const UnicodeString& srcText);
2001
2002  /**
2003   * Append the characters
2004   * in <TT>srcText</TT> in the range
2005   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2006   * UnicodeString object. <TT>srcText</TT>
2007   * is not modified.
2008   * @param srcText the source for the new characters
2009   * @param srcStart the offset into <TT>srcText</TT> where new characters
2010   * will be obtained
2011   * @param srcLength the number of characters in <TT>srcText</TT> in
2012   * the append string
2013   * @return a reference to this
2014   * @stable ICU 2.0
2015   */
2016  inline UnicodeString& append(const UnicodeString& srcText,
2017            int32_t srcStart,
2018            int32_t srcLength);
2019
2020  /**
2021   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2022   * <TT>srcText</TT> is not modified.
2023   * @param srcText the source for the new characters
2024   * @return a reference to this
2025   * @stable ICU 2.0
2026   */
2027  inline UnicodeString& append(const UnicodeString& srcText);
2028
2029  /**
2030   * Append the characters in <TT>srcChars</TT> in the range
2031   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2032   * object.
2033   * <TT>srcChars</TT> is not modified.
2034   * @param srcChars the source for the new characters
2035   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2036   * will be obtained
2037   * @param srcLength the number of characters in <TT>srcChars</TT> in
2038   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2039   * @return a reference to this
2040   * @stable ICU 2.0
2041   */
2042  inline UnicodeString& append(const UChar *srcChars,
2043            int32_t srcStart,
2044            int32_t srcLength);
2045
2046  /**
2047   * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2048   * <TT>srcChars</TT> is not modified.
2049   * @param srcChars the source for the new characters
2050   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2051   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2052   * @return a reference to this
2053   * @stable ICU 2.0
2054   */
2055  inline UnicodeString& append(const UChar *srcChars,
2056            int32_t srcLength);
2057
2058  /**
2059   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2060   * @param srcChar the code unit to append
2061   * @return a reference to this
2062   * @stable ICU 2.0
2063   */
2064  inline UnicodeString& append(UChar srcChar);
2065
2066  /**
2067   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2068   * @param srcChar the code point to append
2069   * @return a reference to this
2070   * @stable ICU 2.0
2071   */
2072  inline UnicodeString& append(UChar32 srcChar);
2073
2074
2075  /* Insert operations */
2076
2077  /**
2078   * Insert the characters in <TT>srcText</TT> in the range
2079   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2080   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2081   * @param start the offset where the insertion begins
2082   * @param srcText the source for the new characters
2083   * @param srcStart the offset into <TT>srcText</TT> where new characters
2084   * will be obtained
2085   * @param srcLength the number of characters in <TT>srcText</TT> in
2086   * the insert string
2087   * @return a reference to this
2088   * @stable ICU 2.0
2089   */
2090  inline UnicodeString& insert(int32_t start,
2091            const UnicodeString& srcText,
2092            int32_t srcStart,
2093            int32_t srcLength);
2094
2095  /**
2096   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2097   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2098   * @param start the offset where the insertion begins
2099   * @param srcText the source for the new characters
2100   * @return a reference to this
2101   * @stable ICU 2.0
2102   */
2103  inline UnicodeString& insert(int32_t start,
2104            const UnicodeString& srcText);
2105
2106  /**
2107   * Insert the characters in <TT>srcChars</TT> in the range
2108   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2109   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2110   * @param start the offset at which the insertion begins
2111   * @param srcChars the source for the new characters
2112   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2113   * will be obtained
2114   * @param srcLength the number of characters in <TT>srcChars</TT>
2115   * in the insert string
2116   * @return a reference to this
2117   * @stable ICU 2.0
2118   */
2119  inline UnicodeString& insert(int32_t start,
2120            const UChar *srcChars,
2121            int32_t srcStart,
2122            int32_t srcLength);
2123
2124  /**
2125   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2126   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2127   * @param start the offset where the insertion begins
2128   * @param srcChars the source for the new characters
2129   * @param srcLength the number of Unicode characters in srcChars.
2130   * @return a reference to this
2131   * @stable ICU 2.0
2132   */
2133  inline UnicodeString& insert(int32_t start,
2134            const UChar *srcChars,
2135            int32_t srcLength);
2136
2137  /**
2138   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2139   * offset <TT>start</TT>.
2140   * @param start the offset at which the insertion occurs
2141   * @param srcChar the code unit to insert
2142   * @return a reference to this
2143   * @stable ICU 2.0
2144   */
2145  inline UnicodeString& insert(int32_t start,
2146            UChar srcChar);
2147
2148  /**
2149   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2150   * offset <TT>start</TT>.
2151   * @param start the offset at which the insertion occurs
2152   * @param srcChar the code point to insert
2153   * @return a reference to this
2154   * @stable ICU 2.0
2155   */
2156  inline UnicodeString& insert(int32_t start,
2157            UChar32 srcChar);
2158
2159
2160  /* Replace operations */
2161
2162  /**
2163   * Replace the characters in the range
2164   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2165   * <TT>srcText</TT> in the range
2166   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2167   * <TT>srcText</TT> is not modified.
2168   * @param start the offset at which the replace operation begins
2169   * @param length the number of characters to replace. The character at
2170   * <TT>start + length</TT> is not modified.
2171   * @param srcText the source for the new characters
2172   * @param srcStart the offset into <TT>srcText</TT> where new characters
2173   * will be obtained
2174   * @param srcLength the number of characters in <TT>srcText</TT> in
2175   * the replace string
2176   * @return a reference to this
2177   * @stable ICU 2.0
2178   */
2179  UnicodeString& replace(int32_t start,
2180             int32_t length,
2181             const UnicodeString& srcText,
2182             int32_t srcStart,
2183             int32_t srcLength);
2184
2185  /**
2186   * Replace the characters in the range
2187   * [<TT>start</TT>, <TT>start + length</TT>)
2188   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2189   *  not modified.
2190   * @param start the offset at which the replace operation begins
2191   * @param length the number of characters to replace. The character at
2192   * <TT>start + length</TT> is not modified.
2193   * @param srcText the source for the new characters
2194   * @return a reference to this
2195   * @stable ICU 2.0
2196   */
2197  UnicodeString& replace(int32_t start,
2198             int32_t length,
2199             const UnicodeString& srcText);
2200
2201  /**
2202   * Replace the characters in the range
2203   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2204   * <TT>srcChars</TT> in the range
2205   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2206   * is not modified.
2207   * @param start the offset at which the replace operation begins
2208   * @param length the number of characters to replace.  The character at
2209   * <TT>start + length</TT> is not modified.
2210   * @param srcChars the source for the new characters
2211   * @param srcStart the offset into <TT>srcChars</TT> where new characters
2212   * will be obtained
2213   * @param srcLength the number of characters in <TT>srcChars</TT>
2214   * in the replace string
2215   * @return a reference to this
2216   * @stable ICU 2.0
2217   */
2218  UnicodeString& replace(int32_t start,
2219             int32_t length,
2220             const UChar *srcChars,
2221             int32_t srcStart,
2222             int32_t srcLength);
2223
2224  /**
2225   * Replace the characters in the range
2226   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2227   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2228   * @param start the offset at which the replace operation begins
2229   * @param length the number of characters to replace.  The character at
2230   * <TT>start + length</TT> is not modified.
2231   * @param srcChars the source for the new characters
2232   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
2233   * @return a reference to this
2234   * @stable ICU 2.0
2235   */
2236  inline UnicodeString& replace(int32_t start,
2237             int32_t length,
2238             const UChar *srcChars,
2239             int32_t srcLength);
2240
2241  /**
2242   * Replace the characters in the range
2243   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2244   * <TT>srcChar</TT>.
2245   * @param start the offset at which the replace operation begins
2246   * @param length the number of characters to replace.  The character at
2247   * <TT>start + length</TT> is not modified.
2248   * @param srcChar the new code unit
2249   * @return a reference to this
2250   * @stable ICU 2.0
2251   */
2252  inline UnicodeString& replace(int32_t start,
2253             int32_t length,
2254             UChar srcChar);
2255
2256  /**
2257   * Replace the characters in the range
2258   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2259   * <TT>srcChar</TT>.
2260   * @param start the offset at which the replace operation begins
2261   * @param length the number of characters to replace.  The character at
2262   * <TT>start + length</TT> is not modified.
2263   * @param srcChar the new code point
2264   * @return a reference to this
2265   * @stable ICU 2.0
2266   */
2267  inline UnicodeString& replace(int32_t start,
2268             int32_t length,
2269             UChar32 srcChar);
2270
2271  /**
2272   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2273   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2274   * @param start the offset at which the replace operation begins
2275   * @param limit the offset immediately following the replace range
2276   * @param srcText the source for the new characters
2277   * @return a reference to this
2278   * @stable ICU 2.0
2279   */
2280  inline UnicodeString& replaceBetween(int32_t start,
2281                int32_t limit,
2282                const UnicodeString& srcText);
2283
2284  /**
2285   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2286   * with the characters in <TT>srcText</TT> in the range
2287   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2288   * @param start the offset at which the replace operation begins
2289   * @param limit the offset immediately following the replace range
2290   * @param srcText the source for the new characters
2291   * @param srcStart the offset into <TT>srcText</TT> where new characters
2292   * will be obtained
2293   * @param srcLimit the offset immediately following the range to copy
2294   * in <TT>srcText</TT>
2295   * @return a reference to this
2296   * @stable ICU 2.0
2297   */
2298  inline UnicodeString& replaceBetween(int32_t start,
2299                int32_t limit,
2300                const UnicodeString& srcText,
2301                int32_t srcStart,
2302                int32_t srcLimit);
2303
2304  /**
2305   * Replace a substring of this object with the given text.
2306   * @param start the beginning index, inclusive; <code>0 <= start
2307   * <= limit</code>.
2308   * @param limit the ending index, exclusive; <code>start <= limit
2309   * <= length()</code>.
2310   * @param text the text to replace characters <code>start</code>
2311   * to <code>limit - 1</code>
2312   * @stable ICU 2.0
2313   */
2314  virtual void handleReplaceBetween(int32_t start,
2315                                    int32_t limit,
2316                                    const UnicodeString& text);
2317
2318  /**
2319   * Replaceable API
2320   * @return TRUE if it has MetaData
2321   * @stable ICU 2.4
2322   */
2323  virtual UBool hasMetaData() const;
2324
2325  /**
2326   * Copy a substring of this object, retaining attribute (out-of-band)
2327   * information.  This method is used to duplicate or reorder substrings.
2328   * The destination index must not overlap the source range.
2329   *
2330   * @param start the beginning index, inclusive; <code>0 <= start <=
2331   * limit</code>.
2332   * @param limit the ending index, exclusive; <code>start <= limit <=
2333   * length()</code>.
2334   * @param dest the destination index.  The characters from
2335   * <code>start..limit-1</code> will be copied to <code>dest</code>.
2336   * Implementations of this method may assume that <code>dest <= start ||
2337   * dest >= limit</code>.
2338   * @stable ICU 2.0
2339   */
2340  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2341
2342  /* Search and replace operations */
2343
2344  /**
2345   * Replace all occurrences of the characters in oldText with the
2346   * characters in newText.
2347   * @param oldText the text containing the search text
2348   * @param newText the text containing the replacement text
2349   * @return a reference to this
2350   * @stable ICU 2.0
2351   */
2352  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2353                const UnicodeString& newText);
2354
2355  /**
2356   * Within the range [<TT>start</TT>, <TT>start + length</TT>) of this
2357   * string, replace all occurrences of the characters in oldText with
2358   * the characters in newText.
2359   * @param start the start of the range in which the replace will be performed
2360   * @param length the length of the range in which the replace will be performed
2361   * @param oldText the text containing the search text
2362   * @param newText the text containing the replacement text
2363   * @return a reference to this
2364   * @stable ICU 2.0
2365   */
2366  inline UnicodeString& findAndReplace(int32_t start,
2367                int32_t length,
2368                const UnicodeString& oldText,
2369                const UnicodeString& newText);
2370
2371  /**
2372   * Within the range [<TT>start</TT>, <TT>start + length</TT>) of this
2373   * string, replace all occurrences of the characters of oldText in the range
2374   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2375   * of newText in the range
2376   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>).
2377   * @param start the start of the range in which the replace will be performed
2378   * @param length the length of the range in which the replace will be performed
2379   * @param oldText the text containing the search text
2380   * @param oldStart the start of the search range in <TT>oldText</TT>
2381   * @param oldLength the length of the search range in <TT>oldText</TT>
2382   * @param newText the text containing the replacement text
2383   * @param newStart the start of the replacement range in <TT>newText</TT>
2384   * @param newLength the length of the replacement range in <TT>newText</TT>
2385   * @return a reference to this
2386   * @stable ICU 2.0
2387   */
2388  UnicodeString& findAndReplace(int32_t start,
2389                int32_t length,
2390                const UnicodeString& oldText,
2391                int32_t oldStart,
2392                int32_t oldLength,
2393                const UnicodeString& newText,
2394                int32_t newStart,
2395                int32_t newLength);
2396
2397
2398  /* Remove operations */
2399
2400  /**
2401   * Remove all characters from the UnicodeString object.
2402   * @return a reference to this
2403   * @stable ICU 2.0
2404   */
2405  inline UnicodeString& remove(void);
2406
2407  /**
2408   * Remove the characters in the range
2409   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2410   * @param start the offset of the first character to remove
2411   * @param length the number of characters to remove; defaults to INT32_MAX
2412   * @return a reference to this
2413   * @stable ICU 2.0
2414   */
2415  inline UnicodeString& remove(int32_t start,
2416                               int32_t length = (int32_t)INT32_MAX);
2417
2418  /**
2419   * Remove the characters in the range
2420   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2421   * @param start the offset of the first character to remove
2422   * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2423   * @return a reference to this
2424   * @stable ICU 2.0
2425   */
2426  inline UnicodeString& removeBetween(int32_t start,
2427                                      int32_t limit = (int32_t)INT32_MAX);
2428
2429  /**
2430   * Retain only the characters in the range
2431   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2432   * Removes characters before <code>start</code> and at and after <code>limit</code>.
2433   * @param start the offset of the first character to retain
2434   * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
2435   * @return a reference to this
2436   * @stable ICU 4.4
2437   */
2438  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2439
2440  /* Length operations */
2441
2442  /**
2443   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2444   * If the length of this UnicodeString is less than targetLength,
2445   * targetLength - length() copies of padChar will be added to the
2446   * beginning of this UnicodeString.
2447   * @param targetLength the desired length of the string
2448   * @param padChar the character to use for padding. Defaults to
2449   * space (U+0020)
2450   * @return TRUE if the text was padded, FALSE otherwise.
2451   * @stable ICU 2.0
2452   */
2453  UBool padLeading(int32_t targetLength,
2454                    UChar padChar = 0x0020);
2455
2456  /**
2457   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2458   * If the length of this UnicodeString is less than targetLength,
2459   * targetLength - length() copies of padChar will be added to the
2460   * end of this UnicodeString.
2461   * @param targetLength the desired length of the string
2462   * @param padChar the character to use for padding. Defaults to
2463   * space (U+0020)
2464   * @return TRUE if the text was padded, FALSE otherwise.
2465   * @stable ICU 2.0
2466   */
2467  UBool padTrailing(int32_t targetLength,
2468                     UChar padChar = 0x0020);
2469
2470  /**
2471   * Truncate this UnicodeString to the <TT>targetLength</TT>.
2472   * @param targetLength the desired length of this UnicodeString.
2473   * @return TRUE if the text was truncated, FALSE otherwise
2474   * @stable ICU 2.0
2475   */
2476  inline UBool truncate(int32_t targetLength);
2477
2478  /**
2479   * Trims leading and trailing whitespace from this UnicodeString.
2480   * @return a reference to this
2481   * @stable ICU 2.0
2482   */
2483  UnicodeString& trim(void);
2484
2485
2486  /* Miscellaneous operations */
2487
2488  /**
2489   * Reverse this UnicodeString in place.
2490   * @return a reference to this
2491   * @stable ICU 2.0
2492   */
2493  inline UnicodeString& reverse(void);
2494
2495  /**
2496   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2497   * this UnicodeString.
2498   * @param start the start of the range to reverse
2499   * @param length the number of characters to reverse
2500   * @return a reference to this
2501   * @stable ICU 2.0
2502   */
2503  inline UnicodeString& reverse(int32_t start,
2504             int32_t length);
2505
2506  /**
2507   * Convert the characters in this to UPPER CASE following the conventions of
2508   * the default locale.
2509   * @return A reference to this.
2510   * @stable ICU 2.0
2511   */
2512  UnicodeString& toUpper(void);
2513
2514  /**
2515   * Convert the characters in this to UPPER CASE following the conventions of
2516   * a specific locale.
2517   * @param locale The locale containing the conventions to use.
2518   * @return A reference to this.
2519   * @stable ICU 2.0
2520   */
2521  UnicodeString& toUpper(const Locale& locale);
2522
2523  /**
2524   * Convert the characters in this to lower case following the conventions of
2525   * the default locale.
2526   * @return A reference to this.
2527   * @stable ICU 2.0
2528   */
2529  UnicodeString& toLower(void);
2530
2531  /**
2532   * Convert the characters in this to lower case following the conventions of
2533   * a specific locale.
2534   * @param locale The locale containing the conventions to use.
2535   * @return A reference to this.
2536   * @stable ICU 2.0
2537   */
2538  UnicodeString& toLower(const Locale& locale);
2539
2540#if !UCONFIG_NO_BREAK_ITERATION
2541
2542  /**
2543   * Titlecase this string, convenience function using the default locale.
2544   *
2545   * Casing is locale-dependent and context-sensitive.
2546   * Titlecasing uses a break iterator to find the first characters of words
2547   * that are to be titlecased. It titlecases those characters and lowercases
2548   * all others.
2549   *
2550   * The titlecase break iterator can be provided to customize for arbitrary
2551   * styles, using rules and dictionaries beyond the standard iterators.
2552   * It may be more efficient to always provide an iterator to avoid
2553   * opening and closing one for each string.
2554   * The standard titlecase iterator for the root locale implements the
2555   * algorithm of Unicode TR 21.
2556   *
2557   * This function uses only the setText(), first() and next() methods of the
2558   * provided break iterator.
2559   *
2560   * @param titleIter A break iterator to find the first characters of words
2561   *                  that are to be titlecased.
2562   *                  If none is provided (0), then a standard titlecase
2563   *                  break iterator is opened.
2564   *                  Otherwise the provided iterator is set to the string's text.
2565   * @return A reference to this.
2566   * @stable ICU 2.1
2567   */
2568  UnicodeString &toTitle(BreakIterator *titleIter);
2569
2570  /**
2571   * Titlecase this string.
2572   *
2573   * Casing is locale-dependent and context-sensitive.
2574   * Titlecasing uses a break iterator to find the first characters of words
2575   * that are to be titlecased. It titlecases those characters and lowercases
2576   * all others.
2577   *
2578   * The titlecase break iterator can be provided to customize for arbitrary
2579   * styles, using rules and dictionaries beyond the standard iterators.
2580   * It may be more efficient to always provide an iterator to avoid
2581   * opening and closing one for each string.
2582   * The standard titlecase iterator for the root locale implements the
2583   * algorithm of Unicode TR 21.
2584   *
2585   * This function uses only the setText(), first() and next() methods of the
2586   * provided break iterator.
2587   *
2588   * @param titleIter A break iterator to find the first characters of words
2589   *                  that are to be titlecased.
2590   *                  If none is provided (0), then a standard titlecase
2591   *                  break iterator is opened.
2592   *                  Otherwise the provided iterator is set to the string's text.
2593   * @param locale    The locale to consider.
2594   * @return A reference to this.
2595   * @stable ICU 2.1
2596   */
2597  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2598
2599  /**
2600   * Titlecase this string, with options.
2601   *
2602   * Casing is locale-dependent and context-sensitive.
2603   * Titlecasing uses a break iterator to find the first characters of words
2604   * that are to be titlecased. It titlecases those characters and lowercases
2605   * all others. (This can be modified with options.)
2606   *
2607   * The titlecase break iterator can be provided to customize for arbitrary
2608   * styles, using rules and dictionaries beyond the standard iterators.
2609   * It may be more efficient to always provide an iterator to avoid
2610   * opening and closing one for each string.
2611   * The standard titlecase iterator for the root locale implements the
2612   * algorithm of Unicode TR 21.
2613   *
2614   * This function uses only the setText(), first() and next() methods of the
2615   * provided break iterator.
2616   *
2617   * @param titleIter A break iterator to find the first characters of words
2618   *                  that are to be titlecased.
2619   *                  If none is provided (0), then a standard titlecase
2620   *                  break iterator is opened.
2621   *                  Otherwise the provided iterator is set to the string's text.
2622   * @param locale    The locale to consider.
2623   * @param options Options bit set, see ucasemap_open().
2624   * @return A reference to this.
2625   * @see U_TITLECASE_NO_LOWERCASE
2626   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2627   * @see ucasemap_open
2628   * @stable ICU 3.8
2629   */
2630  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2631
2632#endif
2633
2634  /**
2635   * Case-fold the characters in this string.
2636   * Case-folding is locale-independent and not context-sensitive,
2637   * but there is an option for whether to include or exclude mappings for dotted I
2638   * and dotless i that are marked with 'I' in CaseFolding.txt.
2639   * The result may be longer or shorter than the original.
2640   *
2641   * @param options Either U_FOLD_CASE_DEFAULT (the default, 0) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2642   * @return A reference to this.
2643   * @stable ICU 2.0
2644   */
2645  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2646
2647  //========================================
2648  // Access to the internal buffer
2649  //========================================
2650
2651  /**
2652   * Get a read/write pointer to the internal buffer.
2653   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2654   * writable, and is still owned by the UnicodeString object.
2655   * Calls to getBuffer(minCapacity) must not be nested, and
2656   * must be matched with calls to releaseBuffer(newLength).
2657   * If the string buffer was read-only or shared,
2658   * then it will be reallocated and copied.
2659   *
2660   * An attempted nested call will return 0, and will not further modify the
2661   * state of the UnicodeString object.
2662   * It also returns 0 if the string is bogus.
2663   *
2664   * The actual capacity of the string buffer may be larger than minCapacity.
2665   * getCapacity() returns the actual capacity.
2666   * For many operations, the full capacity should be used to avoid reallocations.
2667   *
2668   * While the buffer is "open" between getBuffer(minCapacity)
2669   * and releaseBuffer(newLength), the following applies:
2670   * - The string length is set to 0.
2671   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2672   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2673   * - You can read from and write to the returned buffer.
2674   * - The previous string contents will still be in the buffer;
2675   *   if you want to use them, then you need to call length() before getBuffer(minCapacity).
2676   *   If the length() was greater than minCapacity, then any contents after minCapacity
2677   *   may be lost.
2678   *   The buffer contents are not NUL-terminated by getBuffer().
2679   *   If length()<getCapacity() then you can terminate them by writing a NUL
2680   *   at index length().
2681   * - You must call releaseBuffer(newLength) in order to
2682   *   return to normal UnicodeString operation.
2683   *
2684   * @param minCapacity the minimum number of UChars that are to be available
2685   *        in the buffer, starting at the returned pointer;
2686   *        default to the current string capacity if minCapacity==-1
2687   * @return a writable pointer to the internal string buffer,
2688   *         or 0 if an error occurs (nested calls, bogus string, out of memory)
2689   *
2690   * @see releaseBuffer
2691   * @see getTerminatedBuffer()
2692   * @stable ICU 2.0
2693   */
2694  UChar *getBuffer(int32_t minCapacity);
2695
2696  /**
2697   * Release a read/write buffer on a UnicodeString object with an
2698   * "open" getBuffer(minCapacity).
2699   * This function must be called in a matched pair with getBuffer(minCapacity).
2700   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2701   *
2702   * It will set the string length to newLength, at most to the current capacity.
2703   * If newLength==-1 then it will set the length according to the
2704   * first NUL in the buffer, or to the capacity if there is no NUL.
2705   *
2706   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2707   *
2708   * @param newLength the new length of the UnicodeString object;
2709   *        limited to the current capacity if newLength is greater than that;
2710   *        if newLength==-1 (the default), it is set to u_strlen(buffer) but not more than
2711   *        the current capacity of the string
2712   *
2713   * @see getBuffer(int32_t minCapacity)
2714   * @stable ICU 2.0
2715   */
2716  void releaseBuffer(int32_t newLength=-1);
2717
2718  /**
2719   * Get a read-only pointer to the internal buffer.
2720   * This can be called at any time on a valid UnicodeString.
2721   *
2722   * It returns 0 if the string is bogus, or
2723   * during an "open" getBuffer(minCapacity).
2724   *
2725   * It can be called as many times as desired.
2726   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2727   * at which time the pointer is semantically invalidated and must not be used any more.
2728   *
2729   * The capacity of the buffer can be determined with getCapacity().
2730   * The part after length() may or may not be initialized and valid,
2731   * depending on the history of the UnicodeString object.
2732   *
2733   * The buffer contents are (probably) not NUL-terminated.
2734   * You can check if they are with
2735   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2736   * (See getTerminatedBuffer().)
2737   *
2738   * The buffer may reside in read-only memory. Its contents must not
2739   * be modified.
2740   *
2741   * @return a read-only pointer to the internal string buffer,
2742   *         or 0 if the string is bogus or a getBuffer(minCapacity) is "open"
2743   *
2744   * @see getBuffer(int32_t minCapacity)
2745   * @see getTerminatedBuffer()
2746   * @stable ICU 2.0
2747   */
2748  inline const UChar *getBuffer() const;
2749
2750  /**
2751   * Get a read-only pointer to the internal buffer,
2752   * making sure that it is NUL-terminated.
2753   * This can be called at any time on a valid UnicodeString.
2754   *
2755   * It returns 0 if the string is bogus, or
2756   * during an "open" getBuffer(minCapacity), or if the buffer cannot
2757   * be NUL-terminated (because memory allocation failed).
2758   *
2759   * It can be called as many times as desired.
2760   * The pointer that it returns will remain valid until the UnicodeString object is modified,
2761   * at which time the pointer is semantically invalidated and must not be used any more.
2762   *
2763   * The capacity of the buffer can be determined with getCapacity().
2764   * The part after length()+1 may or may not be initialized and valid,
2765   * depending on the history of the UnicodeString object.
2766   *
2767   * The buffer contents are guaranteed to be NUL-terminated.
2768   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2769   * is written.
2770   * For this reason, this function is not const, unlike getBuffer().
2771   * Note that a UnicodeString may also contain NUL characters as part of its contents.
2772   *
2773   * The buffer may reside in read-only memory. Its contents must not
2774   * be modified.
2775   *
2776   * @return a read-only pointer to the NUL-terminated internal string buffer, or 0 if the
2777   *         string is bogus, a getBuffer(minCapacity) is "open", or memory allocation failed
2778   *
2779   * @see getBuffer(int32_t minCapacity)
2780   * @see getBuffer()
2781   * @stable ICU 2.2
2782   */
2783  inline const UChar *getTerminatedBuffer();
2784
2785  //========================================
2786  // Constructors
2787  //========================================
2788
2789  /** Construct an empty UnicodeString.
2790   * @stable ICU 2.0
2791   */
2792  UnicodeString();
2793
2794  /**
2795   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2796   * @param capacity the number of UChars this UnicodeString should hold
2797   * before a resize is necessary; if count is greater than 0 and count
2798   * code points c take up more space than capacity, then capacity is adjusted
2799   * accordingly.
2800   * @param c is used to initially fill the string
2801   * @param count specifies how many code points c are to be written in the
2802   *              string
2803   * @stable ICU 2.0
2804   */
2805  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2806
2807  /**
2808   * Single UChar (code unit) constructor.
2809   * @param ch the character to place in the UnicodeString
2810   * @stable ICU 2.0
2811   */
2812  UnicodeString(UChar ch);
2813
2814  /**
2815   * Single UChar32 (code point) constructor.
2816   * @param ch the character to place in the UnicodeString
2817   * @stable ICU 2.0
2818   */
2819  UnicodeString(UChar32 ch);
2820
2821  /**
2822   * UChar* constructor.
2823   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
2824   * must be NUL (U+0000) terminated.
2825   * @stable ICU 2.0
2826   */
2827  UnicodeString(const UChar *text);
2828
2829  /**
2830   * UChar* constructor.
2831   * @param text The characters to place in the UnicodeString.
2832   * @param textLength The number of Unicode characters in <TT>text</TT>
2833   * to copy.
2834   * @stable ICU 2.0
2835   */
2836  UnicodeString(const UChar *text,
2837        int32_t textLength);
2838
2839  /**
2840   * Readonly-aliasing UChar* constructor.
2841   * The text will be used for the UnicodeString object, but
2842   * it will not be released when the UnicodeString is destroyed.
2843   * This has copy-on-write semantics:
2844   * When the string is modified, then the buffer is first copied into
2845   * newly allocated memory.
2846   * The aliased buffer is never modified.
2847   * In an assignment to another UnicodeString, the text will be aliased again,
2848   * so that both strings then alias the same readonly-text.
2849   *
2850   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2851   *                     This must be true if <code>textLength==-1</code>.
2852   * @param text The characters to alias for the UnicodeString.
2853   * @param textLength The number of Unicode characters in <code>text</code> to alias.
2854   *                   If -1, then this constructor will determine the length
2855   *                   by calling <code>u_strlen()</code>.
2856   * @stable ICU 2.0
2857   */
2858  UnicodeString(UBool isTerminated,
2859                const UChar *text,
2860                int32_t textLength);
2861
2862  /**
2863   * Writable-aliasing UChar* constructor.
2864   * The text will be used for the UnicodeString object, but
2865   * it will not be released when the UnicodeString is destroyed.
2866   * This has write-through semantics:
2867   * For as long as the capacity of the buffer is sufficient, write operations
2868   * will directly affect the buffer. When more capacity is necessary, then
2869   * a new buffer will be allocated and the contents copied as with regularly
2870   * constructed strings.
2871   * In an assignment to another UnicodeString, the buffer will be copied.
2872   * The extract(UChar *dst) function detects whether the dst pointer is the same
2873   * as the string buffer itself and will in this case not copy the contents.
2874   *
2875   * @param buffer The characters to alias for the UnicodeString.
2876   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2877   * @param buffCapacity The size of <code>buffer</code> in UChars.
2878   * @stable ICU 2.0
2879   */
2880  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2881
2882#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2883
2884  /**
2885   * char* constructor.
2886   * @param codepageData an array of bytes, null-terminated,
2887   *                     in the platform's default codepage.
2888   * @stable ICU 2.0
2889   */
2890  UnicodeString(const char *codepageData);
2891
2892  /**
2893   * char* constructor.
2894   * @param codepageData an array of bytes in the platform's default codepage.
2895   * @param dataLength The number of bytes in <TT>codepageData</TT>.
2896   * @stable ICU 2.0
2897   */
2898  UnicodeString(const char *codepageData, int32_t dataLength);
2899
2900#endif
2901
2902#if !UCONFIG_NO_CONVERSION
2903
2904  /**
2905   * char* constructor.
2906   * @param codepageData an array of bytes, null-terminated
2907   * @param codepage the encoding of <TT>codepageData</TT>.  The special
2908   * value 0 for <TT>codepage</TT> indicates that the text is in the
2909   * platform's default codepage.
2910   *
2911   * If <code>codepage</code> is an empty string (<code>""</code>),
2912   * then a simple conversion is performed on the codepage-invariant
2913   * subset ("invariant characters") of the platform encoding. See utypes.h.
2914   * Recommendation: For invariant-character strings use the constructor
2915   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2916   * because it avoids object code dependencies of UnicodeString on
2917   * the conversion code.
2918   *
2919   * @stable ICU 2.0
2920   */
2921  UnicodeString(const char *codepageData, const char *codepage);
2922
  /**
   * char* constructor taking a byte count and a codepage name.
   * This constructor is declared only when <code>!UCONFIG_NO_CONVERSION</code>.
   *
   * @param codepageData an array of bytes.
   * @param dataLength The number of bytes in <TT>codepageData</TT>.
   * @param codepage the encoding of <TT>codepageData</TT>.  The special
   * value 0 for <TT>codepage</TT> indicates that the text is in the
   * platform's default codepage.
   *
   * If <code>codepage</code> is an empty string (<code>""</code>),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   * Recommendation: For invariant-character strings use the constructor
   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   * because it avoids object code dependencies of UnicodeString on
   * the conversion code.
   *
   * @stable ICU 2.0
   */
  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2941
  /**
   * char * / UConverter constructor.
   * This constructor uses an existing UConverter object to
   * convert the codepage string to Unicode and construct a UnicodeString
   * from that.
   * It is declared only when <code>!UCONFIG_NO_CONVERSION</code>.
   *
   * The converter is reset at first.
   * If the error code indicates a failure before this constructor is called,
   * or if an error occurs during conversion or construction,
   * then the string will be bogus.
   *
   * This function avoids the overhead of opening and closing a converter if
   * multiple strings are constructed.
   *
   * @param src input codepage string
   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
   * @param cnv converter object (ucnv_resetToUnicode() will be called),
   *        can be NULL for the default converter
   * @param errorCode normal ICU error code (in/out): a failure value on input
   *        or a failure during conversion/construction yields a bogus string
   * @stable ICU 2.0
   */
  UnicodeString(
        const char *src, int32_t srcLength,
        UConverter *cnv,
        UErrorCode &errorCode);
2967
2968#endif
2969
  /**
   * Constructs a Unicode string from an invariant-character char * string.
   * About invariant characters see utypes.h.
   * This constructor has no runtime dependency on conversion code and is
   * therefore recommended over ones taking a charset name string
   * (where the empty string "" indicates invariant-character conversion).
   * Unlike the other char * constructors it is declared regardless of
   * UCONFIG_NO_CONVERSION.
   *
   * Use the macro US_INV as the third, signature-distinguishing parameter.
   *
   * For example:
   * \code
   * void fn(const char *s) {
   *   UnicodeString ustr(s, -1, US_INV);
   *   // use ustr ...
   * }
   * \endcode
   *
   * @param src String using only invariant characters.
   * @param length Length of src, or -1 if NUL-terminated.
   * @param inv Signature-distinguishing parameter, use US_INV.
   *
   * @see US_INV
   * @stable ICU 3.2
   */
  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2995
2996
  /**
   * Copy constructor.
   * Constructs a new string from the contents of another UnicodeString.
   * @param that The UnicodeString object to copy.
   * @stable ICU 2.0
   */
  UnicodeString(const UnicodeString& that);
3003
  /**
   * 'Substring' constructor from tail of source string.
   * Copies the part of <tt>src</tt> that begins at <tt>srcStart</tt>
   * and runs to the end of <tt>src</tt>.
   * @param src The UnicodeString object to copy.
   * @param srcStart The offset into <tt>src</tt> at which to start copying.
   * @stable ICU 2.2
   */
  UnicodeString(const UnicodeString& src, int32_t srcStart);
3011
  /**
   * 'Substring' constructor from subrange of source string.
   * Copies <tt>srcLength</tt> characters of <tt>src</tt>,
   * beginning at <tt>srcStart</tt>.
   * @param src The UnicodeString object to copy.
   * @param srcStart The offset into <tt>src</tt> at which to start copying.
   * @param srcLength The number of characters from <tt>src</tt> to copy.
   * @stable ICU 2.2
   */
  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3020
  /**
   * Clone this object, an instance of a subclass of Replaceable.
   * Clones can be used concurrently in multiple threads.
   * If a subclass does not implement clone(), or if an error occurs,
   * then NULL is returned.
   * The clone functions in all subclasses return a pointer to a Replaceable
   * because some compilers do not support covariant (same-as-this)
   * return types; cast to the appropriate subclass if necessary.
   * The caller must delete the clone.
   *
   * @return a clone of this object (owned by the caller),
   *         or NULL in the cases described above
   *
   * @see Replaceable::clone
   * @see getDynamicClassID
   * @stable ICU 2.6
   */
  virtual Replaceable *clone() const;
3038
  /**
   * Destructor.
   * Declared virtual, so deleting through a base-class pointer
   * (for example the Replaceable* returned by clone()) is well-defined.
   * @stable ICU 2.0
   */
  virtual ~UnicodeString();
3043
  /**
   * Create a UnicodeString from a UTF-8 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF8WithSub().
   *
   * This is a static factory function; it returns the new string by value.
   *
   * @param utf8 UTF-8 input string.
   *             Note that a StringPiece can be implicitly constructed
   *             from a std::string or a NUL-terminated const char * string.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF8
   * @see toUTF8String
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF8(const StringPiece &utf8);
3058
  /**
   * Create a UnicodeString from a UTF-32 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF32WithSub().
   *
   * This is a static factory function; it returns the new string by value.
   *
   * @param utf32 UTF-32 input string. Must not be NULL.
   * @param length Length of the input string, or -1 if NUL-terminated.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF32
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3071
3072  /* Miscellaneous operations */
3073
  /**
   * Unescape a string of characters and return a string containing
   * the result.  This string itself is not modified (the method is const).
   * The following escape sequences are recognized:
   *
   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   * \\Uhhhhhhhh   8 hex digits
   * \\xhh         1-2 hex digits
   * \\ooo         1-3 octal digits; o in [0-7]
   * \\cX          control-X; X is masked with 0x1F
   *
   * as well as the standard ANSI C escapes:
   *
   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   *
   * Anything else following a backslash is generically escaped.  For
   * example, "[a\\-z]" returns "[a-z]".
   *
   * If an escape sequence is ill-formed, this method returns an empty
   * string.  An example of an ill-formed sequence is "\\u" followed by
   * fewer than 4 hex digits.
   *
   * This function is similar to u_unescape() but not identical to it.
   * The latter takes a source char*, so it does escape recognition
   * and also invariant conversion.
   *
   * @return a string with backslash escapes interpreted, or an
   * empty string on error.
   * @see UnicodeString#unescapeAt()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UnicodeString unescape() const;
3109
  /**
   * Unescape a single escape sequence and return the represented
   * character.  See unescape() for a listing of the recognized escape
   * sequences.  The character at offset-1 is assumed (without
   * checking) to be a backslash.  If the escape sequence is
   * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
   * returned.
   *
   * @param offset an input output parameter.  On input, it is the
   * offset into this string where the escape sequence is located,
   * after the initial backslash (that is, the index of the first
   * character following the backslash).  On output, it is advanced
   * after the last character parsed.  On error, it is not advanced at all.
   * @return the character represented by the escape sequence at
   * offset, or (UChar32)0xFFFFFFFF on error.
   * @see UnicodeString#unescape()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UChar32 unescapeAt(int32_t &offset) const;
3130
  /**
   * ICU "poor man's RTTI", returns a UClassID for this class.
   * Static counterpart of getDynamicClassID().
   *
   * @return the UClassID for this class
   * @stable ICU 2.2
   */
  static UClassID U_EXPORT2 getStaticClassID();
3137
  /**
   * ICU "poor man's RTTI", returns a UClassID for the actual class.
   * Virtual counterpart of getStaticClassID().
   *
   * @return the UClassID for the actual class of this object
   * @stable ICU 2.2
   */
  virtual UClassID getDynamicClassID() const;
3144
3145  //========================================
3146  // Implementation methods
3147  //========================================
3148
3149protected:
  /**
   * Implement Replaceable::getLength() (see jitterbug 1027).
   * Protected virtual hook; the public, inline accessor is length().
   * @stable ICU 2.4
   */
  virtual int32_t getLength() const;
3155
  /**
   * Implement the virtual Replaceable hook getCharAt().
   * The change in Replaceable to use virtual getCharAt() allows
   * UnicodeString::charAt() to be inline again (see jitterbug 709).
   * @param offset index of the code unit to return
   * @stable ICU 2.4
   */
  virtual UChar getCharAt(int32_t offset) const;
3162
  /**
   * Implement the virtual Replaceable hook getChar32At().
   * The change in Replaceable to use virtual getChar32At() allows
   * UnicodeString::char32At() to be inline again (see jitterbug 709).
   * @param offset index at which to read a code point
   * @stable ICU 2.4
   */
  virtual UChar32 getChar32At(int32_t offset) const;
3169
3170private:
  // Helper for the char* constructors. Could be made public.
  // Takes UTF-8 input as a StringPiece and returns a reference to a
  // UnicodeString (presumably *this -- the definition is not in this header).
  UnicodeString &setToUTF8(const StringPiece &utf8);
  // Helper for extract(char*): UTF-8 output of the range [start, start+len)
  // into the caller-supplied target buffer of the given capacity.
  // We could make a toUTF8(target, capacity, errorCode) public but not
  // this version: New API will be cleaner if we make callers create substrings
  // rather than having start+length on every method,
  // and it should take a UErrorCode&.
  // NOTE(review): the meaning of the int32_t result is defined in the
  // implementation file, which is not visible here.
  int32_t
  toUTF8(int32_t start, int32_t len,
         char *target, int32_t capacity) const;
3181
3182
  // Workers behind compare(), compareBetween() and the relational operators.
  //
  // UnicodeString overload (inline, defined later in this header):
  // if srcText is bogus, the result is 0 when this string is bogus too and
  // 1 otherwise; else srcStart/srcLength are pinned to srcText and the call
  // is forwarded to the UChar* overload with srcText's array.
  inline int8_t
  doCompare(int32_t start,
           int32_t length,
           const UnicodeString& srcText,
           int32_t srcStart,
           int32_t srcLength) const;

  // UChar* overload: compares this[start, start+length) with
  // srcChars[srcStart, srcStart+srcLength); defined out of line.
  int8_t doCompare(int32_t start,
           int32_t length,
           const UChar *srcChars,
           int32_t srcStart,
           int32_t srcLength) const;
3195
  // Workers behind compareCodePointOrder() and compareCodePointOrderBetween().
  //
  // UnicodeString overload (inline, defined later in this header):
  // handles a bogus srcText (0 if this string is bogus too, 1 otherwise),
  // otherwise pins srcStart/srcLength to srcText and forwards to the
  // UChar* overload.
  inline int8_t
  doCompareCodePointOrder(int32_t start,
                          int32_t length,
                          const UnicodeString& srcText,
                          int32_t srcStart,
                          int32_t srcLength) const;

  // UChar* overload; defined out of line.
  int8_t doCompareCodePointOrder(int32_t start,
                                 int32_t length,
                                 const UChar *srcChars,
                                 int32_t srcStart,
                                 int32_t srcLength) const;
3208
  // Workers behind caseCompare() and caseCompareBetween().
  //
  // UnicodeString overload (inline, defined later in this header):
  // handles a bogus srcText (0 if this string is bogus too, 1 otherwise),
  // otherwise pins srcStart/srcLength to srcText and forwards to the
  // UChar* overload, passing options through unchanged.
  inline int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const UnicodeString &srcText,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;

  // UChar* overload; defined out of line.
  int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const UChar *srcChars,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;
3224
  // Forward-search workers over the range given by start/length,
  // one overload for a single code unit (UChar) and one for a
  // code point (UChar32). Both are defined out of line.
  int32_t doIndexOf(UChar c,
            int32_t start,
            int32_t length) const;

  int32_t doIndexOf(UChar32 c,
                        int32_t start,
                        int32_t length) const;
3232
  // Backward-search workers over the range given by start/length,
  // one overload for a single code unit (UChar) and one for a
  // code point (UChar32). Both are defined out of line.
  int32_t doLastIndexOf(UChar c,
                int32_t start,
                int32_t length) const;

  int32_t doLastIndexOf(UChar32 c,
                            int32_t start,
                            int32_t length) const;
3240
  // Extraction workers for the range given by start/length.
  // UChar* overload: the destination is dst, with dstStart as the
  // offset into dst. Defined out of line.
  void doExtract(int32_t start,
         int32_t length,
         UChar *dst,
         int32_t dstStart) const;

  // UnicodeString overload: the destination is another string object.
  // Declared inline; its definition is outside this part of the header.
  inline void doExtract(int32_t start,
         int32_t length,
         UnicodeString& target) const;
3249
  // Worker that returns the code unit at offset (inline; definition is
  // outside this part of the header).
  inline UChar doCharAt(int32_t offset)  const;
3251
  // Replacement workers: the range start/length of this string is replaced
  // with the source range srcStart/srcLength, taken either from another
  // UnicodeString or from a UChar array. Both return a UnicodeString
  // reference and are defined out of line.
  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength);

  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const UChar *srcChars,
               int32_t srcStart,
               int32_t srcLength);
3263
  // Worker for reversing the range given by start/length;
  // returns a UnicodeString reference. Defined out of line.
  UnicodeString& doReverse(int32_t start,
               int32_t length);
3266
  // Calculate the hash code; the public inline hashCode() simply
  // returns the result of this function.
  int32_t doHashCode(void) const;
3269
  // Get a pointer to the start of the character array: the stack buffer
  // when kUsingStackBuffer is set in fFlags, fUnion.fFields.fArray otherwise.
  // These do not check for kOpenGetBuffer, unlike the public getBuffer() function
  // (and they do not check for kIsBogus either).
  inline UChar* getArrayStart(void);
  inline const UChar* getArrayStart(void) const;
3274
  // A UnicodeString object (not necessarily its current buffer)
  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  inline UBool isWritable() const;

  // Is the current buffer writable?
  // True only if the object is writable (see above), the buffer is not
  // marked kBufferIsReadonly, and, for a refCounted buffer, refCount()==1.
  inline UBool isBufferWritable() const;
3281
  // Low-level field setters.
  // None of the following does releaseArray(); the caller is responsible
  // for releasing a previously owned array first.
  inline void setLength(int32_t len);        // sets only fShortLength and fLength
  inline void setToEmpty();                  // sets fFlags=kShortString
  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3286
  // Allocate the array for the requested capacity (in UChars);
  // the result may be fStackBuffer rather than a separate array.
  // Sets refCount to 1 if appropriate.
  // Sets fArray, fCapacity, and fFlags.
  // Returns a boolean for success or failure.
  UBool allocate(int32_t capacity);
3292
  // Release the array if it is owned by this object.
  // The low-level setters (setLength, setToEmpty, setArray) do not call this.
  void releaseArray(void);
3295
  // Turn a bogus string (kIsBogus set in fFlags) into an empty one.
  void unBogus();
3298
  // Implements the assignment operator, the copy constructor, and fastCopyFrom().
  // fastCopy defaults to FALSE.
  // NOTE(review): presumably fastCopyFrom() is the caller that passes TRUE --
  // confirm in the implementation file.
  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3301
  // Pin start and length to acceptable values (both are in/out parameters).
  // pinIndex clamps start into [0, length()].
  // pinIndices additionally clamps length into [0, length() - start],
  // using the already-pinned start.
  inline void pinIndex(int32_t& start) const;
  inline void pinIndices(int32_t& start,
                         int32_t& length) const;
3306
3307#if !UCONFIG_NO_CONVERSION
3308
  /*
   * Internal extract() using UConverter.
   * Works on the range given by start/length and writes into dest,
   * whose size is destCapacity; errors are reported through errorCode.
   * Declared only when !UCONFIG_NO_CONVERSION.
   */
  int32_t doExtract(int32_t start, int32_t length,
                    char *dest, int32_t destCapacity,
                    UConverter *cnv,
                    UErrorCode &errorCode) const;
3314
  /*
   * Real constructor for converting from codepage data,
   * selecting the converter by codepage name.
   * It assumes that it is called with !fRefCounted.
   *
   * If <code>codepage==0</code>, then the default converter
   * is used for the platform encoding.
   * If <code>codepage</code> is an empty string (<code>""</code>),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   *
   * Declared only when !UCONFIG_NO_CONVERSION.
   */
  void doCodepageCreate(const char *codepageData,
                        int32_t dataLength,
                        const char *codepage);
3328
  /*
   * Worker function for creating a UnicodeString from
   * a codepage string using a UConverter.
   * Takes an already opened converter instead of a codepage name and
   * reports errors through status.
   * Declared only when !UCONFIG_NO_CONVERSION.
   */
  void
  doCodepageCreate(const char *codepageData,
                   int32_t dataLength,
                   UConverter *converter,
                   UErrorCode &status);
3338
3339#endif
3340
  /*
   * This function is called when write access to the array
   * is necessary.
   *
   * We need to make a copy of the array if
   * the buffer is read-only, or
   * the buffer is refCounted (shared), and refCount>1, or
   * the buffer is too small.
   *
   * Return FALSE if memory could not be allocated.
   *
   * Every parameter has a default (newCapacity=-1, growCapacity=-1,
   * doCopyArray=TRUE, pBufferToDelete=0, forceClone=FALSE), so a plain
   * cloneArrayIfNeeded() call is valid.
   * NOTE(review): the exact meaning of each parameter is defined in the
   * implementation file, which is not visible here.
   */
  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
                            int32_t growCapacity = -1,
                            UBool doCopyArray = TRUE,
                            int32_t **pBufferToDelete = 0,
                            UBool forceClone = FALSE);
3357
  // Common function for case mappings.
  // toWhichCase selects the mapping to perform; titleIter, locale and
  // options are passed along to it. Returns a UnicodeString reference.
  // NOTE(review): the accepted toWhichCase values are defined in the
  // implementation file, not in this part of the header.
  UnicodeString &
  caseMap(BreakIterator *titleIter,
          const char *locale,
          uint32_t options,
          int32_t toWhichCase);
3364
  // Reference counting for kRefCounted buffers, where a refCount field
  // is stored before the characters in fArray (see the fFlags constants).
  // isBufferWritable() treats a refCounted buffer as writable only
  // while refCount()==1.
  void addRef(void);
  int32_t removeRef(void);
  int32_t refCount(void) const;
3369
  // Internal constants: sizes, sentinel values, and the bit flags
  // stored in fFlags.
  enum {
    // Set the stack buffer size so that sizeof(UnicodeString) is,
    // naturally (without padding), a multiple of sizeof(pointer).
    // 13 UChars with 4-byte pointers, 15 UChars otherwise.
    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
    kInvalidUChar=0xffff, // invalid UChar index
    kGrowSize=128, // grow size for this buffer
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fFlags (each is a distinct bit, so they can be OR-ed)
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called

    // combined values for convenience: the fFlags value for each storage mode
    kShortString=kUsingStackBuffer, // characters live in the stack buffer
    kLongString=kRefCounted,        // characters live in a refCounted array
    kReadonlyAlias=kBufferIsReadonly, // aliased buffer that must not be written
    kWritableAlias=0                // aliased buffer with no flag bits set
  };
3394
  // Classes granted access to the private members of UnicodeString.
  friend class StringThreadTest;
  friend class UnicodeStringAppendable;

  // The union is befriended so that the private constants of this class
  // are accessible from inside its definition.
  union StackBufferOrFields;        // forward declaration necessary before friend declaration
  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3400
  /*
   * The following are all the class fields that are stored
   * in each UnicodeString object.
   * Note that UnicodeString has virtual functions,
   * therefore there is an implicit vtable pointer
   * as the first real field.
   * The fields should be aligned such that no padding is necessary.
   * On 32-bit machines, the size should be 32 bytes,
   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   *
   * We use a hack to achieve this.
   *
   * With at least some compilers, each of the following is forced to
   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   * rounded up with additional padding if the fields do not already fit that requirement:
   * - sizeof(class UnicodeString)
   * - offsetof(UnicodeString, fUnion)
   * - sizeof(fUnion)
   * - sizeof(fFields)
   *
   * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
   * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
   * (Padding at the end of fFields is ok:
   * As long as there is no padding after fStackBuffer, it is not wasted space.)
   *
   * We further assume that the compiler does not reorder the fields,
   * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
   * with at most some padding (but no other field) in between.
   * (Padding there would be wasted space, but functionally harmless.)
   *
   * We use a few more sizeof(pointer)'s chunks of space with
   * fRestOfStackBuffer, fShortLength and fFlags,
   * to get up exactly to the intended sizeof(UnicodeString).
   *
   * Together, fUnion.fStackBuffer (8 UChars) and fRestOfStackBuffer
   * (US_STACKBUF_SIZE-8 UChars) provide the US_STACKBUF_SIZE UChars
   * of short-string storage reported by getCapacity().
   */
  // (implicit) *vtable;
  union StackBufferOrFields {
    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
    // else fFields is used
    UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
    struct {
      UChar   *fArray;    // the Unicode data
      int32_t fCapacity;  // capacity of fArray (in UChars)
      int32_t fLength;    // number of characters in fArray if >127; else undefined
                          // (length() reads it only when fShortLength<0)
    } fFields;
  } fUnion;
  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; // continuation of fUnion.fStackBuffer
  int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
  uint8_t fFlags;       // bit flags: see constants above
3449};
3450
/**
 * Create a new UnicodeString with the concatenation of two others.
 * Neither operand is modified (both are taken by const reference);
 * the result is returned by value.
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
 * @return UnicodeString(s1).append(s2)
 * @stable ICU 2.8
 */
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
3461
3462//========================================
3463// Inline members
3464//========================================
3465
3466//========================================
3467// Privates
3468//========================================
3469
3470inline void
3471UnicodeString::pinIndex(int32_t& start) const
3472{
3473  // pin index
3474  if(start < 0) {
3475    start = 0;
3476  } else if(start > length()) {
3477    start = length();
3478  }
3479}
3480
3481inline void
3482UnicodeString::pinIndices(int32_t& start,
3483                          int32_t& _length) const
3484{
3485  // pin indices
3486  int32_t len = length();
3487  if(start < 0) {
3488    start = 0;
3489  } else if(start > len) {
3490    start = len;
3491  }
3492  if(_length < 0) {
3493    _length = 0;
3494  } else if(_length > (len - start)) {
3495    _length = (len - start);
3496  }
3497}
3498
3499inline UChar*
3500UnicodeString::getArrayStart()
3501{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3502
3503inline const UChar*
3504UnicodeString::getArrayStart() const
3505{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3506
3507//========================================
3508// Read-only implementation methods
3509//========================================
3510inline int32_t
3511UnicodeString::length() const
3512{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3513
3514inline int32_t
3515UnicodeString::getCapacity() const
3516{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3517
3518inline int32_t
3519UnicodeString::hashCode() const
3520{ return doHashCode(); }
3521
3522inline UBool
3523UnicodeString::isBogus() const
3524{ return (UBool)(fFlags & kIsBogus); }
3525
3526inline UBool
3527UnicodeString::isWritable() const
3528{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3529
3530inline UBool
3531UnicodeString::isBufferWritable() const
3532{
3533  return (UBool)(
3534      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3535      (!(fFlags&kRefCounted) || refCount()==1));
3536}
3537
3538inline const UChar *
3539UnicodeString::getBuffer() const {
3540  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3541    return 0;
3542  } else if(fFlags&kUsingStackBuffer) {
3543    return fUnion.fStackBuffer;
3544  } else {
3545    return fUnion.fFields.fArray;
3546  }
3547}
3548
3549//========================================
3550// Read-only alias methods
3551//========================================
3552inline int8_t
3553UnicodeString::doCompare(int32_t start,
3554              int32_t thisLength,
3555              const UnicodeString& srcText,
3556              int32_t srcStart,
3557              int32_t srcLength) const
3558{
3559  if(srcText.isBogus()) {
3560    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3561  } else {
3562    srcText.pinIndices(srcStart, srcLength);
3563    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3564  }
3565}
3566
3567inline UBool
3568UnicodeString::operator== (const UnicodeString& text) const
3569{
3570  if(isBogus()) {
3571    return text.isBogus();
3572  } else {
3573    int32_t len = length(), textLength = text.length();
3574    return
3575      !text.isBogus() &&
3576      len == textLength &&
3577      doCompare(0, len, text, 0, textLength) == 0;
3578  }
3579}
3580
3581inline UBool
3582UnicodeString::operator!= (const UnicodeString& text) const
3583{ return (! operator==(text)); }
3584
3585inline UBool
3586UnicodeString::operator> (const UnicodeString& text) const
3587{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3588
3589inline UBool
3590UnicodeString::operator< (const UnicodeString& text) const
3591{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3592
3593inline UBool
3594UnicodeString::operator>= (const UnicodeString& text) const
3595{ return doCompare(0, length(), text, 0, text.length()) != -1; }
3596
3597inline UBool
3598UnicodeString::operator<= (const UnicodeString& text) const
3599{ return doCompare(0, length(), text, 0, text.length()) != 1; }
3600
3601inline int8_t
3602UnicodeString::compare(const UnicodeString& text) const
3603{ return doCompare(0, length(), text, 0, text.length()); }
3604
3605inline int8_t
3606UnicodeString::compare(int32_t start,
3607               int32_t _length,
3608               const UnicodeString& srcText) const
3609{ return doCompare(start, _length, srcText, 0, srcText.length()); }
3610
3611inline int8_t
3612UnicodeString::compare(const UChar *srcChars,
3613               int32_t srcLength) const
3614{ return doCompare(0, length(), srcChars, 0, srcLength); }
3615
3616inline int8_t
3617UnicodeString::compare(int32_t start,
3618               int32_t _length,
3619               const UnicodeString& srcText,
3620               int32_t srcStart,
3621               int32_t srcLength) const
3622{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3623
3624inline int8_t
3625UnicodeString::compare(int32_t start,
3626               int32_t _length,
3627               const UChar *srcChars) const
3628{ return doCompare(start, _length, srcChars, 0, _length); }
3629
3630inline int8_t
3631UnicodeString::compare(int32_t start,
3632               int32_t _length,
3633               const UChar *srcChars,
3634               int32_t srcStart,
3635               int32_t srcLength) const
3636{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3637
3638inline int8_t
3639UnicodeString::compareBetween(int32_t start,
3640                  int32_t limit,
3641                  const UnicodeString& srcText,
3642                  int32_t srcStart,
3643                  int32_t srcLimit) const
3644{ return doCompare(start, limit - start,
3645           srcText, srcStart, srcLimit - srcStart); }
3646
3647inline int8_t
3648UnicodeString::doCompareCodePointOrder(int32_t start,
3649                                       int32_t thisLength,
3650                                       const UnicodeString& srcText,
3651                                       int32_t srcStart,
3652                                       int32_t srcLength) const
3653{
3654  if(srcText.isBogus()) {
3655    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3656  } else {
3657    srcText.pinIndices(srcStart, srcLength);
3658    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3659  }
3660}
3661
3662inline int8_t
3663UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3664{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3665
3666inline int8_t
3667UnicodeString::compareCodePointOrder(int32_t start,
3668                                     int32_t _length,
3669                                     const UnicodeString& srcText) const
3670{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3671
3672inline int8_t
3673UnicodeString::compareCodePointOrder(const UChar *srcChars,
3674                                     int32_t srcLength) const
3675{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3676
3677inline int8_t
3678UnicodeString::compareCodePointOrder(int32_t start,
3679                                     int32_t _length,
3680                                     const UnicodeString& srcText,
3681                                     int32_t srcStart,
3682                                     int32_t srcLength) const
3683{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3684
3685inline int8_t
3686UnicodeString::compareCodePointOrder(int32_t start,
3687                                     int32_t _length,
3688                                     const UChar *srcChars) const
3689{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3690
3691inline int8_t
3692UnicodeString::compareCodePointOrder(int32_t start,
3693                                     int32_t _length,
3694                                     const UChar *srcChars,
3695                                     int32_t srcStart,
3696                                     int32_t srcLength) const
3697{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3698
3699inline int8_t
3700UnicodeString::compareCodePointOrderBetween(int32_t start,
3701                                            int32_t limit,
3702                                            const UnicodeString& srcText,
3703                                            int32_t srcStart,
3704                                            int32_t srcLimit) const
3705{ return doCompareCodePointOrder(start, limit - start,
3706           srcText, srcStart, srcLimit - srcStart); }
3707
3708inline int8_t
3709UnicodeString::doCaseCompare(int32_t start,
3710                             int32_t thisLength,
3711                             const UnicodeString &srcText,
3712                             int32_t srcStart,
3713                             int32_t srcLength,
3714                             uint32_t options) const
3715{
3716  if(srcText.isBogus()) {
3717    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3718  } else {
3719    srcText.pinIndices(srcStart, srcLength);
3720    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3721  }
3722}
3723
3724inline int8_t
3725UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3726  return doCaseCompare(0, length(), text, 0, text.length(), options);
3727}
3728
3729inline int8_t
3730UnicodeString::caseCompare(int32_t start,
3731                           int32_t _length,
3732                           const UnicodeString &srcText,
3733                           uint32_t options) const {
3734  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3735}
3736
3737inline int8_t
3738UnicodeString::caseCompare(const UChar *srcChars,
3739                           int32_t srcLength,
3740                           uint32_t options) const {
3741  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3742}
3743
3744inline int8_t
3745UnicodeString::caseCompare(int32_t start,
3746                           int32_t _length,
3747                           const UnicodeString &srcText,
3748                           int32_t srcStart,
3749                           int32_t srcLength,
3750                           uint32_t options) const {
3751  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3752}
3753
3754inline int8_t
3755UnicodeString::caseCompare(int32_t start,
3756                           int32_t _length,
3757                           const UChar *srcChars,
3758                           uint32_t options) const {
3759  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3760}
3761
3762inline int8_t
3763UnicodeString::caseCompare(int32_t start,
3764                           int32_t _length,
3765                           const UChar *srcChars,
3766                           int32_t srcStart,
3767                           int32_t srcLength,
3768                           uint32_t options) const {
3769  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3770}
3771
3772inline int8_t
3773UnicodeString::caseCompareBetween(int32_t start,
3774                                  int32_t limit,
3775                                  const UnicodeString &srcText,
3776                                  int32_t srcStart,
3777                                  int32_t srcLimit,
3778                                  uint32_t options) const {
3779  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3780}
3781
3782inline int32_t
3783UnicodeString::indexOf(const UnicodeString& srcText,
3784               int32_t srcStart,
3785               int32_t srcLength,
3786               int32_t start,
3787               int32_t _length) const
3788{
3789  if(!srcText.isBogus()) {
3790    srcText.pinIndices(srcStart, srcLength);
3791    if(srcLength > 0) {
3792      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3793    }
3794  }
3795  return -1;
3796}
3797
3798inline int32_t
3799UnicodeString::indexOf(const UnicodeString& text) const
3800{ return indexOf(text, 0, text.length(), 0, length()); }
3801
3802inline int32_t
3803UnicodeString::indexOf(const UnicodeString& text,
3804               int32_t start) const {
3805  pinIndex(start);
3806  return indexOf(text, 0, text.length(), start, length() - start);
3807}
3808
3809inline int32_t
3810UnicodeString::indexOf(const UnicodeString& text,
3811               int32_t start,
3812               int32_t _length) const
3813{ return indexOf(text, 0, text.length(), start, _length); }
3814
3815inline int32_t
3816UnicodeString::indexOf(const UChar *srcChars,
3817               int32_t srcLength,
3818               int32_t start) const {
3819  pinIndex(start);
3820  return indexOf(srcChars, 0, srcLength, start, length() - start);
3821}
3822
3823inline int32_t
3824UnicodeString::indexOf(const UChar *srcChars,
3825               int32_t srcLength,
3826               int32_t start,
3827               int32_t _length) const
3828{ return indexOf(srcChars, 0, srcLength, start, _length); }
3829
3830inline int32_t
3831UnicodeString::indexOf(UChar c,
3832               int32_t start,
3833               int32_t _length) const
3834{ return doIndexOf(c, start, _length); }
3835
3836inline int32_t
3837UnicodeString::indexOf(UChar32 c,
3838               int32_t start,
3839               int32_t _length) const
3840{ return doIndexOf(c, start, _length); }
3841
3842inline int32_t
3843UnicodeString::indexOf(UChar c) const
3844{ return doIndexOf(c, 0, length()); }
3845
3846inline int32_t
3847UnicodeString::indexOf(UChar32 c) const
3848{ return indexOf(c, 0, length()); }
3849
3850inline int32_t
3851UnicodeString::indexOf(UChar c,
3852               int32_t start) const {
3853  pinIndex(start);
3854  return doIndexOf(c, start, length() - start);
3855}
3856
3857inline int32_t
3858UnicodeString::indexOf(UChar32 c,
3859               int32_t start) const {
3860  pinIndex(start);
3861  return indexOf(c, start, length() - start);
3862}
3863
3864inline int32_t
3865UnicodeString::lastIndexOf(const UChar *srcChars,
3866               int32_t srcLength,
3867               int32_t start,
3868               int32_t _length) const
3869{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3870
3871inline int32_t
3872UnicodeString::lastIndexOf(const UChar *srcChars,
3873               int32_t srcLength,
3874               int32_t start) const {
3875  pinIndex(start);
3876  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3877}
3878
3879inline int32_t
3880UnicodeString::lastIndexOf(const UnicodeString& srcText,
3881               int32_t srcStart,
3882               int32_t srcLength,
3883               int32_t start,
3884               int32_t _length) const
3885{
3886  if(!srcText.isBogus()) {
3887    srcText.pinIndices(srcStart, srcLength);
3888    if(srcLength > 0) {
3889      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3890    }
3891  }
3892  return -1;
3893}
3894
3895inline int32_t
3896UnicodeString::lastIndexOf(const UnicodeString& text,
3897               int32_t start,
3898               int32_t _length) const
3899{ return lastIndexOf(text, 0, text.length(), start, _length); }
3900
3901inline int32_t
3902UnicodeString::lastIndexOf(const UnicodeString& text,
3903               int32_t start) const {
3904  pinIndex(start);
3905  return lastIndexOf(text, 0, text.length(), start, length() - start);
3906}
3907
3908inline int32_t
3909UnicodeString::lastIndexOf(const UnicodeString& text) const
3910{ return lastIndexOf(text, 0, text.length(), 0, length()); }
3911
3912inline int32_t
3913UnicodeString::lastIndexOf(UChar c,
3914               int32_t start,
3915               int32_t _length) const
3916{ return doLastIndexOf(c, start, _length); }
3917
3918inline int32_t
3919UnicodeString::lastIndexOf(UChar32 c,
3920               int32_t start,
3921               int32_t _length) const {
3922  return doLastIndexOf(c, start, _length);
3923}
3924
3925inline int32_t
3926UnicodeString::lastIndexOf(UChar c) const
3927{ return doLastIndexOf(c, 0, length()); }
3928
3929inline int32_t
3930UnicodeString::lastIndexOf(UChar32 c) const {
3931  return lastIndexOf(c, 0, length());
3932}
3933
3934inline int32_t
3935UnicodeString::lastIndexOf(UChar c,
3936               int32_t start) const {
3937  pinIndex(start);
3938  return doLastIndexOf(c, start, length() - start);
3939}
3940
3941inline int32_t
3942UnicodeString::lastIndexOf(UChar32 c,
3943               int32_t start) const {
3944  pinIndex(start);
3945  return lastIndexOf(c, start, length() - start);
3946}
3947
3948inline UBool
3949UnicodeString::startsWith(const UnicodeString& text) const
3950{ return compare(0, text.length(), text, 0, text.length()) == 0; }
3951
3952inline UBool
3953UnicodeString::startsWith(const UnicodeString& srcText,
3954              int32_t srcStart,
3955              int32_t srcLength) const
3956{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3957
3958inline UBool
3959UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
3960  if(srcLength < 0) {
3961    srcLength = u_strlen(srcChars);
3962  }
3963  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
3964}
3965
3966inline UBool
3967UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
3968  if(srcLength < 0) {
3969    srcLength = u_strlen(srcChars);
3970  }
3971  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
3972}
3973
3974inline UBool
3975UnicodeString::endsWith(const UnicodeString& text) const
3976{ return doCompare(length() - text.length(), text.length(),
3977           text, 0, text.length()) == 0; }
3978
3979inline UBool
3980UnicodeString::endsWith(const UnicodeString& srcText,
3981            int32_t srcStart,
3982            int32_t srcLength) const {
3983  srcText.pinIndices(srcStart, srcLength);
3984  return doCompare(length() - srcLength, srcLength,
3985                   srcText, srcStart, srcLength) == 0;
3986}
3987
3988inline UBool
3989UnicodeString::endsWith(const UChar *srcChars,
3990            int32_t srcLength) const {
3991  if(srcLength < 0) {
3992    srcLength = u_strlen(srcChars);
3993  }
3994  return doCompare(length() - srcLength, srcLength,
3995                   srcChars, 0, srcLength) == 0;
3996}
3997
3998inline UBool
3999UnicodeString::endsWith(const UChar *srcChars,
4000            int32_t srcStart,
4001            int32_t srcLength) const {
4002  if(srcLength < 0) {
4003    srcLength = u_strlen(srcChars + srcStart);
4004  }
4005  return doCompare(length() - srcLength, srcLength,
4006                   srcChars, srcStart, srcLength) == 0;
4007}
4008
4009//========================================
4010// replace
4011//========================================
4012inline UnicodeString&
4013UnicodeString::replace(int32_t start,
4014               int32_t _length,
4015               const UnicodeString& srcText)
4016{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4017
4018inline UnicodeString&
4019UnicodeString::replace(int32_t start,
4020               int32_t _length,
4021               const UnicodeString& srcText,
4022               int32_t srcStart,
4023               int32_t srcLength)
4024{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4025
4026inline UnicodeString&
4027UnicodeString::replace(int32_t start,
4028               int32_t _length,
4029               const UChar *srcChars,
4030               int32_t srcLength)
4031{ return doReplace(start, _length, srcChars, 0, srcLength); }
4032
4033inline UnicodeString&
4034UnicodeString::replace(int32_t start,
4035               int32_t _length,
4036               const UChar *srcChars,
4037               int32_t srcStart,
4038               int32_t srcLength)
4039{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4040
4041inline UnicodeString&
4042UnicodeString::replace(int32_t start,
4043               int32_t _length,
4044               UChar srcChar)
4045{ return doReplace(start, _length, &srcChar, 0, 1); }
4046
4047inline UnicodeString&
4048UnicodeString::replace(int32_t start,
4049               int32_t _length,
4050               UChar32 srcChar) {
4051  UChar buffer[U16_MAX_LENGTH];
4052  int32_t count = 0;
4053  UBool isError = FALSE;
4054  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
4055  (void)isError;
4056  return doReplace(start, _length, buffer, 0, count);
4057}
4058
4059inline UnicodeString&
4060UnicodeString::replaceBetween(int32_t start,
4061                  int32_t limit,
4062                  const UnicodeString& srcText)
4063{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4064
4065inline UnicodeString&
4066UnicodeString::replaceBetween(int32_t start,
4067                  int32_t limit,
4068                  const UnicodeString& srcText,
4069                  int32_t srcStart,
4070                  int32_t srcLimit)
4071{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4072
4073inline UnicodeString&
4074UnicodeString::findAndReplace(const UnicodeString& oldText,
4075                  const UnicodeString& newText)
4076{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4077            newText, 0, newText.length()); }
4078
4079inline UnicodeString&
4080UnicodeString::findAndReplace(int32_t start,
4081                  int32_t _length,
4082                  const UnicodeString& oldText,
4083                  const UnicodeString& newText)
4084{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4085            newText, 0, newText.length()); }
4086
4087// ============================
4088// extract
4089// ============================
4090inline void
4091UnicodeString::doExtract(int32_t start,
4092             int32_t _length,
4093             UnicodeString& target) const
4094{ target.replace(0, target.length(), *this, start, _length); }
4095
4096inline void
4097UnicodeString::extract(int32_t start,
4098               int32_t _length,
4099               UChar *target,
4100               int32_t targetStart) const
4101{ doExtract(start, _length, target, targetStart); }
4102
4103inline void
4104UnicodeString::extract(int32_t start,
4105               int32_t _length,
4106               UnicodeString& target) const
4107{ doExtract(start, _length, target); }
4108
4109#if !UCONFIG_NO_CONVERSION
4110
4111inline int32_t
4112UnicodeString::extract(int32_t start,
4113               int32_t _length,
4114               char *dst,
4115               const char *codepage) const
4116
4117{
4118  // This dstSize value will be checked explicitly
4119  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4120}
4121
4122#endif
4123
4124inline void
4125UnicodeString::extractBetween(int32_t start,
4126                  int32_t limit,
4127                  UChar *dst,
4128                  int32_t dstStart) const {
4129  pinIndex(start);
4130  pinIndex(limit);
4131  doExtract(start, limit - start, dst, dstStart);
4132}
4133
4134inline UnicodeString
4135UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4136    return tempSubString(start, limit - start);
4137}
4138
4139inline UChar
4140UnicodeString::doCharAt(int32_t offset) const
4141{
4142  if((uint32_t)offset < (uint32_t)length()) {
4143    return getArrayStart()[offset];
4144  } else {
4145    return kInvalidUChar;
4146  }
4147}
4148
4149inline UChar
4150UnicodeString::charAt(int32_t offset) const
4151{ return doCharAt(offset); }
4152
4153inline UChar
4154UnicodeString::operator[] (int32_t offset) const
4155{ return doCharAt(offset); }
4156
4157inline UChar32
4158UnicodeString::char32At(int32_t offset) const
4159{
4160  int32_t len = length();
4161  if((uint32_t)offset < (uint32_t)len) {
4162    const UChar *array = getArrayStart();
4163    UChar32 c;
4164    U16_GET(array, 0, offset, len, c);
4165    return c;
4166  } else {
4167    return kInvalidUChar;
4168  }
4169}
4170
4171inline int32_t
4172UnicodeString::getChar32Start(int32_t offset) const {
4173  if((uint32_t)offset < (uint32_t)length()) {
4174    const UChar *array = getArrayStart();
4175    U16_SET_CP_START(array, 0, offset);
4176    return offset;
4177  } else {
4178    return 0;
4179  }
4180}
4181
4182inline int32_t
4183UnicodeString::getChar32Limit(int32_t offset) const {
4184  int32_t len = length();
4185  if((uint32_t)offset < (uint32_t)len) {
4186    const UChar *array = getArrayStart();
4187    U16_SET_CP_LIMIT(array, 0, offset, len);
4188    return offset;
4189  } else {
4190    return len;
4191  }
4192}
4193
4194inline UBool
4195UnicodeString::isEmpty() const {
4196  return fShortLength == 0;
4197}
4198
4199//========================================
4200// Write implementation methods
4201//========================================
4202inline void
4203UnicodeString::setLength(int32_t len) {
4204  if(len <= 127) {
4205    fShortLength = (int8_t)len;
4206  } else {
4207    fShortLength = (int8_t)-1;
4208    fUnion.fFields.fLength = len;
4209  }
4210}
4211
4212inline void
4213UnicodeString::setToEmpty() {
4214  fShortLength = 0;
4215  fFlags = kShortString;
4216}
4217
4218inline void
4219UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4220  setLength(len);
4221  fUnion.fFields.fArray = array;
4222  fUnion.fFields.fCapacity = capacity;
4223}
4224
4225inline const UChar *
4226UnicodeString::getTerminatedBuffer() {
4227  if(!isWritable()) {
4228    return 0;
4229  } else {
4230    UChar *array = getArrayStart();
4231    int32_t len = length();
4232    if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4233      /*
4234       * kRefCounted: Do not write the NUL if the buffer is shared.
4235       * That is mostly safe, except when the length of one copy was modified
4236       * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4237       * Then the NUL would be written into the middle of another copy's string.
4238       */
4239      if(!(fFlags&kBufferIsReadonly)) {
4240        /*
4241         * We must not write to a readonly buffer, but it is known to be
4242         * NUL-terminated if len<capacity.
4243         * A shared, allocated buffer (refCount()>1) must not have its contents
4244         * modified, but the NUL at [len] is beyond the string contents,
4245         * and multiple string objects and threads writing the same NUL into the
4246         * same location is harmless.
4247         * In all other cases, the buffer is fully writable and it is anyway safe
4248         * to write the NUL.
4249         *
4250         * Note: An earlier version of this code tested whether there is a NUL
4251         * at [len] already, but, while safe, it generated lots of warnings from
4252         * tools like valgrind and Purify.
4253         */
4254        array[len] = 0;
4255      }
4256      return array;
4257    } else if(cloneArrayIfNeeded(len+1)) {
4258      array = getArrayStart();
4259      array[len] = 0;
4260      return array;
4261    } else {
4262      return 0;
4263    }
4264  }
4265}
4266
4267inline UnicodeString&
4268UnicodeString::operator= (UChar ch)
4269{ return doReplace(0, length(), &ch, 0, 1); }
4270
4271inline UnicodeString&
4272UnicodeString::operator= (UChar32 ch)
4273{ return replace(0, length(), ch); }
4274
4275inline UnicodeString&
4276UnicodeString::setTo(const UnicodeString& srcText,
4277             int32_t srcStart,
4278             int32_t srcLength)
4279{
4280  unBogus();
4281  return doReplace(0, length(), srcText, srcStart, srcLength);
4282}
4283
4284inline UnicodeString&
4285UnicodeString::setTo(const UnicodeString& srcText,
4286             int32_t srcStart)
4287{
4288  unBogus();
4289  srcText.pinIndex(srcStart);
4290  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4291}
4292
4293inline UnicodeString&
4294UnicodeString::setTo(const UnicodeString& srcText)
4295{
4296  return copyFrom(srcText);
4297}
4298
4299inline UnicodeString&
4300UnicodeString::setTo(const UChar *srcChars,
4301             int32_t srcLength)
4302{
4303  unBogus();
4304  return doReplace(0, length(), srcChars, 0, srcLength);
4305}
4306
4307inline UnicodeString&
4308UnicodeString::setTo(UChar srcChar)
4309{
4310  unBogus();
4311  return doReplace(0, length(), &srcChar, 0, 1);
4312}
4313
4314inline UnicodeString&
4315UnicodeString::setTo(UChar32 srcChar)
4316{
4317  unBogus();
4318  return replace(0, length(), srcChar);
4319}
4320
4321inline UnicodeString&
4322UnicodeString::append(const UnicodeString& srcText,
4323              int32_t srcStart,
4324              int32_t srcLength)
4325{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
4326
4327inline UnicodeString&
4328UnicodeString::append(const UnicodeString& srcText)
4329{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4330
4331inline UnicodeString&
4332UnicodeString::append(const UChar *srcChars,
4333              int32_t srcStart,
4334              int32_t srcLength)
4335{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4336
4337inline UnicodeString&
4338UnicodeString::append(const UChar *srcChars,
4339              int32_t srcLength)
4340{ return doReplace(length(), 0, srcChars, 0, srcLength); }
4341
4342inline UnicodeString&
4343UnicodeString::append(UChar srcChar)
4344{ return doReplace(length(), 0, &srcChar, 0, 1); }
4345
4346inline UnicodeString&
4347UnicodeString::append(UChar32 srcChar) {
4348  UChar buffer[U16_MAX_LENGTH];
4349  int32_t _length = 0;
4350  UBool isError = FALSE;
4351  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4352  (void)isError;
4353  return doReplace(length(), 0, buffer, 0, _length);
4354}
4355
4356inline UnicodeString&
4357UnicodeString::operator+= (UChar ch)
4358{ return doReplace(length(), 0, &ch, 0, 1); }
4359
4360inline UnicodeString&
4361UnicodeString::operator+= (UChar32 ch) {
4362  return append(ch);
4363}
4364
4365inline UnicodeString&
4366UnicodeString::operator+= (const UnicodeString& srcText)
4367{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
4368
4369inline UnicodeString&
4370UnicodeString::insert(int32_t start,
4371              const UnicodeString& srcText,
4372              int32_t srcStart,
4373              int32_t srcLength)
4374{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4375
4376inline UnicodeString&
4377UnicodeString::insert(int32_t start,
4378              const UnicodeString& srcText)
4379{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4380
4381inline UnicodeString&
4382UnicodeString::insert(int32_t start,
4383              const UChar *srcChars,
4384              int32_t srcStart,
4385              int32_t srcLength)
4386{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4387
4388inline UnicodeString&
4389UnicodeString::insert(int32_t start,
4390              const UChar *srcChars,
4391              int32_t srcLength)
4392{ return doReplace(start, 0, srcChars, 0, srcLength); }
4393
4394inline UnicodeString&
4395UnicodeString::insert(int32_t start,
4396              UChar srcChar)
4397{ return doReplace(start, 0, &srcChar, 0, 1); }
4398
4399inline UnicodeString&
4400UnicodeString::insert(int32_t start,
4401              UChar32 srcChar)
4402{ return replace(start, 0, srcChar); }
4403
4404
4405inline UnicodeString&
4406UnicodeString::remove()
4407{
4408  // remove() of a bogus string makes the string empty and non-bogus
4409  // we also un-alias a read-only alias to deal with NUL-termination
4410  // issues with getTerminatedBuffer()
4411  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4412    setToEmpty();
4413  } else {
4414    fShortLength = 0;
4415  }
4416  return *this;
4417}
4418
4419inline UnicodeString&
4420UnicodeString::remove(int32_t start,
4421             int32_t _length)
4422{
4423    if(start <= 0 && _length == INT32_MAX) {
4424        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4425        return remove();
4426    }
4427    return doReplace(start, _length, NULL, 0, 0);
4428}
4429
4430inline UnicodeString&
4431UnicodeString::removeBetween(int32_t start,
4432                int32_t limit)
4433{ return doReplace(start, limit - start, NULL, 0, 0); }
4434
4435inline UnicodeString &
4436UnicodeString::retainBetween(int32_t start, int32_t limit) {
4437  truncate(limit);
4438  return doReplace(0, start, NULL, 0, 0);
4439}
4440
4441inline UBool
4442UnicodeString::truncate(int32_t targetLength)
4443{
4444  if(isBogus() && targetLength == 0) {
4445    // truncate(0) of a bogus string makes the string empty and non-bogus
4446    unBogus();
4447    return FALSE;
4448  } else if((uint32_t)targetLength < (uint32_t)length()) {
4449    setLength(targetLength);
4450    if(fFlags&kBufferIsReadonly) {
4451      fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
4452    }
4453    return TRUE;
4454  } else {
4455    return FALSE;
4456  }
4457}
4458
4459inline UnicodeString&
4460UnicodeString::reverse()
4461{ return doReverse(0, length()); }
4462
4463inline UnicodeString&
4464UnicodeString::reverse(int32_t start,
4465               int32_t _length)
4466{ return doReverse(start, _length); }
4467
4468U_NAMESPACE_END
4469
4470#endif
4471