1/*
2******************************************************************************
3* Copyright (C) 1999-2013, International Business Machines Corporation and
4* others. All Rights Reserved.
5******************************************************************************
6*
7* File unistr.cpp
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   09/25/98    stephen     Creation.
13*   04/20/99    stephen     Overhauled per 4/16 code review.
14*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
16*                           Replaceable.
17*   06/25/01    grhoten     Removed the dependency on iostream
18******************************************************************************
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/appendable.h"
23#include "unicode/putil.h"
24#include "cstring.h"
25#include "cmemory.h"
26#include "unicode/ustring.h"
27#include "unicode/unistr.h"
28#include "unicode/utf.h"
29#include "unicode/utf16.h"
30#include "uelement.h"
31#include "ustr_imp.h"
32#include "umutex.h"
33#include "uassert.h"
34
35#if 0
36
37#include <iostream>
38using namespace std;
39
40//DEBUGGING
41void
42print(const UnicodeString& s,
43      const char *name)
44{
45  UChar c;
46  cout << name << ":|";
47  for(int i = 0; i < s.length(); ++i) {
48    c = s[i];
49    if(c>= 0x007E || c < 0x0020)
50      cout << "[0x" << hex << s[i] << "]";
51    else
52      cout << (char) s[i];
53  }
54  cout << '|' << endl;
55}
56
57void
58print(const UChar *s,
59      int32_t len,
60      const char *name)
61{
62  UChar c;
63  cout << name << ":|";
64  for(int i = 0; i < len; ++i) {
65    c = s[i];
66    if(c>= 0x007E || c < 0x0020)
67      cout << "[0x" << hex << s[i] << "]";
68    else
69      cout << (char) s[i];
70  }
71  cout << '|' << endl;
72}
73// END DEBUGGING
74#endif
75
76// Local function definitions for now
77
78// need to copy areas that may overlap
79static
80inline void
81us_arrayCopy(const UChar *src, int32_t srcStart,
82         UChar *dst, int32_t dstStart, int32_t count)
83{
84  if(count>0) {
85    uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86  }
87}
88
89// u_unescapeAt() callback to get a UChar from a UnicodeString
90U_CDECL_BEGIN
// Callback with C calling convention: context is treated as a pointer to an
// icu::UnicodeString, and the code unit at offset is returned via charAt().
// Passed to u_unescapeAt() by UnicodeString::unescapeAt().
static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset, void *context) {
    return ((icu::UnicodeString*) context)->charAt(offset);
}
95U_CDECL_END
96
97U_NAMESPACE_BEGIN
98
/* The Replaceable virtual destructor is defined here rather than in the
   header because of how AIX handles multiple definitions of virtual
   functions. It has nothing to release.
*/
Replaceable::~Replaceable() {}
103
// Macro defined outside this file; presumably emits the ICU class-ID (RTTI)
// boilerplate for UnicodeString -- NOTE(review): confirm against uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106UnicodeString U_EXPORT2
107operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108    return
109        UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110            append(s1).
111                append(s2);
112}
113
114//========================================
115// Reference Counting functions, put at top of file so that optimizing compilers
116//                               have a chance to automatically inline.
117//========================================
118
// Atomically increments the reference counter, which is stored in the
// int32_t slot immediately before the first UChar of fArray
// (see allocate(), which writes the counter and then points fArray past it).
void
UnicodeString::addRef() {
  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
}
123
// Atomically decrements the reference counter stored just before fArray
// and returns the value reported by umtx_atomic_dec().
int32_t
UnicodeString::removeRef() {
  return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
}
128
// Reads the reference counter stored just before fArray using
// umtx_loadAcquire(). Only meaningful while fArray points into a
// buffer laid out by allocate().
int32_t
UnicodeString::refCount() const {
  return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
}
133
134void
135UnicodeString::releaseArray() {
136  if((fFlags & kRefCounted) && removeRef() == 0) {
137    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
138  }
139}
140
141
142
143//========================================
144// Constructors
145//========================================
146
147// The default constructor is inline in unistr.h.
148
149UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
150  : fShortLength(0),
151    fFlags(0)
152{
153  if(count <= 0 || (uint32_t)c > 0x10ffff) {
154    // just allocate and do not do anything else
155    allocate(capacity);
156  } else {
157    // count > 0, allocate and fill the new string with count c's
158    int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
159    if(capacity < length) {
160      capacity = length;
161    }
162    if(allocate(capacity)) {
163      UChar *array = getArrayStart();
164      int32_t i = 0;
165
166      // fill the new string with c
167      if(unitCount == 1) {
168        // fill with length UChars
169        while(i < length) {
170          array[i++] = (UChar)c;
171        }
172      } else {
173        // get the code units for c
174        UChar units[U16_MAX_LENGTH];
175        U16_APPEND_UNSAFE(units, i, c);
176
177        // now it must be i==unitCount
178        i = 0;
179
180        // for Unicode, unitCount can only be 1, 2, 3, or 4
181        // 1 is handled above
182        while(i < length) {
183          int32_t unitIdx = 0;
184          while(unitIdx < unitCount) {
185            array[i++]=units[unitIdx++];
186          }
187        }
188      }
189    }
190    setLength(length);
191  }
192}
193
// Constructs a one-code-unit string: the unit is stored in the internal
// stack buffer and the short length is set to 1.
UnicodeString::UnicodeString(UChar ch)
  : fShortLength(1),
    fFlags(kShortString)
{
  fUnion.fStackBuffer[0] = ch;
}
200
201UnicodeString::UnicodeString(UChar32 ch)
202  : fShortLength(0),
203    fFlags(kShortString)
204{
205  int32_t i = 0;
206  UBool isError = FALSE;
207  U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
208  // We test isError so that the compiler does not complain that we don't.
209  // If isError then i==0 which is what we want anyway.
210  if(!isError) {
211    fShortLength = (int8_t)i;
212  }
213}
214
// Constructs a string from text by starting empty (short string) and
// calling doReplace() with a source length of -1.
// NOTE(review): -1 presumably means "NUL-terminated" to doReplace() -- confirm there.
UnicodeString::UnicodeString(const UChar *text)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, -1);
}
221
// Constructs a string from the first textLength code units of text by
// starting empty (short string) and delegating to doReplace().
UnicodeString::UnicodeString(const UChar *text,
                             int32_t textLength)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, textLength);
}
229
230UnicodeString::UnicodeString(UBool isTerminated,
231                             const UChar *text,
232                             int32_t textLength)
233  : fShortLength(0),
234    fFlags(kReadonlyAlias)
235{
236  if(text == NULL) {
237    // treat as an empty string, do not alias
238    setToEmpty();
239  } else if(textLength < -1 ||
240            (textLength == -1 && !isTerminated) ||
241            (textLength >= 0 && isTerminated && text[textLength] != 0)
242  ) {
243    setToBogus();
244  } else {
245    if(textLength == -1) {
246      // text is terminated, or else it would have failed the above test
247      textLength = u_strlen(text);
248    }
249    setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
250  }
251}
252
253UnicodeString::UnicodeString(UChar *buff,
254                             int32_t buffLength,
255                             int32_t buffCapacity)
256  : fShortLength(0),
257    fFlags(kWritableAlias)
258{
259  if(buff == NULL) {
260    // treat as an empty string, do not alias
261    setToEmpty();
262  } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
263    setToBogus();
264  } else {
265    if(buffLength == -1) {
266      // fLength = u_strlen(buff); but do not look beyond buffCapacity
267      const UChar *p = buff, *limit = buff + buffCapacity;
268      while(p != limit && *p != 0) {
269        ++p;
270      }
271      buffLength = (int32_t)(p - buff);
272    }
273    setArray(buff, buffLength, buffCapacity);
274  }
275}
276
277UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
278  : fShortLength(0),
279    fFlags(kShortString)
280{
281  if(src==NULL) {
282    // treat as an empty string
283  } else {
284    if(length<0) {
285      length=(int32_t)uprv_strlen(src);
286    }
287    if(cloneArrayIfNeeded(length, length, FALSE)) {
288      u_charsToUChars(src, getArrayStart(), length);
289      setLength(length);
290    } else {
291      setToBogus();
292    }
293  }
294}
295
296#if U_CHARSET_IS_UTF8
297
// Only compiled when U_CHARSET_IS_UTF8: constructs a string from
// codepageData by passing it to setToUTF8(). A null pointer yields an
// empty string.
UnicodeString::UnicodeString(const char *codepageData)
  : fShortLength(0),
    fFlags(kShortString) {
  if(codepageData != 0) {
    setToUTF8(codepageData);
  }
}
305
306UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
307  : fShortLength(0),
308    fFlags(kShortString) {
309  // if there's nothing to convert, do nothing
310  if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
311    return;
312  }
313  if(dataLength == -1) {
314    dataLength = (int32_t)uprv_strlen(codepageData);
315  }
316  setToUTF8(StringPiece(codepageData, dataLength));
317}
318
319// else see unistr_cnv.cpp
320#endif
321
// Copy constructor: starts as an empty short string, then takes over the
// contents of that through copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  copyFrom(that);
}
329
// Substring constructor: starts as an empty short string, then calls
// setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart);
}
338
// Substring constructor: starts as an empty short string, then calls
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
348
// Replaceable base class clone() default implementation: does not clone,
// always returns NULL. Subclasses that support cloning override it.
Replaceable *
Replaceable::clone() const {
  return NULL;
}
354
// UnicodeString overrides clone() with a real implementation:
// returns a heap-allocated copy made with the copy constructor.
// The returned object comes from operator new, so the caller has to delete it.
Replaceable *
UnicodeString::clone() const {
  return new UnicodeString(*this);
}
360
361//========================================
362// array allocation
363//========================================
364
365UBool
366UnicodeString::allocate(int32_t capacity) {
367  if(capacity <= US_STACKBUF_SIZE) {
368    fFlags = kShortString;
369  } else {
370    // count bytes for the refCounter and the string capacity, and
371    // round up to a multiple of 16; then divide by 4 and allocate int32_t's
372    // to be safely aligned for the refCount
373    // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
374    int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
375    int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
376    if(array != 0) {
377      // set initial refCount and point behind the refCount
378      *array++ = 1;
379
380      // have fArray point to the first UChar
381      fUnion.fFields.fArray = (UChar *)array;
382      fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
383      fFlags = kLongString;
384    } else {
385      fShortLength = 0;
386      fUnion.fFields.fArray = 0;
387      fUnion.fFields.fCapacity = 0;
388      fFlags = kIsBogus;
389      return FALSE;
390    }
391  }
392  return TRUE;
393}
394
395//========================================
396// Destructor
397//========================================
// Destructor: drops this string's reference to a shared heap buffer, if it
// holds one (releaseArray() does nothing for non-refcounted storage).
UnicodeString::~UnicodeString()
{
  releaseArray();
}
402
403//========================================
404// Factory methods
405//========================================
406
// Factory: returns a new string filled from utf8 via setToUTF8().
UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
  UnicodeString result;
  result.setToUTF8(utf8);
  return result;
}
412
413UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
414  UnicodeString result;
415  int32_t capacity;
416  // Most UTF-32 strings will be BMP-only and result in a same-length
417  // UTF-16 string. We overestimate the capacity just slightly,
418  // just in case there are a few supplementary characters.
419  if(length <= US_STACKBUF_SIZE) {
420    capacity = US_STACKBUF_SIZE;
421  } else {
422    capacity = length + (length >> 4) + 4;
423  }
424  do {
425    UChar *utf16 = result.getBuffer(capacity);
426    int32_t length16;
427    UErrorCode errorCode = U_ZERO_ERROR;
428    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
429        utf32, length,
430        0xfffd,  // Substitution character.
431        NULL,    // Don't care about number of substitutions.
432        &errorCode);
433    result.releaseBuffer(length16);
434    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
435      capacity = length16 + 1;  // +1 for the terminating NUL.
436      continue;
437    } else if(U_FAILURE(errorCode)) {
438      result.setToBogus();
439    }
440    break;
441  } while(TRUE);
442  return result;
443}
444
445//========================================
446// Assignment
447//========================================
448
// Assignment operator: delegates to copyFrom(src) without passing the
// fastCopy argument and returns *this.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  return copyFrom(src);
}
453
// Like operator=, but passes fastCopy=TRUE to copyFrom(), which lets a
// read-only alias be copied as an alias instead of duplicating its text.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  return copyFrom(src, TRUE);
}
458
// Makes this string a copy of src and returns *this.
//   - self-assignment: no-op
//   - bogus src: this becomes bogus
//   - empty src: this becomes empty (stack buffer)
//   - short src: the stack buffer contents are copied
//   - refcounted src: the heap buffer is shared and its refCount incremented
//   - read-only alias: aliased as well if fastCopy, otherwise copied
//   - writable alias: always copied into newly allocated storage
// If a required allocation fails, this becomes bogus.
// The switch relies on deliberate fall-through between the last three cases.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  // NOTE(review): the this == 0 test can only be true after undefined
  // behavior in the caller; compilers may remove it.
  if(this == 0 || this == &src) {
    return *this;
  }

  // is the right side bogus?
  // NOTE(review): &src == 0 likewise presupposes a null reference.
  if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
  default:
    // reached for unexpected flag values and after a failed allocation:
    // set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    fFlags = kIsBogus;
    break;
  }

  return *this;
}
531
532//========================================
533// Miscellaneous operations
534//========================================
535
536UnicodeString UnicodeString::unescape() const {
537    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
538    const UChar *array = getBuffer();
539    int32_t len = length();
540    int32_t prev = 0;
541    for (int32_t i=0;;) {
542        if (i == len) {
543            result.append(array, prev, len - prev);
544            break;
545        }
546        if (array[i++] == 0x5C /*'\\'*/) {
547            result.append(array, prev, (i - 1) - prev);
548            UChar32 c = unescapeAt(i); // advances i
549            if (c < 0) {
550                result.remove(); // return empty string
551                break; // invalid escape sequence
552            }
553            result.append(c);
554            prev = i;
555        }
556    }
557    return result;
558}
559
// Decodes one escape sequence by calling u_unescapeAt() with this string
// as the character source (through the UnicodeString_charAt callback).
// offset is passed by address, so u_unescapeAt() can update it; the
// decoded value from u_unescapeAt() is returned unchanged.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
}
563
564//========================================
565// Read-only implementation
566//========================================
// Returns whether the first len code units of this string and of text are
// identical, using a byte-wise memory comparison.
UBool
UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
  // Requires: this & text not bogus and have same lengths.
  // Byte-wise comparison works for equality regardless of endianness.
  return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
}
573
574int8_t
575UnicodeString::doCompare( int32_t start,
576              int32_t length,
577              const UChar *srcChars,
578              int32_t srcStart,
579              int32_t srcLength) const
580{
581  // compare illegal string values
582  if(isBogus()) {
583    return -1;
584  }
585
586  // pin indices to legal values
587  pinIndices(start, length);
588
589  if(srcChars == NULL) {
590    // treat const UChar *srcChars==NULL as an empty string
591    return length == 0 ? 0 : 1;
592  }
593
594  // get the correct pointer
595  const UChar *chars = getArrayStart();
596
597  chars += start;
598  srcChars += srcStart;
599
600  int32_t minLength;
601  int8_t lengthResult;
602
603  // get the srcLength if necessary
604  if(srcLength < 0) {
605    srcLength = u_strlen(srcChars + srcStart);
606  }
607
608  // are we comparing different lengths?
609  if(length != srcLength) {
610    if(length < srcLength) {
611      minLength = length;
612      lengthResult = -1;
613    } else {
614      minLength = srcLength;
615      lengthResult = 1;
616    }
617  } else {
618    minLength = length;
619    lengthResult = 0;
620  }
621
622  /*
623   * note that uprv_memcmp() returns an int but we return an int8_t;
624   * we need to take care not to truncate the result -
625   * one way to do this is to right-shift the value to
626   * move the sign bit into the lower 8 bits and making sure that this
627   * does not become 0 itself
628   */
629
630  if(minLength > 0 && chars != srcChars) {
631    int32_t result;
632
633#   if U_IS_BIG_ENDIAN
634      // big-endian: byte comparison works
635      result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
636      if(result != 0) {
637        return (int8_t)(result >> 15 | 1);
638      }
639#   else
640      // little-endian: compare UChar units
641      do {
642        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
643        if(result != 0) {
644          return (int8_t)(result >> 15 | 1);
645        }
646      } while(--minLength > 0);
647#   endif
648  }
649  return lengthResult;
650}
651
652/* String compare in code point order - doCompare() compares in code unit order. */
653int8_t
654UnicodeString::doCompareCodePointOrder(int32_t start,
655                                       int32_t length,
656                                       const UChar *srcChars,
657                                       int32_t srcStart,
658                                       int32_t srcLength) const
659{
660  // compare illegal string values
661  // treat const UChar *srcChars==NULL as an empty string
662  if(isBogus()) {
663    return -1;
664  }
665
666  // pin indices to legal values
667  pinIndices(start, length);
668
669  if(srcChars == NULL) {
670    srcStart = srcLength = 0;
671  }
672
673  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
674  /* translate the 32-bit result into an 8-bit one */
675  if(diff!=0) {
676    return (int8_t)(diff >> 15 | 1);
677  } else {
678    return 0;
679  }
680}
681
// Forwards to length().
int32_t
UnicodeString::getLength() const {
    return length();
}
686
// Forwards to charAt(offset).
UChar
UnicodeString::getCharAt(int32_t offset) const {
  return charAt(offset);
}
691
// Forwards to char32At(offset).
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  return char32At(offset);
}
696
697UChar32
698UnicodeString::char32At(int32_t offset) const
699{
700  int32_t len = length();
701  if((uint32_t)offset < (uint32_t)len) {
702    const UChar *array = getArrayStart();
703    UChar32 c;
704    U16_GET(array, 0, offset, len, c);
705    return c;
706  } else {
707    return kInvalidUChar;
708  }
709}
710
711int32_t
712UnicodeString::getChar32Start(int32_t offset) const {
713  if((uint32_t)offset < (uint32_t)length()) {
714    const UChar *array = getArrayStart();
715    U16_SET_CP_START(array, 0, offset);
716    return offset;
717  } else {
718    return 0;
719  }
720}
721
722int32_t
723UnicodeString::getChar32Limit(int32_t offset) const {
724  int32_t len = length();
725  if((uint32_t)offset < (uint32_t)len) {
726    const UChar *array = getArrayStart();
727    U16_SET_CP_LIMIT(array, 0, offset, len);
728    return offset;
729  } else {
730    return len;
731  }
732}
733
// Returns u_countChar32() for the substring [start, start+length)
// after pinning the indices to legal values.
int32_t
UnicodeString::countChar32(int32_t start, int32_t length) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
  return u_countChar32(getArrayStart()+start, length);
}
740
// Returns u_strHasMoreChar32Than() for the substring [start, start+length)
// and the given number, after pinning the indices to legal values.
UBool
UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
  pinIndices(start, length);
  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
  return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
}
747
748int32_t
749UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
750  // pin index
751  int32_t len = length();
752  if(index<0) {
753    index=0;
754  } else if(index>len) {
755    index=len;
756  }
757
758  const UChar *array = getArrayStart();
759  if(delta>0) {
760    U16_FWD_N(array, index, len, delta);
761  } else {
762    U16_BACK_N(array, 0, index, -delta);
763  }
764
765  return index;
766}
767
768void
769UnicodeString::doExtract(int32_t start,
770             int32_t length,
771             UChar *dst,
772             int32_t dstStart) const
773{
774  // pin indices to legal values
775  pinIndices(start, length);
776
777  // do not copy anything if we alias dst itself
778  const UChar *array = getArrayStart();
779  if(array + start != dst + dstStart) {
780    us_arrayCopy(array, start, dst, dstStart, length);
781  }
782}
783
784int32_t
785UnicodeString::extract(UChar *dest, int32_t destCapacity,
786                       UErrorCode &errorCode) const {
787  int32_t len = length();
788  if(U_SUCCESS(errorCode)) {
789    if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
790      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
791    } else {
792      const UChar *array = getArrayStart();
793      if(len>0 && len<=destCapacity && array!=dest) {
794        uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
795      }
796      return u_terminateUChars(dest, destCapacity, len, &errorCode);
797    }
798  }
799
800  return len;
801}
802
803int32_t
804UnicodeString::extract(int32_t start,
805                       int32_t length,
806                       char *target,
807                       int32_t targetCapacity,
808                       enum EInvariant) const
809{
810  // if the arguments are illegal, then do nothing
811  if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
812    return 0;
813  }
814
815  // pin the indices to legal values
816  pinIndices(start, length);
817
818  if(length <= targetCapacity) {
819    u_UCharsToChars(getArrayStart() + start, target, length);
820  }
821  UErrorCode status = U_ZERO_ERROR;
822  return u_terminateChars(target, targetCapacity, length, &status);
823}
824
// Returns a read-only alias (no copy) of the pinned substring
// [start, start+len) of this string.
// If getBuffer() returns NULL, a length of -2 is passed to the aliasing
// constructor, which rejects lengths below -1 and produces a bogus string.
UnicodeString
UnicodeString::tempSubString(int32_t start, int32_t len) const {
  pinIndices(start, len);
  const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
  if(array==NULL) {
    array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
    len=-2;  // bogus result string
  }
  return UnicodeString(FALSE, array + start, len);
}
835
// Converts the pinned substring [start, start+len) to UTF-8 in target
// (capacity bytes available) using u_strToUTF8WithSub() with U+FFFD as
// the substitution character.
// Returns the length reported by the converter in length8. The error code
// is local and is not reported to the caller.
int32_t
UnicodeString::toUTF8(int32_t start, int32_t len,
                      char *target, int32_t capacity) const {
  pinIndices(start, len);
  int32_t length8;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strToUTF8WithSub(target, capacity, &length8,
                     getBuffer() + start, len,
                     0xFFFD,  // Standard substitution character.
                     NULL,    // Don't care about number of substitutions.
                     &errorCode);
  return length8;
}
849
850#if U_CHARSET_IS_UTF8
851
852int32_t
853UnicodeString::extract(int32_t start, int32_t len,
854                       char *target, uint32_t dstSize) const {
855  // if the arguments are illegal, then do nothing
856  if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
857    return 0;
858  }
859  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
860}
861
862// else see unistr_cnv.cpp
863#endif
864
// Extracts the code units between start and limit into target.
// Both indices are pinned individually with pinIndex() before the
// difference limit - start is passed as the length to doExtract().
void
UnicodeString::extractBetween(int32_t start,
                  int32_t limit,
                  UnicodeString& target) const {
  pinIndex(start);
  pinIndex(limit);
  doExtract(start, limit - start, target);
}
873
874// When converting from UTF-16 to UTF-8, the result will have at most 3 times
875// as many bytes as the source has UChars.
876// The "worst cases" are writing systems like Indic, Thai and CJK with
877// 3:1 bytes:UChars.
878void
879UnicodeString::toUTF8(ByteSink &sink) const {
880  int32_t length16 = length();
881  if(length16 != 0) {
882    char stackBuffer[1024];
883    int32_t capacity = (int32_t)sizeof(stackBuffer);
884    UBool utf8IsOwned = FALSE;
885    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
886                                      3*length16,
887                                      stackBuffer, capacity,
888                                      &capacity);
889    int32_t length8 = 0;
890    UErrorCode errorCode = U_ZERO_ERROR;
891    u_strToUTF8WithSub(utf8, capacity, &length8,
892                       getBuffer(), length16,
893                       0xFFFD,  // Standard substitution character.
894                       NULL,    // Don't care about number of substitutions.
895                       &errorCode);
896    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
897      utf8 = (char *)uprv_malloc(length8);
898      if(utf8 != NULL) {
899        utf8IsOwned = TRUE;
900        errorCode = U_ZERO_ERROR;
901        u_strToUTF8WithSub(utf8, length8, &length8,
902                           getBuffer(), length16,
903                           0xFFFD,  // Standard substitution character.
904                           NULL,    // Don't care about number of substitutions.
905                           &errorCode);
906      } else {
907        errorCode = U_MEMORY_ALLOCATION_ERROR;
908      }
909    }
910    if(U_SUCCESS(errorCode)) {
911      sink.Append(utf8, length8);
912      sink.Flush();
913    }
914    if(utf8IsOwned) {
915      uprv_free(utf8);
916    }
917  }
918}
919
920int32_t
921UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
922  int32_t length32=0;
923  if(U_SUCCESS(errorCode)) {
924    // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
925    u_strToUTF32WithSub(utf32, capacity, &length32,
926        getBuffer(), length(),
927        0xfffd,  // Substitution character.
928        NULL,    // Don't care about number of substitutions.
929        &errorCode);
930  }
931  return length32;
932}
933
934int32_t
935UnicodeString::indexOf(const UChar *srcChars,
936               int32_t srcStart,
937               int32_t srcLength,
938               int32_t start,
939               int32_t length) const
940{
941  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
942    return -1;
943  }
944
945  // UnicodeString does not find empty substrings
946  if(srcLength < 0 && srcChars[srcStart] == 0) {
947    return -1;
948  }
949
950  // get the indices within bounds
951  pinIndices(start, length);
952
953  // find the first occurrence of the substring
954  const UChar *array = getArrayStart();
955  const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
956  if(match == NULL) {
957    return -1;
958  } else {
959    return (int32_t)(match - array);
960  }
961}
962
963int32_t
964UnicodeString::doIndexOf(UChar c,
965             int32_t start,
966             int32_t length) const
967{
968  // pin indices
969  pinIndices(start, length);
970
971  // find the first occurrence of c
972  const UChar *array = getArrayStart();
973  const UChar *match = u_memchr(array + start, c, length);
974  if(match == NULL) {
975    return -1;
976  } else {
977    return (int32_t)(match - array);
978  }
979}
980
981int32_t
982UnicodeString::doIndexOf(UChar32 c,
983                         int32_t start,
984                         int32_t length) const {
985  // pin indices
986  pinIndices(start, length);
987
988  // find the first occurrence of c
989  const UChar *array = getArrayStart();
990  const UChar *match = u_memchr32(array + start, c, length);
991  if(match == NULL) {
992    return -1;
993  } else {
994    return (int32_t)(match - array);
995  }
996}
997
998int32_t
999UnicodeString::lastIndexOf(const UChar *srcChars,
1000               int32_t srcStart,
1001               int32_t srcLength,
1002               int32_t start,
1003               int32_t length) const
1004{
1005  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1006    return -1;
1007  }
1008
1009  // UnicodeString does not find empty substrings
1010  if(srcLength < 0 && srcChars[srcStart] == 0) {
1011    return -1;
1012  }
1013
1014  // get the indices within bounds
1015  pinIndices(start, length);
1016
1017  // find the last occurrence of the substring
1018  const UChar *array = getArrayStart();
1019  const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1020  if(match == NULL) {
1021    return -1;
1022  } else {
1023    return (int32_t)(match - array);
1024  }
1025}
1026
1027int32_t
1028UnicodeString::doLastIndexOf(UChar c,
1029                 int32_t start,
1030                 int32_t length) const
1031{
1032  if(isBogus()) {
1033    return -1;
1034  }
1035
1036  // pin indices
1037  pinIndices(start, length);
1038
1039  // find the last occurrence of c
1040  const UChar *array = getArrayStart();
1041  const UChar *match = u_memrchr(array + start, c, length);
1042  if(match == NULL) {
1043    return -1;
1044  } else {
1045    return (int32_t)(match - array);
1046  }
1047}
1048
1049int32_t
1050UnicodeString::doLastIndexOf(UChar32 c,
1051                             int32_t start,
1052                             int32_t length) const {
1053  // pin indices
1054  pinIndices(start, length);
1055
1056  // find the last occurrence of c
1057  const UChar *array = getArrayStart();
1058  const UChar *match = u_memrchr32(array + start, c, length);
1059  if(match == NULL) {
1060    return -1;
1061  } else {
1062    return (int32_t)(match - array);
1063  }
1064}
1065
1066//========================================
1067// Write implementation
1068//========================================
1069
1070UnicodeString&
1071UnicodeString::findAndReplace(int32_t start,
1072                  int32_t length,
1073                  const UnicodeString& oldText,
1074                  int32_t oldStart,
1075                  int32_t oldLength,
1076                  const UnicodeString& newText,
1077                  int32_t newStart,
1078                  int32_t newLength)
1079{
1080  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1081    return *this;
1082  }
1083
1084  pinIndices(start, length);
1085  oldText.pinIndices(oldStart, oldLength);
1086  newText.pinIndices(newStart, newLength);
1087
1088  if(oldLength == 0) {
1089    return *this;
1090  }
1091
1092  while(length > 0 && length >= oldLength) {
1093    int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1094    if(pos < 0) {
1095      // no more oldText's here: done
1096      break;
1097    } else {
1098      // we found oldText, replace it by newText and go beyond it
1099      replace(pos, oldLength, newText, newStart, newLength);
1100      length -= pos + oldLength - start;
1101      start = pos + newLength;
1102    }
1103  }
1104
1105  return *this;
1106}
1107
1108
1109void
1110UnicodeString::setToBogus()
1111{
1112  releaseArray();
1113
1114  fShortLength = 0;
1115  fUnion.fFields.fArray = 0;
1116  fUnion.fFields.fCapacity = 0;
1117  fFlags = kIsBogus;
1118}
1119
1120// turn a bogus string into an empty one
1121void
1122UnicodeString::unBogus() {
1123  if(fFlags & kIsBogus) {
1124    setToEmpty();
1125  }
1126}
1127
1128const UChar *
1129UnicodeString::getTerminatedBuffer() {
1130  if(!isWritable()) {
1131    return 0;
1132  }
1133  UChar *array = getArrayStart();
1134  int32_t len = length();
1135  if(len < getCapacity()) {
1136    if(fFlags & kBufferIsReadonly) {
1137      // If len<capacity on a read-only alias, then array[len] is
1138      // either the original NUL (if constructed with (TRUE, s, length))
1139      // or one of the original string contents characters (if later truncated),
1140      // therefore we can assume that array[len] is initialized memory.
1141      if(array[len] == 0) {
1142        return array;
1143      }
1144    } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
1145      // kRefCounted: Do not write the NUL if the buffer is shared.
1146      // That is mostly safe, except when the length of one copy was modified
1147      // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1148      // Then the NUL would be written into the middle of another copy's string.
1149
1150      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1151      // Do not test if there is a NUL already because it might be uninitialized memory.
1152      // (That would be safe, but tools like valgrind & Purify would complain.)
1153      array[len] = 0;
1154      return array;
1155    }
1156  }
1157  if(cloneArrayIfNeeded(len+1)) {
1158    array = getArrayStart();
1159    array[len] = 0;
1160    return array;
1161  } else {
1162    return NULL;
1163  }
1164}
1165
1166// setTo() analogous to the readonly-aliasing constructor with the same signature
1167UnicodeString &
1168UnicodeString::setTo(UBool isTerminated,
1169                     const UChar *text,
1170                     int32_t textLength)
1171{
1172  if(fFlags & kOpenGetBuffer) {
1173    // do not modify a string that has an "open" getBuffer(minCapacity)
1174    return *this;
1175  }
1176
1177  if(text == NULL) {
1178    // treat as an empty string, do not alias
1179    releaseArray();
1180    setToEmpty();
1181    return *this;
1182  }
1183
1184  if( textLength < -1 ||
1185      (textLength == -1 && !isTerminated) ||
1186      (textLength >= 0 && isTerminated && text[textLength] != 0)
1187  ) {
1188    setToBogus();
1189    return *this;
1190  }
1191
1192  releaseArray();
1193
1194  if(textLength == -1) {
1195    // text is terminated, or else it would have failed the above test
1196    textLength = u_strlen(text);
1197  }
1198  setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1199
1200  fFlags = kReadonlyAlias;
1201  return *this;
1202}
1203
1204// setTo() analogous to the writable-aliasing constructor with the same signature
1205UnicodeString &
1206UnicodeString::setTo(UChar *buffer,
1207                     int32_t buffLength,
1208                     int32_t buffCapacity) {
1209  if(fFlags & kOpenGetBuffer) {
1210    // do not modify a string that has an "open" getBuffer(minCapacity)
1211    return *this;
1212  }
1213
1214  if(buffer == NULL) {
1215    // treat as an empty string, do not alias
1216    releaseArray();
1217    setToEmpty();
1218    return *this;
1219  }
1220
1221  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1222    setToBogus();
1223    return *this;
1224  } else if(buffLength == -1) {
1225    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1226    const UChar *p = buffer, *limit = buffer + buffCapacity;
1227    while(p != limit && *p != 0) {
1228      ++p;
1229    }
1230    buffLength = (int32_t)(p - buffer);
1231  }
1232
1233  releaseArray();
1234
1235  setArray(buffer, buffLength, buffCapacity);
1236  fFlags = kWritableAlias;
1237  return *this;
1238}
1239
1240UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1241  unBogus();
1242  int32_t length = utf8.length();
1243  int32_t capacity;
1244  // The UTF-16 string will be at most as long as the UTF-8 string.
1245  if(length <= US_STACKBUF_SIZE) {
1246    capacity = US_STACKBUF_SIZE;
1247  } else {
1248    capacity = length + 1;  // +1 for the terminating NUL.
1249  }
1250  UChar *utf16 = getBuffer(capacity);
1251  int32_t length16;
1252  UErrorCode errorCode = U_ZERO_ERROR;
1253  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1254      utf8.data(), length,
1255      0xfffd,  // Substitution character.
1256      NULL,    // Don't care about number of substitutions.
1257      &errorCode);
1258  releaseBuffer(length16);
1259  if(U_FAILURE(errorCode)) {
1260    setToBogus();
1261  }
1262  return *this;
1263}
1264
1265UnicodeString&
1266UnicodeString::setCharAt(int32_t offset,
1267             UChar c)
1268{
1269  int32_t len = length();
1270  if(cloneArrayIfNeeded() && len > 0) {
1271    if(offset < 0) {
1272      offset = 0;
1273    } else if(offset >= len) {
1274      offset = len - 1;
1275    }
1276
1277    getArrayStart()[offset] = c;
1278  }
1279  return *this;
1280}
1281
1282UnicodeString&
1283UnicodeString::replace(int32_t start,
1284               int32_t _length,
1285               UChar32 srcChar) {
1286  UChar buffer[U16_MAX_LENGTH];
1287  int32_t count = 0;
1288  UBool isError = FALSE;
1289  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1290  // We test isError so that the compiler does not complain that we don't.
1291  // If isError (srcChar is not a valid code point) then count==0 which means
1292  // we remove the source segment rather than replacing it with srcChar.
1293  return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1294}
1295
1296UnicodeString&
1297UnicodeString::append(UChar32 srcChar) {
1298  UChar buffer[U16_MAX_LENGTH];
1299  int32_t _length = 0;
1300  UBool isError = FALSE;
1301  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1302  // We test isError so that the compiler does not complain that we don't.
1303  // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1304  return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1305}
1306
1307UnicodeString&
1308UnicodeString::doReplace( int32_t start,
1309              int32_t length,
1310              const UnicodeString& src,
1311              int32_t srcStart,
1312              int32_t srcLength)
1313{
1314  if(!src.isBogus()) {
1315    // pin the indices to legal values
1316    src.pinIndices(srcStart, srcLength);
1317
1318    // get the characters from src
1319    // and replace the range in ourselves with them
1320    return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1321  } else {
1322    // remove the range
1323    return doReplace(start, length, 0, 0, 0);
1324  }
1325}
1326
/**
 * Core replace primitive: replace the range start/length of this string
 * with srcLength code units read from srcChars + srcStart.
 *
 * - Does nothing and returns *this if this string is not writable.
 * - srcChars == 0 is treated as an empty replacement (the range is removed).
 * - srcLength < 0 means that the length is determined with u_strlen()
 *   on srcChars + srcStart.
 * - start >= length() is treated as an append at the end of the string.
 * - For a read-only buffer and an empty replacement, removing a prefix or
 *   a suffix is done by adjusting array pointer/capacity/length only.
 * - If cloneArrayIfNeeded() fails, *this is returned without performing
 *   the replacement.
 *
 * @param start     start of the range to replace
 * @param length    length of the range to replace
 * @param srcChars  replacement code units, may be 0
 * @param srcStart  offset into srcChars
 * @param srcLength number of replacement code units, or negative (see above)
 * @return *this
 */
UnicodeString&
UnicodeString::doReplace(int32_t start,
             int32_t length,
             const UChar *srcChars,
             int32_t srcStart,
             int32_t srcLength)
{
  if(!isWritable()) {
    return *this;
  }

  int32_t oldLength = this->length();

  // optimize (read-only alias).remove(0, start) and .remove(start, end)
  if((fFlags&kBufferIsReadonly) && srcLength == 0) {
    if(start == 0) {
      // remove prefix by adjusting the array pointer
      // (the removal length is pinned with pinIndex() first)
      pinIndex(length);
      fUnion.fFields.fArray += length;
      fUnion.fFields.fCapacity -= length;
      setLength(oldLength - length);
      return *this;
    } else {
      pinIndex(start);
      if(length >= (oldLength - start)) {
        // remove suffix by reducing the length (like truncate())
        setLength(start);
        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
        return *this;
      }
      // otherwise the removal is in the middle: fall through to the
      // general code below
    }
  }

  if(srcChars == 0) {
    // no source: behave like an empty replacement
    srcStart = srcLength = 0;
  } else if(srcLength < 0) {
    // get the srcLength if necessary
    srcLength = u_strlen(srcChars + srcStart);
  }

  // calculate the size of the string after the replace
  int32_t newLength;

  // optimize append() onto a large-enough, owned string
  if(start >= oldLength) {
    if(srcLength == 0) {
      // appending nothing
      return *this;
    }
    newLength = oldLength + srcLength;
    if(newLength <= getCapacity() && isBufferWritable()) {
      UChar *oldArray = getArrayStart();
      // Do not copy characters when
      //   UChar *buffer=str.getAppendBuffer(...);
      // is followed by
      //   str.append(buffer, length);
      // or
      //   str.appendString(buffer, length)
      // or similar.
      // (In that case the source already sits at oldArray + oldLength.)
      if(srcChars + srcStart != oldArray + start || start > oldLength) {
        us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
      }
      setLength(newLength);
      return *this;
    } else {
      // pin the indices to legal values
      start = oldLength;
      length = 0;
    }
  } else {
    // pin the indices to legal values
    pinIndices(start, length);

    newLength = oldLength - length + srcLength;
  }

  // the following may change fArray but will not copy the current contents;
  // therefore we need to keep the current fArray
  UChar oldStackBuffer[US_STACKBUF_SIZE];
  UChar *oldArray;
  if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
    // copy the stack buffer contents because it will be overwritten with
    // fUnion.fFields values
    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
    oldArray = oldStackBuffer;
  } else {
    oldArray = getArrayStart();
  }

  // clone our array and allocate a bigger array if needed
  // (doCopyArray is FALSE: the contents are copied selectively below;
  // a released old buffer is handed back through bufferToDelete)
  int32_t *bufferToDelete = 0;
  if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
                         FALSE, &bufferToDelete)
  ) {
    return *this;
  }

  // now do the replace

  UChar *newArray = getArrayStart();
  if(newArray != oldArray) {
    // if fArray changed, then we need to copy everything except what will change
    // (first the part before the range, then the part after it)
    us_arrayCopy(oldArray, 0, newArray, 0, start);
    us_arrayCopy(oldArray, start + length,
                 newArray, start + srcLength,
                 oldLength - (start + length));
  } else if(length != srcLength) {
    // fArray did not change; copy only the portion that isn't changing, leaving a hole
    us_arrayCopy(oldArray, start + length,
                 newArray, start + srcLength,
                 oldLength - (start + length));
  }

  // now fill in the hole with the new string
  us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);

  setLength(newLength);

  // delayed delete in case srcChars == fArray when we started, and
  // to keep oldArray alive for the above operations
  if (bufferToDelete) {
    uprv_free(bufferToDelete);
  }

  return *this;
}
1452
1453/**
1454 * Replaceable API
1455 */
1456void
1457UnicodeString::handleReplaceBetween(int32_t start,
1458                                    int32_t limit,
1459                                    const UnicodeString& text) {
1460    replaceBetween(start, limit, text);
1461}
1462
1463/**
1464 * Replaceable API
1465 */
1466void
1467UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1468    if (limit <= start) {
1469        return; // Nothing to do; avoid bogus malloc call
1470    }
1471    UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1472    // Check to make sure text is not null.
1473    if (text != NULL) {
1474	    extractBetween(start, limit, text, 0);
1475	    insert(dest, text, 0, limit - start);
1476	    uprv_free(text);
1477    }
1478}
1479
1480/**
1481 * Replaceable API
1482 *
1483 * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1484 * so we implement this function here.
1485 */
1486UBool Replaceable::hasMetaData() const {
1487    return TRUE;
1488}
1489
1490/**
1491 * Replaceable API
1492 */
1493UBool UnicodeString::hasMetaData() const {
1494    return FALSE;
1495}
1496
1497UnicodeString&
1498UnicodeString::doReverse(int32_t start, int32_t length) {
1499  if(length <= 1 || !cloneArrayIfNeeded()) {
1500    return *this;
1501  }
1502
1503  // pin the indices to legal values
1504  pinIndices(start, length);
1505  if(length <= 1) {  // pinIndices() might have shrunk the length
1506    return *this;
1507  }
1508
1509  UChar *left = getArrayStart() + start;
1510  UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1511  UChar swap;
1512  UBool hasSupplementary = FALSE;
1513
1514  // Before the loop we know left<right because length>=2.
1515  do {
1516    hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1517    hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1518    *right-- = swap;
1519  } while(left < right);
1520  // Make sure to test the middle code unit of an odd-length string.
1521  // Redundant if the length is even.
1522  hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1523
1524  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1525  if(hasSupplementary) {
1526    UChar swap2;
1527
1528    left = getArrayStart() + start;
1529    right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1530    while(left < right) {
1531      if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1532        *left++ = swap2;
1533        *left++ = swap;
1534      } else {
1535        ++left;
1536      }
1537    }
1538  }
1539
1540  return *this;
1541}
1542
1543UBool
1544UnicodeString::padLeading(int32_t targetLength,
1545                          UChar padChar)
1546{
1547  int32_t oldLength = length();
1548  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1549    return FALSE;
1550  } else {
1551    // move contents up by padding width
1552    UChar *array = getArrayStart();
1553    int32_t start = targetLength - oldLength;
1554    us_arrayCopy(array, 0, array, start, oldLength);
1555
1556    // fill in padding character
1557    while(--start >= 0) {
1558      array[start] = padChar;
1559    }
1560    setLength(targetLength);
1561    return TRUE;
1562  }
1563}
1564
1565UBool
1566UnicodeString::padTrailing(int32_t targetLength,
1567                           UChar padChar)
1568{
1569  int32_t oldLength = length();
1570  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1571    return FALSE;
1572  } else {
1573    // fill in padding character
1574    UChar *array = getArrayStart();
1575    int32_t length = targetLength;
1576    while(--length >= oldLength) {
1577      array[length] = padChar;
1578    }
1579    setLength(targetLength);
1580    return TRUE;
1581  }
1582}
1583
1584//========================================
1585// Hashing
1586//========================================
1587int32_t
1588UnicodeString::doHashCode() const
1589{
1590    /* Delegate hash computation to uhash.  This makes UnicodeString
1591     * hashing consistent with UChar* hashing.  */
1592    int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1593    if (hashCode == kInvalidHashCode) {
1594        hashCode = kEmptyHashCode;
1595    }
1596    return hashCode;
1597}
1598
1599//========================================
1600// External Buffer
1601//========================================
1602
1603UChar *
1604UnicodeString::getBuffer(int32_t minCapacity) {
1605  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1606    fFlags|=kOpenGetBuffer;
1607    fShortLength=0;
1608    return getArrayStart();
1609  } else {
1610    return 0;
1611  }
1612}
1613
1614void
1615UnicodeString::releaseBuffer(int32_t newLength) {
1616  if(fFlags&kOpenGetBuffer && newLength>=-1) {
1617    // set the new fLength
1618    int32_t capacity=getCapacity();
1619    if(newLength==-1) {
1620      // the new length is the string length, capped by fCapacity
1621      const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1622      while(p<limit && *p!=0) {
1623        ++p;
1624      }
1625      newLength=(int32_t)(p-array);
1626    } else if(newLength>capacity) {
1627      newLength=capacity;
1628    }
1629    setLength(newLength);
1630    fFlags&=~kOpenGetBuffer;
1631  }
1632}
1633
1634//========================================
1635// Miscellaneous
1636//========================================
/**
 * Make sure that this string has its own, modifiable array with room for
 * at least newCapacity code units, allocating a new array if necessary.
 *
 * @param newCapacity     minimum capacity; -1 means the current capacity
 * @param growCapacity    capacity to try to allocate first; a negative value
 *                        means newCapacity. If the allocation fails and
 *                        newCapacity is smaller, newCapacity is tried.
 * @param doCopyArray     if TRUE, the old contents are copied into a newly
 *                        allocated array; if FALSE, the length is reset
 *                        to 0 when a new array is allocated
 * @param pBufferToDelete if not 0 and a reference-counted old array drops
 *                        to a count of 0, the array is not freed here;
 *                        instead its pointer is stored in *pBufferToDelete
 * @param forceClone      if TRUE, a new array is allocated unconditionally
 * @return FALSE if the string is not writable or if memory could not be
 *         allocated (the string is set to bogus in the latter case),
 *         otherwise TRUE
 */
UBool
UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
                                  int32_t growCapacity,
                                  UBool doCopyArray,
                                  int32_t **pBufferToDelete,
                                  UBool forceClone) {
  // default parameters need to be static, therefore
  // the defaults are -1 to have convenience defaults
  if(newCapacity == -1) {
    newCapacity = getCapacity();
  }

  // while a getBuffer(minCapacity) is "open",
  // prevent any modifications of the string by returning FALSE here
  // if the string is bogus, then only an assignment or similar can revive it
  if(!isWritable()) {
    return FALSE;
  }

  /*
   * We need to make a copy of the array if
   * the buffer is read-only, or
   * the buffer is refCounted (shared), and refCount>1, or
   * the buffer is too small.
   * Return FALSE if memory could not be allocated.
   */
  if(forceClone ||
     fFlags & kBufferIsReadonly ||
     (fFlags & kRefCounted && refCount() > 1) ||
     newCapacity > getCapacity()
  ) {
    // check growCapacity for default value and use of the stack buffer
    if(growCapacity < 0) {
      growCapacity = newCapacity;
    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
      // the required capacity fits into the stack buffer: do not grow beyond it
      growCapacity = US_STACKBUF_SIZE;
    }

    // save old values
    UChar oldStackBuffer[US_STACKBUF_SIZE];
    UChar *oldArray;
    uint8_t flags = fFlags;

    if(flags&kUsingStackBuffer) {
      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
        // copy the stack buffer contents because it will be overwritten with
        // fUnion.fFields values
        us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
        oldArray = oldStackBuffer;
      } else {
        oldArray = 0; // no need to copy from stack buffer to itself
      }
    } else {
      oldArray = fUnion.fFields.fArray;
      U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
    }

    // allocate a new array
    // (try the larger growCapacity first, then fall back to newCapacity)
    if(allocate(growCapacity) ||
       (newCapacity < growCapacity && allocate(newCapacity))
    ) {
      if(doCopyArray && oldArray != 0) {
        // copy the contents
        // do not copy more than what fits - it may be smaller than before
        int32_t minLength = length();
        newCapacity = getCapacity();
        if(newCapacity < minLength) {
          minLength = newCapacity;
          setLength(minLength);
        }
        us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
      } else {
        fShortLength = 0;
      }

      // release the old array
      // (uses the flags saved above, which describe the old array)
      if(flags & kRefCounted) {
        // the array is refCounted; decrement and release if 0
        // (the reference counter is stored directly in front of the array)
        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
        if(umtx_atomic_dec(pRefCount) == 0) {
          if(pBufferToDelete == 0) {
              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
              // is defined as volatile. (Volatile has useful non-standard behavior
              //   with this compiler.)
            uprv_free((void *)pRefCount);
          } else {
            // the caller requested to delete the old buffer itself
            *pBufferToDelete = (int32_t *)pRefCount;
          }
        }
      }
    } else {
      // not enough memory for growCapacity and not even for the smaller newCapacity
      // reset the old values for setToBogus() to release the array
      if(!(flags&kUsingStackBuffer)) {
        fUnion.fFields.fArray = oldArray;
      }
      fFlags = flags;
      setToBogus();
      return FALSE;
    }
  }
  return TRUE;
}
1742
1743// UnicodeStringAppendable ------------------------------------------------- ***
1744
// Out-of-line destructor with an empty body.
UnicodeStringAppendable::~UnicodeStringAppendable() {}
1746
1747UBool
1748UnicodeStringAppendable::appendCodeUnit(UChar c) {
1749  return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1750}
1751
1752UBool
1753UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1754  UChar buffer[U16_MAX_LENGTH];
1755  int32_t cLength = 0;
1756  UBool isError = FALSE;
1757  U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1758  return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1759}
1760
1761UBool
1762UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1763  return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1764}
1765
1766UBool
1767UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1768  return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1769}
1770
1771UChar *
1772UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1773                                         int32_t desiredCapacityHint,
1774                                         UChar *scratch, int32_t scratchCapacity,
1775                                         int32_t *resultCapacity) {
1776  if(minCapacity < 1 || scratchCapacity < minCapacity) {
1777    *resultCapacity = 0;
1778    return NULL;
1779  }
1780  int32_t oldLength = str.length();
1781  if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1782    *resultCapacity = str.getCapacity() - oldLength;
1783    return str.getArrayStart() + oldLength;
1784  }
1785  *resultCapacity = scratchCapacity;
1786  return scratch;
1787}
1788
1789U_NAMESPACE_END
1790
1791U_NAMESPACE_USE
1792
1793U_CAPI int32_t U_EXPORT2
1794uhash_hashUnicodeString(const UElement key) {
1795    const UnicodeString *str = (const UnicodeString*) key.pointer;
1796    return (str == NULL) ? 0 : str->hashCode();
1797}
1798
1799// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1800// does not depend on hashtable code.
1801U_CAPI UBool U_EXPORT2
1802uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1803    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1804    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1805    if (str1 == str2) {
1806        return TRUE;
1807    }
1808    if (str1 == NULL || str2 == NULL) {
1809        return FALSE;
1810    }
1811    return *str1 == *str2;
1812}
1813
1814#ifdef U_STATIC_IMPLEMENTATION
1815/*
1816This should never be called. It is defined here to make sure that the
1817virtual vector deleting destructor is defined within unistr.cpp.
1818The vector deleting destructor is already a part of UObject,
1819but defining it here makes sure that it is included with this object file.
1820This makes sure that static library dependencies are kept to a minimum.
1821*/
1822static void uprv_UnicodeStringDummy(void) {
1823    delete [] (new UnicodeString[2]);
1824}
1825#endif
1826