16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 1999-2011, International Business Machines
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   file name:  unistr_case.cpp
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   encoding:   US-ASCII
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   tab size:   8 (not used)
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   indentation:2
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created on: 2004aug19
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created by: Markus W. Scherer
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Case-mapping functions moved here from unistr.cpp
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/putil.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uelement.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustr_imp.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//========================================
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Read-only implementation
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//========================================
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint8_t
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::doCaseCompare(int32_t start,
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             int32_t length,
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             const UChar *srcChars,
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             int32_t srcStart,
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             int32_t srcLength,
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             uint32_t options) const
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // compare illegal string values
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // treat const UChar *srcChars==NULL as an empty string
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(isBogus()) {
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // pin indices to legal values
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  pinIndices(start, length);
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(srcChars == NULL) {
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    srcStart = srcLength = 0;
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // get the correct pointer
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const UChar *chars = getArrayStart();
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  chars += start;
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(srcStart!=0) {
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    srcChars += srcStart;
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(chars != srcChars) {
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode=U_ZERO_ERROR;
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                options|U_COMPARE_IGNORE_CASE, &errorCode);
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(result!=0) {
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return (int8_t)(result >> 24 | 1);
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // get the srcLength if necessary
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(srcLength < 0) {
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      srcLength = u_strlen(srcChars + srcStart);
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length != srcLength) {
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return (int8_t)((length - srcLength) >> 24 | 1);
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return 0;
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//========================================
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Write implementation
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//========================================
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::caseMap(const UCaseMap *csm,
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                       UStringCaseMapper *stringCaseMapper) {
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(isEmpty() || !isWritable()) {
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // nothing to do
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // We need to allocate a new buffer for the internal string case mapping function.
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // This is very similar to how doReplace() keeps the old array pointer
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // and deletes the old array itself after it is done.
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UChar oldStackBuffer[US_STACKBUF_SIZE];
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UChar *oldArray;
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int32_t oldLength;
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(fFlags&kUsingStackBuffer) {
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // copy the stack buffer contents because it will be overwritten
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oldArray = oldStackBuffer;
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oldLength = fShortLength;
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oldArray = getArrayStart();
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oldLength = length();
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int32_t capacity;
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(oldLength <= US_STACKBUF_SIZE) {
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    capacity = US_STACKBUF_SIZE;
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    capacity = oldLength + 20;
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int32_t *bufferToDelete = 0;
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Case-map, and if the result is too long, then reallocate and repeat.
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UErrorCode errorCode;
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int32_t newLength;
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  do {
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errorCode = U_ZERO_ERROR;
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 oldArray, oldLength, &errorCode);
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setLength(newLength);
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if (bufferToDelete) {
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(bufferToDelete);
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(U_FAILURE(errorCode)) {
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setToBogus();
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return *this;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::foldCase(uint32_t options) {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UCaseMap csm=UCASEMAP_INITIALIZER;
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  csm.csp=ucase_getSingleton();
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  csm.options=options;
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return caseMap(&csm, ustrcase_internalFold);
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Defined here to reduce dependencies on break iterator
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguhash_hashCaselessUnicodeString(const UElement key) {
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_NAMESPACE_USE
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeString *str = (const UnicodeString*) key.pointer;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (str == NULL) {
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Inefficient; a better way would be to have a hash function in
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // UnicodeString that does case folding on the fly.
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString copy(*str);
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return copy.foldCase().hashCode();
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Defined here to reduce dependencies on break iterator
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_NAMESPACE_USE
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (str1 == str2) {
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (str1 == NULL || str2 == NULL) {
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
181