164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 60aa7b3a95216935fb245c0732328d8c78d2273dbMarkus Scherer* Copyright (C) 1999-2014, International Business Machines 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: unistr_case.cpp 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:2 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2004aug19 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Case-mapping functions moved here from unistr.cpp 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "uelement.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Read-only implementation 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint8_t 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString::doCaseCompare(int32_t start, 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length, 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcChars, 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options) const 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compare illegal string values 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // treat const UChar *srcChars==NULL as an empty string 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isBogus()) { 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pin indices to legal values 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pinIndices(start, length); 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcChars == NULL) { 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcStart = srcLength = 0; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the correct pointer 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *chars = getArrayStart(); 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chars += start; 6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(srcStart!=0) { 6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius srcChars += srcStart; 6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(chars != srcChars) { 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, &errorCode); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result!=0) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int8_t)(result >> 24 | 1); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the srcLength if necessary 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0) { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = u_strlen(srcChars + srcStart); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length != srcLength) { 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int8_t)((length - srcLength) >> 24 | 1); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Write implementation 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & 9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeString::caseMap(const UCaseMap *csm, 9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UStringCaseMapper *stringCaseMapper) { 92c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isEmpty() || !isWritable()) { 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // nothing to do 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need to allocate a new buffer for the internal string case mapping function. 98c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This is very similar to how doReplace() keeps the old array pointer 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and deletes the old array itself after it is done. 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar oldStackBuffer[US_STACKBUF_SIZE]; 102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *oldArray; 103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t oldLength; 104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1050aa7b3a95216935fb245c0732328d8c78d2273dbMarkus Scherer if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // copy the stack buffer contents because it will be overwritten 107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldArray = oldStackBuffer; 1080aa7b3a95216935fb245c0732328d8c78d2273dbMarkus Scherer oldLength = getShortLength(); 1090aa7b3a95216935fb245c0732328d8c78d2273dbMarkus Scherer u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); 110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldArray = getArrayStart(); 112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldLength = length(); 113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t capacity; 116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(oldLength <= US_STACKBUF_SIZE) { 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = US_STACKBUF_SIZE; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = oldLength + 20; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t *bufferToDelete = 0; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Case-map, and if the result is too long, then reallocate and repeat. 12727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode; 128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newLength; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode = U_ZERO_ERROR; 13183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), 13283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius oldArray, oldLength, &errorCode); 133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setLength(newLength); 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bufferToDelete) { 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(bufferToDelete); 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setToBogus(); 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString::foldCase(uint32_t options) { 14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseMap csm=UCASEMAP_INITIALIZER; 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm.csp=ucase_getSingleton(); 14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm.options=options; 15083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return caseMap(&csm, ustrcase_internalFold); 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined here to reduce dependencies on break iterator 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusuhash_hashCaselessUnicodeString(const UElement key) { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str = (const UnicodeString*) key.pointer; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str == NULL) { 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Inefficient; a better way would be to have a hash function in 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeString that does case folding on the fly. 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString copy(*str); 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return copy.foldCase().hashCode(); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined here to reduce dependencies on break iterator 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 17183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusuhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str1 == str2) { 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str1 == NULL || str2 == NULL) { 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 183