1/* 2********************************************************************** 3* Copyright (C) 2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* 7* scriptset.cpp 8* 9* created on: 2013 Jan 7 10* created by: Andy Heninger 11*/ 12 13#include "unicode/utypes.h" 14 15#include "unicode/uchar.h" 16#include "unicode/unistr.h" 17 18#include "scriptset.h" 19#include "uassert.h" 20#include "cmemory.h" 21 22U_NAMESPACE_BEGIN 23 24//---------------------------------------------------------------------------- 25// 26// ScriptSet implementation 27// 28//---------------------------------------------------------------------------- 29ScriptSet::ScriptSet() { 30 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 31 bits[i] = 0; 32 } 33} 34 35ScriptSet::~ScriptSet() { 36} 37 38ScriptSet::ScriptSet(const ScriptSet &other) { 39 *this = other; 40} 41 42 43ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 44 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 45 bits[i] = other.bits[i]; 46 } 47 return *this; 48} 49 50 51UBool ScriptSet::operator == (const ScriptSet &other) const { 52 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 53 if (bits[i] != other.bits[i]) { 54 return FALSE; 55 } 56 } 57 return TRUE; 58} 59 60UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 61 if (U_FAILURE(status)) { 62 return FALSE; 63 } 64 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 65 status = U_ILLEGAL_ARGUMENT_ERROR; 66 return FALSE; 67 } 68 uint32_t index = script / 32; 69 uint32_t bit = 1 << (script & 31); 70 return ((bits[index] & bit) != 0); 71} 72 73 74ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 75 if (U_FAILURE(status)) { 76 return *this; 77 } 78 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 79 status = U_ILLEGAL_ARGUMENT_ERROR; 80 return *this; 81 } 82 uint32_t index = script / 32; 83 uint32_t bit = 1 << (script & 31); 84 bits[index] |= bit; 85 return *this; 86} 87 88ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 89 if (U_FAILURE(status)) { 90 return *this; 91 } 92 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 93 status = U_ILLEGAL_ARGUMENT_ERROR; 94 return *this; 95 } 96 uint32_t index = script / 32; 97 uint32_t bit = 1 << (script & 31); 98 bits[index] &= ~bit; 99 return *this; 100} 101 102 103 104ScriptSet &ScriptSet::Union(const ScriptSet &other) { 105 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 106 bits[i] |= other.bits[i]; 107 } 108 return *this; 109} 110 111ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 112 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 113 bits[i] &= other.bits[i]; 114 } 115 return *this; 116} 117 118ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 119 ScriptSet t; 120 t.set(script, status); 121 if (U_SUCCESS(status)) { 122 this->intersect(t); 123 } 124 return *this; 125} 126 127UBool ScriptSet::intersects(const ScriptSet &other) const { 128 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 129 if ((bits[i] & other.bits[i]) != 0) { 130 return true; 131 } 132 } 133 return false; 134} 135 136UBool ScriptSet::contains(const ScriptSet &other) const { 137 ScriptSet t(*this); 138 t.intersect(other); 139 return (t == other); 140} 141 142 143ScriptSet &ScriptSet::setAll() { 144 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 145 bits[i] = 0xffffffffu; 146 } 147 return *this; 148} 149 150 151ScriptSet &ScriptSet::resetAll() { 152 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 153 bits[i] = 0; 154 } 155 return *this; 156} 157 158int32_t ScriptSet::countMembers() const { 159 // This bit counter is good for sparse numbers of '1's, which is 160 // very much the case that we will usually have. 161 int32_t count = 0; 162 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 163 uint32_t x = bits[i]; 164 while (x > 0) { 165 count++; 166 x &= (x - 1); // and off the least significant one bit. 167 } 168 } 169 return count; 170} 171 172int32_t ScriptSet::hashCode() const { 173 int32_t hash = 0; 174 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 175 hash ^= bits[i]; 176 } 177 return hash; 178} 179 180int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 181 // TODO: Wants a better implementation. 182 if (fromIndex < 0) { 183 return -1; 184 } 185 UErrorCode status = U_ZERO_ERROR; 186 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 187 if (test((UScriptCode)scriptIndex, status)) { 188 return scriptIndex; 189 } 190 } 191 return -1; 192} 193 194UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 195 UBool firstTime = TRUE; 196 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 197 if (!firstTime) { 198 dest.append((UChar)0x20); 199 } 200 firstTime = FALSE; 201 const char *scriptName = uscript_getShortName((UScriptCode(i))); 202 dest.append(UnicodeString(scriptName, -1, US_INV)); 203 } 204 return dest; 205} 206 207ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 208 resetAll(); 209 if (U_FAILURE(status)) { 210 return *this; 211 } 212 UnicodeString oneScriptName; 213 for (int32_t i=0; i<scriptString.length();) { 214 UChar32 c = scriptString.char32At(i); 215 i = scriptString.moveIndex32(i, 1); 216 if (!u_isUWhiteSpace(c)) { 217 oneScriptName.append(c); 218 if (i < scriptString.length()) { 219 continue; 220 } 221 } 222 if (oneScriptName.length() > 0) { 223 char buf[40]; 224 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 225 buf[sizeof(buf)-1] = 0; 226 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 227 if (sc == UCHAR_INVALID_CODE) { 228 status = U_ILLEGAL_ARGUMENT_ERROR; 229 } else { 230 this->set((UScriptCode)sc, status); 231 } 232 if (U_FAILURE(status)) { 233 return *this; 234 } 235 oneScriptName.remove(); 236 } 237 } 238 return *this; 239} 240 241U_NAMESPACE_END 242 243U_CAPI UBool U_EXPORT2 244uhash_equalsScriptSet(const UElement key1, const UElement key2) { 245 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 246 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 247 return (*s1 == *s2); 248} 249 250U_CAPI int8_t U_EXPORT2 251uhash_compareScriptSet(UElement key0, UElement key1) { 252 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 253 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 254 int32_t diff = s0->countMembers() - s1->countMembers(); 255 if (diff != 0) return diff; 256 int32_t i0 = s0->nextSetBit(0); 257 int32_t i1 = s1->nextSetBit(0); 258 while ((diff = i0-i1) == 0 && i0 > 0) { 259 i0 = s0->nextSetBit(i0+1); 260 i1 = s1->nextSetBit(i1+1); 261 } 262 return (int8_t)diff; 263} 264 265U_CAPI int32_t U_EXPORT2 266uhash_hashScriptSet(const UElement key) { 267 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 268 return s->hashCode(); 269} 270 271U_CAPI void U_EXPORT2 272uhash_deleteScriptSet(void *obj) { 273 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 274 delete s; 275} 276