1/* 2********************************************************************** 3* Copyright (C) 2013, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* 7* scriptset.cpp 8* 9* created on: 2013 Jan 7 10* created by: Andy Heninger 11*/ 12 13#include "unicode/utypes.h" 14 15#include "unicode/uchar.h" 16#include "unicode/unistr.h" 17 18#include "scriptset.h" 19#include "uassert.h" 20 21U_NAMESPACE_BEGIN 22 23#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 24 25//---------------------------------------------------------------------------- 26// 27// ScriptSet implementation 28// 29//---------------------------------------------------------------------------- 30ScriptSet::ScriptSet() { 31 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 32 bits[i] = 0; 33 } 34} 35 36ScriptSet::~ScriptSet() { 37} 38 39ScriptSet::ScriptSet(const ScriptSet &other) { 40 *this = other; 41} 42 43 44ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 45 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 46 bits[i] = other.bits[i]; 47 } 48 return *this; 49} 50 51 52UBool ScriptSet::operator == (const ScriptSet &other) const { 53 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 54 if (bits[i] != other.bits[i]) { 55 return FALSE; 56 } 57 } 58 return TRUE; 59} 60 61UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 62 if (U_FAILURE(status)) { 63 return FALSE; 64 } 65 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 66 status = U_ILLEGAL_ARGUMENT_ERROR; 67 return FALSE; 68 } 69 uint32_t index = script / 32; 70 uint32_t bit = 1 << (script & 31); 71 return ((bits[index] & bit) != 0); 72} 73 74 75ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 76 if (U_FAILURE(status)) { 77 return *this; 78 } 79 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 80 status = U_ILLEGAL_ARGUMENT_ERROR; 81 return *this; 82 } 83 uint32_t index = script / 32; 84 uint32_t bit = 1 << (script & 31); 85 bits[index] |= bit; 86 return *this; 87} 88 89ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 90 if (U_FAILURE(status)) { 91 return *this; 92 } 93 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 94 status = U_ILLEGAL_ARGUMENT_ERROR; 95 return *this; 96 } 97 uint32_t index = script / 32; 98 uint32_t bit = 1 << (script & 31); 99 bits[index] &= ~bit; 100 return *this; 101} 102 103 104 105ScriptSet &ScriptSet::Union(const ScriptSet &other) { 106 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 107 bits[i] |= other.bits[i]; 108 } 109 return *this; 110} 111 112ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 113 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 114 bits[i] &= other.bits[i]; 115 } 116 return *this; 117} 118 119ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 120 ScriptSet t; 121 t.set(script, status); 122 if (U_SUCCESS(status)) { 123 this->intersect(t); 124 } 125 return *this; 126} 127 128UBool ScriptSet::intersects(const ScriptSet &other) const { 129 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 130 if ((bits[i] & other.bits[i]) != 0) { 131 return true; 132 } 133 } 134 return false; 135} 136 137UBool ScriptSet::contains(const ScriptSet &other) const { 138 ScriptSet t(*this); 139 t.intersect(other); 140 return (t == other); 141} 142 143 144ScriptSet &ScriptSet::setAll() { 145 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 146 bits[i] = 0xffffffffu; 147 } 148 return *this; 149} 150 151 152ScriptSet &ScriptSet::resetAll() { 153 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 154 bits[i] = 0; 155 } 156 return *this; 157} 158 159int32_t ScriptSet::countMembers() const { 160 // This bit counter is good for sparse numbers of '1's, which is 161 // very much the case that we will usually have. 162 int32_t count = 0; 163 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 164 uint32_t x = bits[i]; 165 while (x > 0) { 166 count++; 167 x &= (x - 1); // and off the least significant one bit. 168 } 169 } 170 return count; 171} 172 173int32_t ScriptSet::hashCode() const { 174 int32_t hash = 0; 175 for (int32_t i=0; i<LENGTHOF(bits); i++) { 176 hash ^= bits[i]; 177 } 178 return hash; 179} 180 181int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 182 // TODO: Wants a better implementation. 183 if (fromIndex < 0) { 184 return -1; 185 } 186 UErrorCode status = U_ZERO_ERROR; 187 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 188 if (test((UScriptCode)scriptIndex, status)) { 189 return scriptIndex; 190 } 191 } 192 return -1; 193} 194 195UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 196 UBool firstTime = TRUE; 197 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 198 if (!firstTime) { 199 dest.append((UChar)0x20); 200 } 201 firstTime = FALSE; 202 const char *scriptName = uscript_getShortName((UScriptCode(i))); 203 dest.append(UnicodeString(scriptName, -1, US_INV)); 204 } 205 return dest; 206} 207 208ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 209 resetAll(); 210 if (U_FAILURE(status)) { 211 return *this; 212 } 213 UnicodeString oneScriptName; 214 for (int32_t i=0; i<scriptString.length();) { 215 UChar32 c = scriptString.char32At(i); 216 i = scriptString.moveIndex32(i, 1); 217 if (!u_isUWhiteSpace(c)) { 218 oneScriptName.append(c); 219 if (i < scriptString.length()) { 220 continue; 221 } 222 } 223 if (oneScriptName.length() > 0) { 224 char buf[40]; 225 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 226 buf[sizeof(buf)-1] = 0; 227 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 228 if (sc == UCHAR_INVALID_CODE) { 229 status = U_ILLEGAL_ARGUMENT_ERROR; 230 } else { 231 this->set((UScriptCode)sc, status); 232 } 233 if (U_FAILURE(status)) { 234 return *this; 235 } 236 oneScriptName.remove(); 237 } 238 } 239 return *this; 240} 241 242U_NAMESPACE_END 243 244U_CAPI UBool U_EXPORT2 245uhash_equalsScriptSet(const UElement key1, const UElement key2) { 246 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 247 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 248 return (*s1 == *s2); 249} 250 251U_CAPI int8_t U_EXPORT2 252uhash_compareScriptSet(UElement key0, UElement key1) { 253 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 254 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 255 int32_t diff = s0->countMembers() - s1->countMembers(); 256 if (diff != 0) return diff; 257 int32_t i0 = s0->nextSetBit(0); 258 int32_t i1 = s1->nextSetBit(0); 259 while ((diff = i0-i1) == 0 && i0 > 0) { 260 i0 = s0->nextSetBit(i0+1); 261 i1 = s1->nextSetBit(i1+1); 262 } 263 return (int8_t)diff; 264} 265 266U_CAPI int32_t U_EXPORT2 267uhash_hashScriptSet(const UElement key) { 268 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 269 return s->hashCode(); 270} 271 272U_CAPI void U_EXPORT2 273uhash_deleteScriptSet(void *obj) { 274 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 275 delete s; 276} 277