1/*
2**********************************************************************
3*   Copyright (C) 2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* scriptset.cpp
8*
9* created on: 2013 Jan 7
10* created by: Andy Heninger
11*/
12
13#include "unicode/utypes.h"
14
15#include "unicode/uchar.h"
16#include "unicode/unistr.h"
17
18#include "scriptset.h"
19#include "uassert.h"
20#include "cmemory.h"
21
22U_NAMESPACE_BEGIN
23
24//----------------------------------------------------------------------------
25//
26//  ScriptSet implementation
27//
28//----------------------------------------------------------------------------
29ScriptSet::ScriptSet() {
30    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
31        bits[i] = 0;
32    }
33}
34
35ScriptSet::~ScriptSet() {
36}
37
38ScriptSet::ScriptSet(const ScriptSet &other) {
39    *this = other;
40}
41
42
43ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
44    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
45        bits[i] = other.bits[i];
46    }
47    return *this;
48}
49
50
51UBool ScriptSet::operator == (const ScriptSet &other) const {
52    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
53        if (bits[i] != other.bits[i]) {
54            return FALSE;
55        }
56    }
57    return TRUE;
58}
59
60UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
61    if (U_FAILURE(status)) {
62        return FALSE;
63    }
64    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
65        status = U_ILLEGAL_ARGUMENT_ERROR;
66        return FALSE;
67    }
68    uint32_t index = script / 32;
69    uint32_t bit   = 1 << (script & 31);
70    return ((bits[index] & bit) != 0);
71}
72
73
74ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
75    if (U_FAILURE(status)) {
76        return *this;
77    }
78    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
79        status = U_ILLEGAL_ARGUMENT_ERROR;
80        return *this;
81    }
82    uint32_t index = script / 32;
83    uint32_t bit   = 1 << (script & 31);
84    bits[index] |= bit;
85    return *this;
86}
87
88ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
89    if (U_FAILURE(status)) {
90        return *this;
91    }
92    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
93        status = U_ILLEGAL_ARGUMENT_ERROR;
94        return *this;
95    }
96    uint32_t index = script / 32;
97    uint32_t bit   = 1 << (script & 31);
98    bits[index] &= ~bit;
99    return *this;
100}
101
102
103
104ScriptSet &ScriptSet::Union(const ScriptSet &other) {
105    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
106        bits[i] |= other.bits[i];
107    }
108    return *this;
109}
110
111ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
112    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
113        bits[i] &= other.bits[i];
114    }
115    return *this;
116}
117
118ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
119    ScriptSet t;
120    t.set(script, status);
121    if (U_SUCCESS(status)) {
122        this->intersect(t);
123    }
124    return *this;
125}
126
127UBool ScriptSet::intersects(const ScriptSet &other) const {
128    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
129        if ((bits[i] & other.bits[i]) != 0) {
130            return true;
131        }
132    }
133    return false;
134}
135
136UBool ScriptSet::contains(const ScriptSet &other) const {
137    ScriptSet t(*this);
138    t.intersect(other);
139    return (t == other);
140}
141
142
143ScriptSet &ScriptSet::setAll() {
144    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
145        bits[i] = 0xffffffffu;
146    }
147    return *this;
148}
149
150
151ScriptSet &ScriptSet::resetAll() {
152    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
153        bits[i] = 0;
154    }
155    return *this;
156}
157
158int32_t ScriptSet::countMembers() const {
159    // This bit counter is good for sparse numbers of '1's, which is
160    //  very much the case that we will usually have.
161    int32_t count = 0;
162    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
163        uint32_t x = bits[i];
164        while (x > 0) {
165            count++;
166            x &= (x - 1);    // and off the least significant one bit.
167        }
168    }
169    return count;
170}
171
172int32_t ScriptSet::hashCode() const {
173    int32_t hash = 0;
174    for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
175        hash ^= bits[i];
176    }
177    return hash;
178}
179
180int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
181    // TODO: Wants a better implementation.
182    if (fromIndex < 0) {
183        return -1;
184    }
185    UErrorCode status = U_ZERO_ERROR;
186    for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
187        if (test((UScriptCode)scriptIndex, status)) {
188            return scriptIndex;
189        }
190    }
191    return -1;
192}
193
194UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
195    UBool firstTime = TRUE;
196    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
197        if (!firstTime) {
198            dest.append((UChar)0x20);
199        }
200        firstTime = FALSE;
201        const char *scriptName = uscript_getShortName((UScriptCode(i)));
202        dest.append(UnicodeString(scriptName, -1, US_INV));
203    }
204    return dest;
205}
206
207ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
208    resetAll();
209    if (U_FAILURE(status)) {
210        return *this;
211    }
212    UnicodeString oneScriptName;
213    for (int32_t i=0; i<scriptString.length();) {
214        UChar32 c = scriptString.char32At(i);
215        i = scriptString.moveIndex32(i, 1);
216        if (!u_isUWhiteSpace(c)) {
217            oneScriptName.append(c);
218            if (i < scriptString.length()) {
219                continue;
220            }
221        }
222        if (oneScriptName.length() > 0) {
223            char buf[40];
224            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
225            buf[sizeof(buf)-1] = 0;
226            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
227            if (sc == UCHAR_INVALID_CODE) {
228                status = U_ILLEGAL_ARGUMENT_ERROR;
229            } else {
230                this->set((UScriptCode)sc, status);
231            }
232            if (U_FAILURE(status)) {
233                return *this;
234            }
235            oneScriptName.remove();
236        }
237    }
238    return *this;
239}
240
241U_NAMESPACE_END
242
243U_CAPI UBool U_EXPORT2
244uhash_equalsScriptSet(const UElement key1, const UElement key2) {
245    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
246    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
247    return (*s1 == *s2);
248}
249
250U_CAPI int8_t U_EXPORT2
251uhash_compareScriptSet(UElement key0, UElement key1) {
252    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
253    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
254    int32_t diff = s0->countMembers() - s1->countMembers();
255    if (diff != 0) return diff;
256    int32_t i0 = s0->nextSetBit(0);
257    int32_t i1 = s1->nextSetBit(0);
258    while ((diff = i0-i1) == 0 && i0 > 0) {
259        i0 = s0->nextSetBit(i0+1);
260        i1 = s1->nextSetBit(i1+1);
261    }
262    return (int8_t)diff;
263}
264
265U_CAPI int32_t U_EXPORT2
266uhash_hashScriptSet(const UElement key) {
267    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
268    return s->hashCode();
269}
270
271U_CAPI void U_EXPORT2
272uhash_deleteScriptSet(void *obj) {
273    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
274    delete s;
275}
276