1/*
2**********************************************************************
3*   Copyright (C) 2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* scriptset.cpp
8*
9* created on: 2013 Jan 7
10* created by: Andy Heninger
11*/
12
13#include "unicode/utypes.h"
14
15#include "unicode/uchar.h"
16#include "unicode/unistr.h"
17
18#include "scriptset.h"
19#include "uassert.h"
20
21U_NAMESPACE_BEGIN
22
// Number of elements in a true array (not a pointer), as a signed 32-bit value.
#define LENGTHOF(arr) ((int32_t)(sizeof(arr)/sizeof(*(arr))))
24
25//----------------------------------------------------------------------------
26//
27//  ScriptSet implementation
28//
29//----------------------------------------------------------------------------
30ScriptSet::ScriptSet() {
31    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
32        bits[i] = 0;
33    }
34}
35
36ScriptSet::~ScriptSet() {
37}
38
39ScriptSet::ScriptSet(const ScriptSet &other) {
40    *this = other;
41}
42
43
44ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
45    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
46        bits[i] = other.bits[i];
47    }
48    return *this;
49}
50
51
52UBool ScriptSet::operator == (const ScriptSet &other) const {
53    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
54        if (bits[i] != other.bits[i]) {
55            return FALSE;
56        }
57    }
58    return TRUE;
59}
60
61UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
62    if (U_FAILURE(status)) {
63        return FALSE;
64    }
65    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
66        status = U_ILLEGAL_ARGUMENT_ERROR;
67        return FALSE;
68    }
69    uint32_t index = script / 32;
70    uint32_t bit   = 1 << (script & 31);
71    return ((bits[index] & bit) != 0);
72}
73
74
75ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
76    if (U_FAILURE(status)) {
77        return *this;
78    }
79    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
80        status = U_ILLEGAL_ARGUMENT_ERROR;
81        return *this;
82    }
83    uint32_t index = script / 32;
84    uint32_t bit   = 1 << (script & 31);
85    bits[index] |= bit;
86    return *this;
87}
88
89ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
90    if (U_FAILURE(status)) {
91        return *this;
92    }
93    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
94        status = U_ILLEGAL_ARGUMENT_ERROR;
95        return *this;
96    }
97    uint32_t index = script / 32;
98    uint32_t bit   = 1 << (script & 31);
99    bits[index] &= ~bit;
100    return *this;
101}
102
103
104
105ScriptSet &ScriptSet::Union(const ScriptSet &other) {
106    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
107        bits[i] |= other.bits[i];
108    }
109    return *this;
110}
111
112ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
113    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
114        bits[i] &= other.bits[i];
115    }
116    return *this;
117}
118
119ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
120    ScriptSet t;
121    t.set(script, status);
122    if (U_SUCCESS(status)) {
123        this->intersect(t);
124    }
125    return *this;
126}
127
128UBool ScriptSet::intersects(const ScriptSet &other) const {
129    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
130        if ((bits[i] & other.bits[i]) != 0) {
131            return true;
132        }
133    }
134    return false;
135}
136
137UBool ScriptSet::contains(const ScriptSet &other) const {
138    ScriptSet t(*this);
139    t.intersect(other);
140    return (t == other);
141}
142
143
144ScriptSet &ScriptSet::setAll() {
145    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
146        bits[i] = 0xffffffffu;
147    }
148    return *this;
149}
150
151
152ScriptSet &ScriptSet::resetAll() {
153    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
154        bits[i] = 0;
155    }
156    return *this;
157}
158
159int32_t ScriptSet::countMembers() const {
160    // This bit counter is good for sparse numbers of '1's, which is
161    //  very much the case that we will usually have.
162    int32_t count = 0;
163    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
164        uint32_t x = bits[i];
165        while (x > 0) {
166            count++;
167            x &= (x - 1);    // and off the least significant one bit.
168        }
169    }
170    return count;
171}
172
173int32_t ScriptSet::hashCode() const {
174    int32_t hash = 0;
175    for (int32_t i=0; i<LENGTHOF(bits); i++) {
176        hash ^= bits[i];
177    }
178    return hash;
179}
180
181int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
182    // TODO: Wants a better implementation.
183    if (fromIndex < 0) {
184        return -1;
185    }
186    UErrorCode status = U_ZERO_ERROR;
187    for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
188        if (test((UScriptCode)scriptIndex, status)) {
189            return scriptIndex;
190        }
191    }
192    return -1;
193}
194
195UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
196    UBool firstTime = TRUE;
197    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
198        if (!firstTime) {
199            dest.append((UChar)0x20);
200        }
201        firstTime = FALSE;
202        const char *scriptName = uscript_getShortName((UScriptCode(i)));
203        dest.append(UnicodeString(scriptName, -1, US_INV));
204    }
205    return dest;
206}
207
208ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
209    resetAll();
210    if (U_FAILURE(status)) {
211        return *this;
212    }
213    UnicodeString oneScriptName;
214    for (int32_t i=0; i<scriptString.length();) {
215        UChar32 c = scriptString.char32At(i);
216        i = scriptString.moveIndex32(i, 1);
217        if (!u_isUWhiteSpace(c)) {
218            oneScriptName.append(c);
219            if (i < scriptString.length()) {
220                continue;
221            }
222        }
223        if (oneScriptName.length() > 0) {
224            char buf[40];
225            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
226            buf[sizeof(buf)-1] = 0;
227            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
228            if (sc == UCHAR_INVALID_CODE) {
229                status = U_ILLEGAL_ARGUMENT_ERROR;
230            } else {
231                this->set((UScriptCode)sc, status);
232            }
233            if (U_FAILURE(status)) {
234                return *this;
235            }
236            oneScriptName.remove();
237        }
238    }
239    return *this;
240}
241
242U_NAMESPACE_END
243
244U_CAPI UBool U_EXPORT2
245uhash_equalsScriptSet(const UElement key1, const UElement key2) {
246    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
247    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
248    return (*s1 == *s2);
249}
250
251U_CAPI int8_t U_EXPORT2
252uhash_compareScriptSet(UElement key0, UElement key1) {
253    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
254    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
255    int32_t diff = s0->countMembers() - s1->countMembers();
256    if (diff != 0) return diff;
257    int32_t i0 = s0->nextSetBit(0);
258    int32_t i1 = s1->nextSetBit(0);
259    while ((diff = i0-i1) == 0 && i0 > 0) {
260        i0 = s0->nextSetBit(i0+1);
261        i1 = s1->nextSetBit(i1+1);
262    }
263    return (int8_t)diff;
264}
265
266U_CAPI int32_t U_EXPORT2
267uhash_hashScriptSet(const UElement key) {
268    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
269    return s->hashCode();
270}
271
272U_CAPI void U_EXPORT2
273uhash_deleteScriptSet(void *obj) {
274    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
275    delete s;
276}
277