1/**
2 ************************************************************************************
3 * Copyright (C) 2006-2009,2011, International Business Machines Corporation        *
4 * and others. All Rights Reserved.                                                 *
5 ************************************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_BREAK_ITERATION
11
12#include "brkeng.h"
13#include "dictbe.h"
14#include "triedict.h"
15#include "unicode/uchar.h"
16#include "unicode/uniset.h"
17#include "unicode/chariter.h"
18#include "unicode/ures.h"
19#include "unicode/udata.h"
20#include "unicode/putil.h"
21#include "unicode/ustring.h"
22#include "unicode/uscript.h"
23#include "uvector.h"
24#include "umutex.h"
25#include "uresimp.h"
26#include "ubrkimpl.h"
27
28U_NAMESPACE_BEGIN
29
30/*
31 ******************************************************************
32 */
33
34LanguageBreakEngine::LanguageBreakEngine() {
35}
36
37LanguageBreakEngine::~LanguageBreakEngine() {
38}
39
40/*
41 ******************************************************************
42 */
43
44LanguageBreakFactory::LanguageBreakFactory() {
45}
46
47LanguageBreakFactory::~LanguageBreakFactory() {
48}
49
50/*
51 ******************************************************************
52 */
53
54UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
55    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
56        fHandled[i] = 0;
57    }
58}
59
60UnhandledEngine::~UnhandledEngine() {
61    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
62        if (fHandled[i] != 0) {
63            delete fHandled[i];
64        }
65    }
66}
67
68UBool
69UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
70    return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
71        && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
72}
73
74int32_t
75UnhandledEngine::findBreaks( UText *text,
76                                 int32_t startPos,
77                                 int32_t endPos,
78                                 UBool reverse,
79                                 int32_t breakType,
80                                 UStack &/*foundBreaks*/ ) const {
81    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
82        UChar32 c = utext_current32(text);
83        if (reverse) {
84            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
85                c = utext_previous32(text);
86            }
87        }
88        else {
89            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
90                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
91                c = utext_current32(text);
92            }
93        }
94    }
95    return 0;
96}
97
98void
99UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
100    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
101        if (fHandled[breakType] == 0) {
102            fHandled[breakType] = new UnicodeSet();
103            if (fHandled[breakType] == 0) {
104                return;
105            }
106        }
107        if (!fHandled[breakType]->contains(c)) {
108            UErrorCode status = U_ZERO_ERROR;
109            // Apply the entire script of the character.
110            int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
111            fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
112        }
113    }
114}
115
116/*
117 ******************************************************************
118 */
119
120ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
121    fEngines = 0;
122}
123
124ICULanguageBreakFactory::~ICULanguageBreakFactory() {
125    if (fEngines != 0) {
126        delete fEngines;
127    }
128}
129
130U_NAMESPACE_END
131U_CDECL_BEGIN
132static void U_CALLCONV _deleteEngine(void *obj) {
133    delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj;
134}
135U_CDECL_END
136U_NAMESPACE_BEGIN
137
138const LanguageBreakEngine *
139ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
140    UBool       needsInit;
141    int32_t     i;
142    const LanguageBreakEngine *lbe = NULL;
143    UErrorCode  status = U_ZERO_ERROR;
144
145    // TODO: The global mutex should not be used.
146    // The global mutex should only be used for short periods.
147    // A ICULanguageBreakFactory specific mutex should be used.
148    umtx_lock(NULL);
149    needsInit = (UBool)(fEngines == NULL);
150    if (!needsInit) {
151        i = fEngines->size();
152        while (--i >= 0) {
153            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
154            if (lbe != NULL && lbe->handles(c, breakType)) {
155                break;
156            }
157            lbe = NULL;
158        }
159    }
160    umtx_unlock(NULL);
161
162    if (lbe != NULL) {
163        return lbe;
164    }
165
166    if (needsInit) {
167        UStack  *engines = new UStack(_deleteEngine, NULL, status);
168        if (U_SUCCESS(status) && engines == NULL) {
169            status = U_MEMORY_ALLOCATION_ERROR;
170        }
171        else if (U_FAILURE(status)) {
172            delete engines;
173            engines = NULL;
174        }
175        else {
176            umtx_lock(NULL);
177            if (fEngines == NULL) {
178                fEngines = engines;
179                engines = NULL;
180            }
181            umtx_unlock(NULL);
182            delete engines;
183        }
184    }
185
186    if (fEngines == NULL) {
187        return NULL;
188    }
189
190    // We didn't find an engine the first time through, or there was no
191    // stack. Create an engine.
192    const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
193
194    // Now get the lock, and see if someone else has created it in the
195    // meantime
196    umtx_lock(NULL);
197    i = fEngines->size();
198    while (--i >= 0) {
199        lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
200        if (lbe != NULL && lbe->handles(c, breakType)) {
201            break;
202        }
203        lbe = NULL;
204    }
205    if (lbe == NULL && newlbe != NULL) {
206        fEngines->push((void *)newlbe, status);
207        lbe = newlbe;
208        newlbe = NULL;
209    }
210    umtx_unlock(NULL);
211
212    delete newlbe;
213
214    return lbe;
215}
216
217const LanguageBreakEngine *
218ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
219    UErrorCode status = U_ZERO_ERROR;
220    UScriptCode code = uscript_getScript(c, &status);
221    if (U_SUCCESS(status)) {
222        const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType);
223        if (dict != NULL) {
224            const LanguageBreakEngine *engine = NULL;
225            switch(code) {
226            case USCRIPT_THAI:
227                engine = new ThaiBreakEngine(dict, status);
228                break;
229            case USCRIPT_KHMER:
230                engine = new KhmerBreakEngine(dict, status);
231                break;
232            default:
233                break;
234            }
235            if (engine == NULL) {
236                delete dict;
237            }
238            else if (U_FAILURE(status)) {
239                delete engine;
240                engine = NULL;
241            }
242            return engine;
243        }
244    }
245    return NULL;
246}
247
248const CompactTrieDictionary *
249ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
250    UErrorCode status = U_ZERO_ERROR;
251    // Open root from brkitr tree.
252    char dictnbuff[256];
253    char ext[4]={'\0'};
254
255    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
256    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
257    b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
258    int32_t dictnlength = 0;
259    const UChar *dictfname = ures_getString(b, &dictnlength, &status);
260    if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
261        dictnlength = 0;
262        status = U_BUFFER_OVERFLOW_ERROR;
263    }
264    if (U_SUCCESS(status) && dictfname) {
265        UChar* extStart=u_strchr(dictfname, 0x002e);
266        int len = 0;
267        if(extStart!=NULL){
268            len = (int)(extStart-dictfname);
269            u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
270            u_UCharsToChars(dictfname, dictnbuff, len);
271        }
272        dictnbuff[len]=0; // nul terminate
273    }
274    ures_close(b);
275    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
276    if (U_SUCCESS(status)) {
277        const CompactTrieDictionary *dict = new CompactTrieDictionary(
278            file, status);
279        if (U_SUCCESS(status) && dict == NULL) {
280            status = U_MEMORY_ALLOCATION_ERROR;
281        }
282        if (U_FAILURE(status)) {
283            delete dict;
284            dict = NULL;
285        }
286        return dict;
287    }
288    return NULL;
289}
290
291U_NAMESPACE_END
292
293#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
294