/*
 ************************************************************************************
 * Copyright (C) 2006-2014, International Business Machines Corporation
 * and others. All Rights Reserved.
 ************************************************************************************
 */
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "brkeng.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "dictbe.h"
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/chariter.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ures.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uscript.h"
2254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/ucharstrie.h"
2354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/bytestrie.h"
2454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "charstr.h"
2554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "dictionarydata.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uresimp.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ubrkimpl.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakEngine::LanguageBreakEngine() {
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakEngine::~LanguageBreakEngine() {
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakFactory::LanguageBreakFactory() {
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakFactory::~LanguageBreakFactory() {
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fHandled[i] = 0;
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::~UnhandledEngine() {
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fHandled[i] != 0) {
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete fHandled[i];
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::handles(UChar32 c, int32_t breakType) const {
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::findBreaks( UText *text,
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t startPos,
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t endPos,
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UBool reverse,
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t breakType,
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UStack &/*foundBreaks*/ ) const {
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = utext_current32(text);
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (reverse) {
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c = utext_previous32(text);
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c = utext_current32(text);
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fHandled[breakType] == 0) {
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fHandled[breakType] = new UnicodeSet();
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fHandled[breakType] == 0) {
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return;
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!fHandled[breakType]->contains(c)) {
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode status = U_ZERO_ERROR;
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Apply the entire script of the character.
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fEngines = 0;
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::~ICULanguageBreakFactory() {
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fEngines != 0) {
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete fEngines;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteEngine(void *obj) {
136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    delete (const icu::LanguageBreakEngine *) obj;
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst LanguageBreakEngine *
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       needsInit;
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     i;
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const LanguageBreakEngine *lbe = NULL;
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO: The global mutex should not be used.
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The global mutex should only be used for short periods.
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // A ICULanguageBreakFactory specific mutex should be used.
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_lock(NULL);
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    needsInit = (UBool)(fEngines == NULL);
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!needsInit) {
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        i = fEngines->size();
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (--i >= 0) {
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (lbe != NULL && lbe->handles(c, breakType)) {
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lbe = NULL;
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_unlock(NULL);
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lbe != NULL) {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return lbe;
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (needsInit) {
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UStack  *engines = new UStack(_deleteEngine, NULL, status);
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(status) && engines == NULL) {
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else if (U_FAILURE(status)) {
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete engines;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            engines = NULL;
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            umtx_lock(NULL);
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fEngines == NULL) {
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fEngines = engines;
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                engines = NULL;
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            umtx_unlock(NULL);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete engines;
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fEngines == NULL) {
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We didn't find an engine the first time through, or there was no
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // stack. Create an engine.
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Now get the lock, and see if someone else has created it in the
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // meantime
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_lock(NULL);
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    i = fEngines->size();
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (--i >= 0) {
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lbe != NULL && lbe->handles(c, breakType)) {
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lbe = NULL;
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lbe == NULL && newlbe != NULL) {
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fEngines->push((void *)newlbe, status);
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        lbe = newlbe;
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        newlbe = NULL;
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_unlock(NULL);
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete newlbe;
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return lbe;
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst LanguageBreakEngine *
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UScriptCode code = uscript_getScript(c, &status);
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(status)) {
22554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
22654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        if (m != NULL) {
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const LanguageBreakEngine *engine = NULL;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch(code) {
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case USCRIPT_THAI:
23054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                engine = new ThaiBreakEngine(m, status);
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
23259d709d503bab6e2b61931737e662dd293b40578ccornelius            case USCRIPT_LAO:
23359d709d503bab6e2b61931737e662dd293b40578ccornelius                engine = new LaoBreakEngine(m, status);
23459d709d503bab6e2b61931737e662dd293b40578ccornelius                break;
235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            case USCRIPT_MYANMAR:
236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                engine = new BurmeseBreakEngine(m, status);
237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                break;
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            case USCRIPT_KHMER:
23954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                engine = new KhmerBreakEngine(m, status);
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
24154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
24254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if !UCONFIG_NO_NORMALIZATION
24354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                // CJK not available w/o normalization
24454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            case USCRIPT_HANGUL:
24554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                engine = new CjkBreakEngine(m, kKorean, status);
24654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                break;
24754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
24854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // use same BreakEngine and dictionary for both Chinese and Japanese
24954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            case USCRIPT_HIRAGANA:
25054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            case USCRIPT_KATAKANA:
25154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            case USCRIPT_HAN:
25254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                engine = new CjkBreakEngine(m, kChineseJapanese, status);
25354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                break;
25454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if 0
25554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // TODO: Have to get some characters with script=common handled
25654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // by CjkBreakEngine (e.g. U+309B). Simply subjecting
25754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // them to CjkBreakEngine does not work. The engine has to
25854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // special-case them.
25954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            case USCRIPT_COMMON:
26054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            {
26154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                UBlockCode block = ublock_getCode(code);
26254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
26354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                   engine = new CjkBreakEngine(dict, kChineseJapanese, status);
26454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                break;
26554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            }
26654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif
26754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif
26854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            default:
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (engine == NULL) {
27354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                delete m;
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else if (U_FAILURE(status)) {
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete engine;
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                engine = NULL;
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return engine;
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
28554dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusDictionaryMatcher *
28654dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
28854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    // open root from brkitr tree.
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t dictnlength = 0;
29254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    const UChar *dictfname =
29354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
29454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    if (U_FAILURE(status)) {
29554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        ures_close(b);
29654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        return NULL;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
29854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    CharString dictnbuf;
29954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    CharString ext;
30054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
30154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    if (extStart != NULL) {
30254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        int32_t len = (int32_t)(extStart - dictfname);
30354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
30454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        dictnlength = len;
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
30654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ures_close(b);
30854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
30954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(status)) {
31154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        // build trie
31254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        const uint8_t *data = (const uint8_t *)udata_getMemory(file);
31354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        const int32_t *indexes = (const int32_t *)data;
31454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
31554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
31654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        DictionaryMatcher *m = NULL;
31754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
31854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
31954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            const char *characters = (const char *)(data + offset);
32054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            m = new BytesDictionaryMatcher(characters, transform, file);
32154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        }
32254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
32354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            const UChar *characters = (const UChar *)(data + offset);
32454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            m = new UCharsDictionaryMatcher(characters, file);
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
32654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        if (m == NULL) {
32754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // no matcher exists to take ownership - either we are an invalid
32854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            // type or memory allocation failed
32954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            udata_close(file);
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        return m;
33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    } else if (dictfname != NULL) {
33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        // we don't have a dictionary matcher.
33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
33554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        status = U_ZERO_ERROR;
33654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        return NULL;
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
344