/*
 ************************************************************************************
 * Copyright (C) 2006-2013, International Business Machines Corporation
 * and others. All Rights Reserved.
 ************************************************************************************
 */
7a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
8a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/utypes.h"
9a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
10a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#if !UCONFIG_NO_BREAK_ITERATION
11a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
12a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "brkeng.h"
13a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "dictbe.h"
14a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uchar.h"
15a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uniset.h"
16a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/chariter.h"
17a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ures.h"
18a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/udata.h"
19a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/putil.h"
20a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ustring.h"
21a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uscript.h"
22a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ucharstrie.h"
23a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/bytestrie.h"
24a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "charstr.h"
25a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "dictionarydata.h"
26a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "uvector.h"
27a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "umutex.h"
28a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "uresimp.h"
29a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "ubrkimpl.h"
30a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
31a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgU_NAMESPACE_BEGIN
327979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.org
331c09276ce2ac5214e81ca554360b9f101187893blrn@chromium.org/*
34a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ******************************************************************
35fb144a0716afe7ab8bf245f2391a9e53b3db3c89fschneider@chromium.org */
36c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org
377979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.orgLanguageBreakEngine::LanguageBreakEngine() {
38a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org}
39528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org
40ddda9e81d3175130f2029c0e1205d265a00c32edjkummerow@chromium.orgLanguageBreakEngine::~LanguageBreakEngine() {
41ddda9e81d3175130f2029c0e1205d265a00c32edjkummerow@chromium.org}
42a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
43a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org/*
44a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ******************************************************************
45a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */
46a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
47a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgLanguageBreakFactory::LanguageBreakFactory() {
48a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org}
49a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
501510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgLanguageBreakFactory::~LanguageBreakFactory() {
51a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org}
52a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
53cc8e177451e2ab80cf4eacfd782d19cd05ec2070hpayer@chromium.org/*
54a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ******************************************************************
55a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */
56a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
57a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
58dcebac0f4c6c0da579b7cc91a0cbba8f3c820c8dricow@chromium.org    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
59837a67edd9afdbfe1b59482b41693f59c48846ffulan@chromium.org        fHandled[i] = 0;
6033e09c8efd078308de3c77a88301566f65c07befverwaest@chromium.org    }
61a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org}
62a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
63a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::~UnhandledEngine() {
64a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
65a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        if (fHandled[i] != 0) {
66a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            delete fHandled[i];
67c3669763e2617aefdac84a072327b201b3dff129jkummerow@chromium.org        }
68a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    }
69a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org}
7094b0d6fcb08a2f01ba52c6edb712068f482366f1danno@chromium.org
71a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUBool
72a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::handles(UChar32 c, int32_t breakType) const {
73a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
74a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
75c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org}
76a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
77a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgint32_t
78876cca833d7212e476250d102cad185cdcfa9dfesvenpanne@chromium.orgUnhandledEngine::findBreaks( UText *text,
794f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org                                 int32_t startPos,
80a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                                 int32_t endPos,
81a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                                 UBool reverse,
82a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                                 int32_t breakType,
83a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                                 UStack &/*foundBreaks*/ ) const {
84a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
85a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        UChar32 c = utext_current32(text);
86a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        if (reverse) {
874a9f6553038df6b893b3d3ccae351723f4cbbae7yangguo@chromium.org            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
88a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                c = utext_previous32(text);
89a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            }
90594006017e46d82ed7146611dc12c20e3c509c7ddanno@chromium.org        }
91a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        else {
921510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
93a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
941456e708d277e725ca42a03463af16fe471c9210jkummerow@chromium.org                c = utext_current32(text);
95c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org            }
96c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org        }
971f410f9a9c4fbd4270749af64b477df87b753158mstarzinger@chromium.org    }
98c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.org    return 0;
994f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org}
100e0e1b0d3e70c933d36ed381d511e9fda39f2a751mstarzinger@chromium.org
101c00ec2b94bc5505fa81f81daefd956f5a8776a09danno@chromium.orgvoid
1024f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgUnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
1034f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
1040a4e901cdfb5505a896d30aa8c2e04fce0fbe069vegorov@chromium.org        if (fHandled[breakType] == 0) {
105a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            fHandled[breakType] = new UnicodeSet();
10683aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org            if (fHandled[breakType] == 0) {
107c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org                return;
1084e308cf00936c6e7bead43e5141a04e37b49b9b5jkummerow@chromium.org            }
10956454717593e7552d6846198b8e0f661fa36a3cayangguo@chromium.org        }
110a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        if (!fHandled[breakType]->contains(c)) {
111a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            UErrorCode status = U_ZERO_ERROR;
11246a2a51ad190697e0f62c3060ce02a9de5820a07yangguo@chromium.org            // Apply the entire script of the character.
1137b26015ac58e54e88f4214e248f772ad4f055477whesse@chromium.org            int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
114a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
115d4be0f0c0edfc0a0b46e745055c3dc497c0ffcb5verwaest@chromium.org        }
116c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.org    }
117c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org}
118c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org
119a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org/*
12049edbdf52640c88918f8e6638ab4965819eb1dfekmillikin@chromium.org ******************************************************************
121a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */
122a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
123a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
1244f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org    fEngines = 0;
1254f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org}
1262bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org
127a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::~ICULanguageBreakFactory() {
128d2c22f0121ebc55ee26a9e742f0fd7c0b8397730kmillikin@chromium.org    if (fEngines != 0) {
129160a7b0747492f3f735353d9582521f3314bf4dfdanno@chromium.org        delete fEngines;
1304f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org    }
1314f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org}
1320ad885c06ff6a0d68bc9ad75629f7ddfaa6860b9erikcorry
1334f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgU_NAMESPACE_END
1344f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgU_CDECL_BEGIN
135a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgstatic void U_CALLCONV _deleteEngine(void *obj) {
136c6c5718277d4047fad1e034396228ce15571b5a4sgjesse@chromium.org    delete (const icu::LanguageBreakEngine *) obj;
1374d3fe4e246b0312eba361689f288ddf8dd516960danno@chromium.org}
138c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.orgU_CDECL_END
139378b34e3f8852e94739bb77a528278fe0e2bb532ager@chromium.orgU_NAMESPACE_BEGIN
140c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.org
141c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.orgconst LanguageBreakEngine *
142e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.orgICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
143a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    UBool       needsInit;
144a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    int32_t     i;
145a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    const LanguageBreakEngine *lbe = NULL;
146528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org    UErrorCode  status = U_ZERO_ERROR;
147355cfd19c23ac613f2738a40e356ea48297f7d5eyangguo@chromium.org
148efdb9d70bddd496ceb6a281dadcc065efbce37a1yangguo@chromium.org    // TODO: The global mutex should not be used.
149471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org    // The global mutex should only be used for short periods.
150a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    // A ICULanguageBreakFactory specific mutex should be used.
151a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    umtx_lock(NULL);
152a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    needsInit = (UBool)(fEngines == NULL);
15383aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org    if (!needsInit) {
154a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        i = fEngines->size();
1555f0c45f2cacb31d36a8f80c31f17bda7751a3644ager@chromium.org        while (--i >= 0) {
156a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
157f2038fb01417bcf7698b87a5dfaa4a861539618aerik.corry@gmail.com            if (lbe != NULL && lbe->handles(c, breakType)) {
158a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                break;
159a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            }
160e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org            lbe = NULL;
161a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        }
16232280cf2786219b2d9a668f7f00778fb59ac40b3mstarzinger@chromium.org    }
163a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    umtx_unlock(NULL);
164a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
165a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    if (lbe != NULL) {
166a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        return lbe;
167662436e7b124b3535773535c671c53db322070b5verwaest@chromium.org    }
16883aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org
16974f333bce721daf6b1f9d7d3d3faa623f77658d7vegorov@chromium.org    if (needsInit) {
17074f333bce721daf6b1f9d7d3d3faa623f77658d7vegorov@chromium.org        UStack  *engines = new UStack(_deleteEngine, NULL, status);
171e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org        if (U_SUCCESS(status) && engines == NULL) {
172a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
173a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        }
174a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        else if (U_FAILURE(status)) {
175160a7b0747492f3f735353d9582521f3314bf4dfdanno@chromium.org            delete engines;
1760a4e901cdfb5505a896d30aa8c2e04fce0fbe069vegorov@chromium.org            engines = NULL;
177b08986cb66c3f6687247cb6da186c1e73057e399whesse@chromium.org        }
178471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org        else {
179a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            umtx_lock(NULL);
180d2be901879306d8ff27e78e37783028d581d46fcricow@chromium.org            if (fEngines == NULL) {
181a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                fEngines = engines;
182ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org                engines = NULL;
183394dbcf9009cf5203b6d85e8b515fcff072040f3erik.corry@gmail.com            }
18494b0d6fcb08a2f01ba52c6edb712068f482366f1danno@chromium.org            umtx_unlock(NULL);
185a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            delete engines;
1864f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org        }
187a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    }
188a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
189d2be901879306d8ff27e78e37783028d581d46fcricow@chromium.org    if (fEngines == NULL) {
190be6bd10d8264b7a05e0a04407eb98b253bc0f152kmillikin@chromium.org        return NULL;
191154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org    }
192a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
19328faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org    // We didn't find an engine the first time through, or there was no
194003650ee766f5e92756d470a37973fd371757485yangguo@chromium.org    // stack. Create an engine.
19528faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org    const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
19628faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org
19728faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org    // Now get the lock, and see if someone else has created it in the
198c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org    // meantime
199c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org    umtx_lock(NULL);
200d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org    i = fEngines->size();
201a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    while (--i >= 0) {
202c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org        lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
203c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org        if (lbe != NULL && lbe->handles(c, breakType)) {
204c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org            break;
20557ff881caeb2e15b46ac9e4dfc00e378f7c5f929ulan@chromium.org        }
206c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org        lbe = NULL;
20778d1ad443658709d6c27809001a0e71efd8b898fyangguo@chromium.org    }
208a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    if (lbe == NULL && newlbe != NULL) {
209c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org        fEngines->push((void *)newlbe, status);
210c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org        lbe = newlbe;
211d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org        newlbe = NULL;
212c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org    }
213a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    umtx_unlock(NULL);
21432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org
21532cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org    delete newlbe;
21632cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org
21732cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org    return lbe;
21832cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org}
219dcebac0f4c6c0da579b7cc91a0cbba8f3c820c8dricow@chromium.org
220a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgconst LanguageBreakEngine *
221a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
22232cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org    UErrorCode status = U_ZERO_ERROR;
22332cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org    UScriptCode code = uscript_getScript(c, &status);
22432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org    if (U_SUCCESS(status)) {
22532cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org        DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
22632cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org        if (m != NULL) {
22732cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org            const LanguageBreakEngine *engine = NULL;
22832cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org            switch(code) {
22932cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org            case USCRIPT_THAI:
23032cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org                engine = new ThaiBreakEngine(m, status);
23132cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org                break;
232a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            case USCRIPT_LAO:
233a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                engine = new LaoBreakEngine(m, status);
23432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org                break;
235a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            case USCRIPT_KHMER:
2368f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org                engine = new KhmerBreakEngine(m, status);
2378f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org                break;
2388f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org
2398f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org#if !UCONFIG_NO_NORMALIZATION
2408f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org                // CJK not available w/o normalization
241a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            case USCRIPT_HANGUL:
242a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                engine = new CjkBreakEngine(m, kKorean, status);
2438f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org                break;
2448f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org
2458f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org            // use same BreakEngine and dictionary for both Chinese and Japanese
2468f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org            case USCRIPT_HIRAGANA:
247a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            case USCRIPT_KATAKANA:
248a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            case USCRIPT_HAN:
249a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                engine = new CjkBreakEngine(m, kChineseJapanese, status);
250a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                break;
251812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org#if 0
252812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            // TODO: Have to get some characters with script=common handled
253812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            // by CjkBreakEngine (e.g. U+309B). Simply subjecting
254812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            // them to CjkBreakEngine does not work. The engine has to
255812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            // special-case them.
256812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            case USCRIPT_COMMON:
257812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org            {
258812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org                UBlockCode block = ublock_getCode(code);
259ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org                if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
260ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org                   engine = new CjkBreakEngine(dict, kChineseJapanese, status);
261ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org                break;
262a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            }
263a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#endif
264a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#endif
265a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
266a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            default:
267b99c75496e05b4cd58815ada1e39e6029130d11crossberg@chromium.org                break;
2688f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org            }
269ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org            if (engine == NULL) {
270ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org                delete m;
271ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org            }
2728f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org            else if (U_FAILURE(status)) {
2738f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org                delete engine;
274a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                engine = NULL;
2751fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org            }
2761fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org            return engine;
2771fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org        }
2781fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org    }
2798f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org    return NULL;
280c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org}
2818f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org
282c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.orgDictionaryMatcher *
2835f0c45f2cacb31d36a8f80c31f17bda7751a3644ager@chromium.orgICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
284a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    UErrorCode status = U_ZERO_ERROR;
285a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    // open root from brkitr tree.
286a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
287a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
288a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    int32_t dictnlength = 0;
2898f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org    const UChar *dictfname =
2908f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
291471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org    if (U_FAILURE(status)) {
292471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org        ures_close(b);
293a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        return NULL;
2948f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org    }
2958f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org    CharString dictnbuf;
2968f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org    CharString ext;
297fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org    const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
298fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org    if (extStart != NULL) {
299fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org        int32_t len = (int32_t)(extStart - dictfname);
300a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
301a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        dictnlength = len;
302a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    }
303a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
304a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    ures_close(b);
305a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
306a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
307a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    if (U_SUCCESS(status)) {
308a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        // build trie
30932cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org        const uint8_t *data = (const uint8_t *)udata_getMemory(file);
310a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        const int32_t *indexes = (const int32_t *)data;
311d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org        const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
312a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
313a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        DictionaryMatcher *m = NULL;
314a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
315a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
316a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            const char *characters = (const char *)(data + offset);
317a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            m = new BytesDictionaryMatcher(characters, transform, file);
318a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        }
319a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
320a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            const UChar *characters = (const UChar *)(data + offset);
321a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            m = new UCharsDictionaryMatcher(characters, file);
322a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        }
323a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        if (m == NULL) {
324a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            // no matcher exists to take ownership - either we are an invalid
325a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            // type or memory allocation failed
326a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org            udata_close(file);
327a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        }
3282bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org        return m;
329a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    } else if (dictfname != NULL) {
330a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        // we don't have a dictionary matcher.
331a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
332a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        status = U_ZERO_ERROR;
333a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org        return NULL;
334a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    }
335a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    return NULL;
3362bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org}
337a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
338a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgU_NAMESPACE_END
339a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
3402bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
341a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org