1f2038fb01417bcf7698b87a5dfaa4a861539618aerik.corry@gmail.com/* 2a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ************************************************************************************ 3a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org * Copyright (C) 2006-2013, International Business Machines Corporation 4a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org * and others. All Rights Reserved. 5a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ************************************************************************************ 6a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */ 7a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 8a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/utypes.h" 9a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 10a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#if !UCONFIG_NO_BREAK_ITERATION 11a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 12a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "brkeng.h" 13a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "dictbe.h" 14a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uchar.h" 15a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uniset.h" 16a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/chariter.h" 17a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ures.h" 18a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/udata.h" 19a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/putil.h" 20a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ustring.h" 21a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/uscript.h" 22a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/ucharstrie.h" 23a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "unicode/bytestrie.h" 24a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "charstr.h" 25a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "dictionarydata.h" 26a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "uvector.h" 27a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "umutex.h" 28a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "uresimp.h" 29a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include "ubrkimpl.h" 30a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 31a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgU_NAMESPACE_BEGIN 327979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.org 331c09276ce2ac5214e81ca554360b9f101187893blrn@chromium.org/* 34a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ****************************************************************** 35fb144a0716afe7ab8bf245f2391a9e53b3db3c89fschneider@chromium.org */ 36c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org 377979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.orgLanguageBreakEngine::LanguageBreakEngine() { 38a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org} 39528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org 40ddda9e81d3175130f2029c0e1205d265a00c32edjkummerow@chromium.orgLanguageBreakEngine::~LanguageBreakEngine() { 41ddda9e81d3175130f2029c0e1205d265a00c32edjkummerow@chromium.org} 42a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 43a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org/* 44a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ****************************************************************** 45a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */ 46a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 47a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgLanguageBreakFactory::LanguageBreakFactory() { 48a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org} 49a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 501510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.orgLanguageBreakFactory::~LanguageBreakFactory() { 51a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org} 52a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 53cc8e177451e2ab80cf4eacfd782d19cd05ec2070hpayer@chromium.org/* 54a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ****************************************************************** 55a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */ 56a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 57a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { 58dcebac0f4c6c0da579b7cc91a0cbba8f3c820c8dricow@chromium.org for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 59837a67edd9afdbfe1b59482b41693f59c48846ffulan@chromium.org fHandled[i] = 0; 6033e09c8efd078308de3c77a88301566f65c07befverwaest@chromium.org } 61a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org} 62a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 63a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::~UnhandledEngine() { 64a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 65a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (fHandled[i] != 0) { 66a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org delete fHandled[i]; 67c3669763e2617aefdac84a072327b201b3dff129jkummerow@chromium.org } 68a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 69a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org} 7094b0d6fcb08a2f01ba52c6edb712068f482366f1danno@chromium.org 71a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUBool 72a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgUnhandledEngine::handles(UChar32 c, int32_t breakType) const { 73a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) 74a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); 75c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org} 76a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 77a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgint32_t 78876cca833d7212e476250d102cad185cdcfa9dfesvenpanne@chromium.orgUnhandledEngine::findBreaks( UText *text, 794f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org int32_t startPos, 80a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org int32_t endPos, 81a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UBool reverse, 82a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org int32_t breakType, 83a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UStack &/*foundBreaks*/ ) const { 84a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 85a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UChar32 c = utext_current32(text); 86a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (reverse) { 874a9f6553038df6b893b3d3ccae351723f4cbbae7yangguo@chromium.org while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { 88a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org c = utext_previous32(text); 89a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 90594006017e46d82ed7146611dc12c20e3c509c7ddanno@chromium.org } 91a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org else { 921510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { 93a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org utext_next32(text); // TODO: recast loop to work with post-increment operations. 941456e708d277e725ca42a03463af16fe471c9210jkummerow@chromium.org c = utext_current32(text); 95c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org } 96c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org } 971f410f9a9c4fbd4270749af64b477df87b753158mstarzinger@chromium.org } 98c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.org return 0; 994f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org} 100e0e1b0d3e70c933d36ed381d511e9fda39f2a751mstarzinger@chromium.org 101c00ec2b94bc5505fa81f81daefd956f5a8776a09danno@chromium.orgvoid 1024f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgUnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { 1034f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 1040a4e901cdfb5505a896d30aa8c2e04fce0fbe069vegorov@chromium.org if (fHandled[breakType] == 0) { 105a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org fHandled[breakType] = new UnicodeSet(); 10683aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org if (fHandled[breakType] == 0) { 107c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org return; 1084e308cf00936c6e7bead43e5141a04e37b49b9b5jkummerow@chromium.org } 10956454717593e7552d6846198b8e0f661fa36a3cayangguo@chromium.org } 110a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (!fHandled[breakType]->contains(c)) { 111a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UErrorCode status = U_ZERO_ERROR; 11246a2a51ad190697e0f62c3060ce02a9de5820a07yangguo@chromium.org // Apply the entire script of the character. 1137b26015ac58e54e88f4214e248f772ad4f055477whesse@chromium.org int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); 114a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); 115d4be0f0c0edfc0a0b46e745055c3dc497c0ffcb5verwaest@chromium.org } 116c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.org } 117c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org} 118c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org 119a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org/* 12049edbdf52640c88918f8e6638ab4965819eb1dfekmillikin@chromium.org ****************************************************************** 121a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org */ 122a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 123a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { 1244f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org fEngines = 0; 1254f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org} 1262bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org 127a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::~ICULanguageBreakFactory() { 128d2c22f0121ebc55ee26a9e742f0fd7c0b8397730kmillikin@chromium.org if (fEngines != 0) { 129160a7b0747492f3f735353d9582521f3314bf4dfdanno@chromium.org delete fEngines; 1304f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org } 1314f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org} 1320ad885c06ff6a0d68bc9ad75629f7ddfaa6860b9erikcorry 1334f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgU_NAMESPACE_END 1344f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgU_CDECL_BEGIN 135a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgstatic void U_CALLCONV _deleteEngine(void *obj) { 136c6c5718277d4047fad1e034396228ce15571b5a4sgjesse@chromium.org delete (const icu::LanguageBreakEngine *) obj; 1374d3fe4e246b0312eba361689f288ddf8dd516960danno@chromium.org} 138c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.orgU_CDECL_END 139378b34e3f8852e94739bb77a528278fe0e2bb532ager@chromium.orgU_NAMESPACE_BEGIN 140c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.org 141c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.orgconst LanguageBreakEngine * 142e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.orgICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { 143a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UBool needsInit; 144a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org int32_t i; 145a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const LanguageBreakEngine *lbe = NULL; 146528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org UErrorCode status = U_ZERO_ERROR; 147355cfd19c23ac613f2738a40e356ea48297f7d5eyangguo@chromium.org 148efdb9d70bddd496ceb6a281dadcc065efbce37a1yangguo@chromium.org // TODO: The global mutex should not be used. 149471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org // The global mutex should only be used for short periods. 150a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // A ICULanguageBreakFactory specific mutex should be used. 151a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org umtx_lock(NULL); 152a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org needsInit = (UBool)(fEngines == NULL); 15383aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org if (!needsInit) { 154a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org i = fEngines->size(); 1555f0c45f2cacb31d36a8f80c31f17bda7751a3644ager@chromium.org while (--i >= 0) { 156a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 157f2038fb01417bcf7698b87a5dfaa4a861539618aerik.corry@gmail.com if (lbe != NULL && lbe->handles(c, breakType)) { 158a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org break; 159a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 160e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org lbe = NULL; 161a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 16232280cf2786219b2d9a668f7f00778fb59ac40b3mstarzinger@chromium.org } 163a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org umtx_unlock(NULL); 164a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 165a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (lbe != NULL) { 166a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org return lbe; 167662436e7b124b3535773535c671c53db322070b5verwaest@chromium.org } 16883aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org 16974f333bce721daf6b1f9d7d3d3faa623f77658d7vegorov@chromium.org if (needsInit) { 17074f333bce721daf6b1f9d7d3d3faa623f77658d7vegorov@chromium.org UStack *engines = new UStack(_deleteEngine, NULL, status); 171e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org if (U_SUCCESS(status) && engines == NULL) { 172a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 173a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 174a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org else if (U_FAILURE(status)) { 175160a7b0747492f3f735353d9582521f3314bf4dfdanno@chromium.org delete engines; 1760a4e901cdfb5505a896d30aa8c2e04fce0fbe069vegorov@chromium.org engines = NULL; 177b08986cb66c3f6687247cb6da186c1e73057e399whesse@chromium.org } 178471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org else { 179a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org umtx_lock(NULL); 180d2be901879306d8ff27e78e37783028d581d46fcricow@chromium.org if (fEngines == NULL) { 181a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org fEngines = engines; 182ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org engines = NULL; 183394dbcf9009cf5203b6d85e8b515fcff072040f3erik.corry@gmail.com } 18494b0d6fcb08a2f01ba52c6edb712068f482366f1danno@chromium.org umtx_unlock(NULL); 185a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org delete engines; 1864f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org } 187a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 188a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 189d2be901879306d8ff27e78e37783028d581d46fcricow@chromium.org if (fEngines == NULL) { 190be6bd10d8264b7a05e0a04407eb98b253bc0f152kmillikin@chromium.org return NULL; 191154ff99473e866f5eb00a44045e27866a7fdce29yangguo@chromium.org } 192a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 19328faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org // We didn't find an engine the first time through, or there was no 194003650ee766f5e92756d470a37973fd371757485yangguo@chromium.org // stack. Create an engine. 19528faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); 19628faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org 19728faa982749c4aa9c090939453dea14bb118f613jkummerow@chromium.org // Now get the lock, and see if someone else has created it in the 198c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org // meantime 199c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org umtx_lock(NULL); 200d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org i = fEngines->size(); 201a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org while (--i >= 0) { 202c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 203c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org if (lbe != NULL && lbe->handles(c, breakType)) { 204c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org break; 20557ff881caeb2e15b46ac9e4dfc00e378f7c5f929ulan@chromium.org } 206c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org lbe = NULL; 20778d1ad443658709d6c27809001a0e71efd8b898fyangguo@chromium.org } 208a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (lbe == NULL && newlbe != NULL) { 209c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org fEngines->push((void *)newlbe, status); 210c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org lbe = newlbe; 211d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org newlbe = NULL; 212c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org } 213a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org umtx_unlock(NULL); 21432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org 21532cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org delete newlbe; 21632cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org 21732cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org return lbe; 21832cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org} 219dcebac0f4c6c0da579b7cc91a0cbba8f3c820c8dricow@chromium.org 220a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgconst LanguageBreakEngine * 221a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { 22232cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org UErrorCode status = U_ZERO_ERROR; 22332cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org UScriptCode code = uscript_getScript(c, &status); 22432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org if (U_SUCCESS(status)) { 22532cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); 22632cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org if (m != NULL) { 22732cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org const LanguageBreakEngine *engine = NULL; 22832cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org switch(code) { 22932cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org case USCRIPT_THAI: 23032cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org engine = new ThaiBreakEngine(m, status); 23132cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org break; 232a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org case USCRIPT_LAO: 233a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org engine = new LaoBreakEngine(m, status); 23432cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org break; 235a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org case USCRIPT_KHMER: 2368f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org engine = new KhmerBreakEngine(m, status); 2378f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org break; 2388f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org 2398f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org#if !UCONFIG_NO_NORMALIZATION 2408f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org // CJK not available w/o normalization 241a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org case USCRIPT_HANGUL: 242a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org engine = new CjkBreakEngine(m, kKorean, status); 2438f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org break; 2448f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org 2458f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org // use same BreakEngine and dictionary for both Chinese and Japanese 2468f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org case USCRIPT_HIRAGANA: 247a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org case USCRIPT_KATAKANA: 248a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org case USCRIPT_HAN: 249a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org engine = new CjkBreakEngine(m, kChineseJapanese, status); 250a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org break; 251812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org#if 0 252812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org // TODO: Have to get some characters with script=common handled 253812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org // by CjkBreakEngine (e.g. U+309B). Simply subjecting 254812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org // them to CjkBreakEngine does not work. The engine has to 255812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org // special-case them. 256812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org case USCRIPT_COMMON: 257812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org { 258812308e1488cd8261e4dbbda1d8022642d522b9bulan@chromium.org UBlockCode block = ublock_getCode(code); 259ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA) 260ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org engine = new CjkBreakEngine(dict, kChineseJapanese, status); 261ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org break; 262a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 263a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#endif 264a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#endif 265a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 266a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org default: 267b99c75496e05b4cd58815ada1e39e6029130d11crossberg@chromium.org break; 2688f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org } 269ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org if (engine == NULL) { 270ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org delete m; 271ddd545c4c343dcf4331b9d80d2a0bdfa373a4a0fricow@chromium.org } 2728f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org else if (U_FAILURE(status)) { 2738f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org delete engine; 274a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org engine = NULL; 2751fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org } 2761fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org return engine; 2771fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org } 2781fd77d58ca66b2711f09cdea32c0c2d1a01b3ae5danno@chromium.org } 2798f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org return NULL; 280c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org} 2818f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org 282c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.orgDictionaryMatcher * 2835f0c45f2cacb31d36a8f80c31f17bda7751a3644ager@chromium.orgICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { 284a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UErrorCode status = U_ZERO_ERROR; 285a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // open root from brkitr tree. 286a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); 287a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); 288a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org int32_t dictnlength = 0; 2898f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org const UChar *dictfname = 2908f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status); 291471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org if (U_FAILURE(status)) { 292471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org ures_close(b); 293a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org return NULL; 2948f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org } 2958f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org CharString dictnbuf; 2968f806e8b8f108ca2c8899c5d31861ef1273dcd4akarlklose@chromium.org CharString ext; 297fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot 298fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org if (extStart != NULL) { 299fb732b17922ea75830be4db6b80534c4827d8a55jkummerow@chromium.org int32_t len = (int32_t)(extStart - dictfname); 300a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status); 301a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org dictnlength = len; 302a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 303a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status); 304a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org ures_close(b); 305a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 306a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status); 307a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (U_SUCCESS(status)) { 308a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // build trie 30932cb9b2c195baa85d4c04f4c7b22b9aa04e97d3fverwaest@chromium.org const uint8_t *data = (const uint8_t *)udata_getMemory(file); 310a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const int32_t *indexes = (const int32_t *)data; 311d3c42109e5b85232d19beab8deeb24bdcbbf07f9danno@chromium.org const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; 312a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; 313a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org DictionaryMatcher *m = NULL; 314a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (trieType == DictionaryData::TRIE_TYPE_BYTES) { 315a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const int32_t transform = indexes[DictionaryData::IX_TRANSFORM]; 316a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const char *characters = (const char *)(data + offset); 317a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org m = new BytesDictionaryMatcher(characters, transform, file); 318a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 319a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { 320a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org const UChar *characters = (const UChar *)(data + offset); 321a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org m = new UCharsDictionaryMatcher(characters, file); 322a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 323a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org if (m == NULL) { 324a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // no matcher exists to take ownership - either we are an invalid 325a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // type or memory allocation failed 326a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org udata_close(file); 327a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 3282bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org return m; 329a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } else if (dictfname != NULL) { 330a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // we don't have a dictionary matcher. 331a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org // returning NULL here will cause us to fail to find a dictionary break engine, as expected 332a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org status = U_ZERO_ERROR; 333a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org return NULL; 334a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org } 335a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org return NULL; 3362bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org} 337a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 338a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgU_NAMESPACE_END 339a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org 3402bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 341a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org