/*
 ************************************************************************************
 * Copyright (C) 2006-2014, International Business Machines Corporation
 * and others. All Rights Reserved.
 ************************************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION

#include "brkeng.h"
#include "dictbe.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ures.h"
#include "unicode/udata.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
2354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/bytestrie.h" 2454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "charstr.h" 2554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "dictionarydata.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uresimp.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ubrkimpl.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************** 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakEngine::LanguageBreakEngine() { 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakEngine::~LanguageBreakEngine() { 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************** 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakFactory::LanguageBreakFactory() { 
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruLanguageBreakFactory::~LanguageBreakFactory() { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************** 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fHandled[i] = 0; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::~UnhandledEngine() { 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fHandled[i] != 0) { 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fHandled[i]; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
QueruUnhandledEngine::handles(UChar32 c, int32_t breakType) const { 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::findBreaks( UText *text, 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t startPos, 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t endPos, 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool reverse, 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t breakType, 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UStack &/*foundBreaks*/ ) const { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = utext_current32(text); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (reverse) { 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = utext_previous32(text); 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { 
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utext_next32(text); // TODO: recast loop to work with post-increment operations. 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = utext_current32(text); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fHandled[breakType] == 0) { 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fHandled[breakType] = new UnicodeSet(); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fHandled[breakType] == 0) { 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!fHandled[breakType]->contains(c)) { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Apply the entire script of the character. 
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ****************************************************************** 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fEngines = 0; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::~ICULanguageBreakFactory() { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fEngines != 0) { 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fEngines; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteEngine(void *obj) { 136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius delete (const 
icu::LanguageBreakEngine *) obj; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst LanguageBreakEngine * 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool needsInit; 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const LanguageBreakEngine *lbe = NULL; 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: The global mutex should not be used. 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The global mutex should only be used for short periods. 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A ICULanguageBreakFactory specific mutex should be used. 
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(NULL); 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru needsInit = (UBool)(fEngines == NULL); 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!needsInit) { 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i = fEngines->size(); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (--i >= 0) { 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lbe != NULL && lbe->handles(c, breakType)) { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lbe = NULL; 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(NULL); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lbe != NULL) { 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return lbe; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (needsInit) { 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UStack *engines = new UStack(_deleteEngine, NULL, status); 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status) && engines == NULL) { 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (U_FAILURE(status)) 
{ 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete engines; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru engines = NULL; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(NULL); 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fEngines == NULL) { 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fEngines = engines; 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru engines = NULL; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(NULL); 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete engines; 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fEngines == NULL) { 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We didn't find an engine the first time through, or there was no 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // stack. Create an engine. 
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now get the lock, and see if someone else has created it in the 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // meantime 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(NULL); 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i = fEngines->size(); 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (--i >= 0) { 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lbe != NULL && lbe->handles(c, breakType)) { 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lbe = NULL; 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lbe == NULL && newlbe != NULL) { 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fEngines->push((void *)newlbe, status); 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lbe = newlbe; 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newlbe = NULL; 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(NULL); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete newlbe; 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return lbe; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst LanguageBreakEngine * 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UScriptCode code = uscript_getScript(c, &status); 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 22554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); 22654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (m != NULL) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const LanguageBreakEngine *engine = NULL; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(code) { 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case USCRIPT_THAI: 23054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius engine = new ThaiBreakEngine(m, status); 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 23259d709d503bab6e2b61931737e662dd293b40578ccornelius case USCRIPT_LAO: 23359d709d503bab6e2b61931737e662dd293b40578ccornelius engine = new LaoBreakEngine(m, status); 23459d709d503bab6e2b61931737e662dd293b40578ccornelius break; 235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case USCRIPT_MYANMAR: 236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius engine = new BurmeseBreakEngine(m, status); 237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case USCRIPT_KHMER: 23954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius engine = new KhmerBreakEngine(m, status); 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 24154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 
24254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if !UCONFIG_NO_NORMALIZATION 24354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // CJK not available w/o normalization 24454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case USCRIPT_HANGUL: 24554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius engine = new CjkBreakEngine(m, kKorean, status); 24654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius break; 24754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 24854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // use same BreakEngine and dictionary for both Chinese and Japanese 24954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case USCRIPT_HIRAGANA: 25054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case USCRIPT_KATAKANA: 25154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case USCRIPT_HAN: 25254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius engine = new CjkBreakEngine(m, kChineseJapanese, status); 25354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius break; 25454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if 0 25554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // TODO: Have to get some characters with script=common handled 25654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // by CjkBreakEngine (e.g. U+309B). Simply subjecting 25754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // them to CjkBreakEngine does not work. The engine has to 25854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // special-case them. 
25954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case USCRIPT_COMMON: 26054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius { 26154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UBlockCode block = ublock_getCode(code); 26254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA) 26354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius engine = new CjkBreakEngine(dict, kChineseJapanese, status); 26454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius break; 26554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 26654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif 26754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif 26854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (engine == NULL) { 27354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius delete m; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (U_FAILURE(status)) { 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete engine; 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru engine = NULL; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return engine; 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28554dcd9b6a06071f647dac967e9e267abb9410720Craig 
CorneliusDictionaryMatcher * 28654dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 28854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // open root from brkitr tree. 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t dictnlength = 0; 29254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const UChar *dictfname = 29354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status); 29454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (U_FAILURE(status)) { 29554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius ures_close(b); 29654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return NULL; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 29854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius CharString dictnbuf; 29954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius CharString ext; 30054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot 30154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (extStart != NULL) { 30254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius int32_t len = (int32_t)(extStart - dictfname); 30354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status); 30454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius dictnlength = len; 
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 30654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status); 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(b); 30854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 30954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status); 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 31154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // build trie 31254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const uint8_t *data = (const uint8_t *)udata_getMemory(file); 31354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const int32_t *indexes = (const int32_t *)data; 31454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; 31554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; 31654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius DictionaryMatcher *m = NULL; 31754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (trieType == DictionaryData::TRIE_TYPE_BYTES) { 31854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const int32_t transform = indexes[DictionaryData::IX_TRANSFORM]; 31954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const char *characters = (const char *)(data + offset); 32054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius m = new BytesDictionaryMatcher(characters, transform, file); 32154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 32254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { 32354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const UChar *characters = (const UChar 
*)(data + offset); 32454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius m = new UCharsDictionaryMatcher(characters, file); 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 32654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (m == NULL) { 32754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // no matcher exists to take ownership - either we are an invalid 32854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // type or memory allocation failed 32954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius udata_close(file); 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return m; 33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } else if (dictfname != NULL) { 33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // we don't have a dictionary matcher. 33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // returning NULL here will cause us to fail to find a dictionary break engine, as expected 33554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius status = U_ZERO_ERROR; 33654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return NULL; 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 344