1/** 2 ************************************************************************************ 3 * Copyright (C) 2006-2009,2011, International Business Machines Corporation * 4 * and others. All Rights Reserved. * 5 ************************************************************************************ 6 */ 7 8#include "unicode/utypes.h" 9 10#if !UCONFIG_NO_BREAK_ITERATION 11 12#include "brkeng.h" 13#include "dictbe.h" 14#include "triedict.h" 15#include "unicode/uchar.h" 16#include "unicode/uniset.h" 17#include "unicode/chariter.h" 18#include "unicode/ures.h" 19#include "unicode/udata.h" 20#include "unicode/putil.h" 21#include "unicode/ustring.h" 22#include "unicode/uscript.h" 23#include "uvector.h" 24#include "umutex.h" 25#include "uresimp.h" 26#include "ubrkimpl.h" 27 28U_NAMESPACE_BEGIN 29 30/* 31 ****************************************************************** 32 */ 33 34LanguageBreakEngine::LanguageBreakEngine() { 35} 36 37LanguageBreakEngine::~LanguageBreakEngine() { 38} 39 40/* 41 ****************************************************************** 42 */ 43 44LanguageBreakFactory::LanguageBreakFactory() { 45} 46 47LanguageBreakFactory::~LanguageBreakFactory() { 48} 49 50/* 51 ****************************************************************** 52 */ 53 54UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { 55 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 56 fHandled[i] = 0; 57 } 58} 59 60UnhandledEngine::~UnhandledEngine() { 61 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 62 if (fHandled[i] != 0) { 63 delete fHandled[i]; 64 } 65 } 66} 67 68UBool 69UnhandledEngine::handles(UChar32 c, int32_t breakType) const { 70 return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) 71 && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); 72} 73 74int32_t 75UnhandledEngine::findBreaks( UText *text, 76 int32_t startPos, 77 int32_t endPos, 78 UBool reverse, 79 int32_t breakType, 80 UStack &/*foundBreaks*/ ) const { 81 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 82 UChar32 c = utext_current32(text); 83 if (reverse) { 84 while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { 85 c = utext_previous32(text); 86 } 87 } 88 else { 89 while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { 90 utext_next32(text); // TODO: recast loop to work with post-increment operations. 91 c = utext_current32(text); 92 } 93 } 94 } 95 return 0; 96} 97 98void 99UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { 100 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 101 if (fHandled[breakType] == 0) { 102 fHandled[breakType] = new UnicodeSet(); 103 if (fHandled[breakType] == 0) { 104 return; 105 } 106 } 107 if (!fHandled[breakType]->contains(c)) { 108 UErrorCode status = U_ZERO_ERROR; 109 // Apply the entire script of the character. 110 int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); 111 fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); 112 } 113 } 114} 115 116/* 117 ****************************************************************** 118 */ 119 120ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { 121 fEngines = 0; 122} 123 124ICULanguageBreakFactory::~ICULanguageBreakFactory() { 125 if (fEngines != 0) { 126 delete fEngines; 127 } 128} 129 130U_NAMESPACE_END 131U_CDECL_BEGIN 132static void U_CALLCONV _deleteEngine(void *obj) { 133 delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj; 134} 135U_CDECL_END 136U_NAMESPACE_BEGIN 137 138const LanguageBreakEngine * 139ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { 140 UBool needsInit; 141 int32_t i; 142 const LanguageBreakEngine *lbe = NULL; 143 UErrorCode status = U_ZERO_ERROR; 144 145 // TODO: The global mutex should not be used. 146 // The global mutex should only be used for short periods. 147 // A ICULanguageBreakFactory specific mutex should be used. 148 umtx_lock(NULL); 149 needsInit = (UBool)(fEngines == NULL); 150 if (!needsInit) { 151 i = fEngines->size(); 152 while (--i >= 0) { 153 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 154 if (lbe != NULL && lbe->handles(c, breakType)) { 155 break; 156 } 157 lbe = NULL; 158 } 159 } 160 umtx_unlock(NULL); 161 162 if (lbe != NULL) { 163 return lbe; 164 } 165 166 if (needsInit) { 167 UStack *engines = new UStack(_deleteEngine, NULL, status); 168 if (U_SUCCESS(status) && engines == NULL) { 169 status = U_MEMORY_ALLOCATION_ERROR; 170 } 171 else if (U_FAILURE(status)) { 172 delete engines; 173 engines = NULL; 174 } 175 else { 176 umtx_lock(NULL); 177 if (fEngines == NULL) { 178 fEngines = engines; 179 engines = NULL; 180 } 181 umtx_unlock(NULL); 182 delete engines; 183 } 184 } 185 186 if (fEngines == NULL) { 187 return NULL; 188 } 189 190 // We didn't find an engine the first time through, or there was no 191 // stack. Create an engine. 192 const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); 193 194 // Now get the lock, and see if someone else has created it in the 195 // meantime 196 umtx_lock(NULL); 197 i = fEngines->size(); 198 while (--i >= 0) { 199 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 200 if (lbe != NULL && lbe->handles(c, breakType)) { 201 break; 202 } 203 lbe = NULL; 204 } 205 if (lbe == NULL && newlbe != NULL) { 206 fEngines->push((void *)newlbe, status); 207 lbe = newlbe; 208 newlbe = NULL; 209 } 210 umtx_unlock(NULL); 211 212 delete newlbe; 213 214 return lbe; 215} 216 217const LanguageBreakEngine * 218ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { 219 UErrorCode status = U_ZERO_ERROR; 220 UScriptCode code = uscript_getScript(c, &status); 221 if (U_SUCCESS(status)) { 222 const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); 223 if (dict != NULL) { 224 const LanguageBreakEngine *engine = NULL; 225 switch(code) { 226 case USCRIPT_THAI: 227 engine = new ThaiBreakEngine(dict, status); 228 break; 229 case USCRIPT_KHMER: 230 engine = new KhmerBreakEngine(dict, status); 231 break; 232 default: 233 break; 234 } 235 if (engine == NULL) { 236 delete dict; 237 } 238 else if (U_FAILURE(status)) { 239 delete engine; 240 engine = NULL; 241 } 242 return engine; 243 } 244 } 245 return NULL; 246} 247 248const CompactTrieDictionary * 249ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) { 250 UErrorCode status = U_ZERO_ERROR; 251 // Open root from brkitr tree. 252 char dictnbuff[256]; 253 char ext[4]={'\0'}; 254 255 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); 256 b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); 257 b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); 258 int32_t dictnlength = 0; 259 const UChar *dictfname = ures_getString(b, &dictnlength, &status); 260 if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { 261 dictnlength = 0; 262 status = U_BUFFER_OVERFLOW_ERROR; 263 } 264 if (U_SUCCESS(status) && dictfname) { 265 UChar* extStart=u_strchr(dictfname, 0x002e); 266 int len = 0; 267 if(extStart!=NULL){ 268 len = (int)(extStart-dictfname); 269 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 270 u_UCharsToChars(dictfname, dictnbuff, len); 271 } 272 dictnbuff[len]=0; // nul terminate 273 } 274 ures_close(b); 275 UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); 276 if (U_SUCCESS(status)) { 277 const CompactTrieDictionary *dict = new CompactTrieDictionary( 278 file, status); 279 if (U_SUCCESS(status) && dict == NULL) { 280 status = U_MEMORY_ALLOCATION_ERROR; 281 } 282 if (U_FAILURE(status)) { 283 delete dict; 284 dict = NULL; 285 } 286 return dict; 287 } 288 return NULL; 289} 290 291U_NAMESPACE_END 292 293#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 294