1/* 2******************************************************************************* 3* Copyright (C) 1997-2014, International Business Machines Corporation and 4* others. All Rights Reserved. 5******************************************************************************* 6* 7* File brkiter.cpp 8* 9* Modification History: 10* 11* Date Name Description 12* 02/18/97 aliu Converted from OpenClass. Added DONE. 13* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 14***************************************************************************************** 15*/ 16 17// ***************************************************************************** 18// This file was generated from the java source file BreakIterator.java 19// ***************************************************************************** 20 21#include "unicode/utypes.h" 22 23#if !UCONFIG_NO_BREAK_ITERATION 24 25#include "unicode/rbbi.h" 26#include "unicode/brkiter.h" 27#include "unicode/udata.h" 28#include "unicode/ures.h" 29#include "unicode/ustring.h" 30#include "ucln_cmn.h" 31#include "cstring.h" 32#include "umutex.h" 33#include "servloc.h" 34#include "locbased.h" 35#include "uresimp.h" 36#include "uassert.h" 37#include "ubrkimpl.h" 38 39// ***************************************************************************** 40// class BreakIterator 41// This class implements methods for finding the location of boundaries in text. 42// Instances of BreakIterator maintain a current position and scan over text 43// returning the index of characters where boundaries occur. 44// ***************************************************************************** 45 46U_NAMESPACE_BEGIN 47 48// ------------------------------------- 49 50BreakIterator* 51BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 52{ 53 char fnbuff[256]; 54 char ext[4]={'\0'}; 55 char actualLocale[ULOC_FULLNAME_CAPACITY]; 56 int32_t size; 57 const UChar* brkfname = NULL; 58 UResourceBundle brkRulesStack; 59 UResourceBundle brkNameStack; 60 UResourceBundle *brkRules = &brkRulesStack; 61 UResourceBundle *brkName = &brkNameStack; 62 RuleBasedBreakIterator *result = NULL; 63 64 if (U_FAILURE(status)) 65 return NULL; 66 67 ures_initStackObject(brkRules); 68 ures_initStackObject(brkName); 69 70 // Get the locale 71 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); 72 /* this is a hack for now. Should be fixed when the data is fetched from 73 brk_index.txt */ 74 if(status==U_USING_DEFAULT_WARNING){ 75 status=U_ZERO_ERROR; 76 ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); 77 } 78 79 // Get the "boundaries" array. 80 if (U_SUCCESS(status)) { 81 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 82 // Get the string object naming the rules file 83 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 84 // Get the actual string 85 brkfname = ures_getString(brkName, &size, &status); 86 U_ASSERT((size_t)size<sizeof(fnbuff)); 87 if ((size_t)size>=sizeof(fnbuff)) { 88 size=0; 89 if (U_SUCCESS(status)) { 90 status = U_BUFFER_OVERFLOW_ERROR; 91 } 92 } 93 94 // Use the string if we found it 95 if (U_SUCCESS(status) && brkfname) { 96 uprv_strncpy(actualLocale, 97 ures_getLocaleInternal(brkName, &status), 98 sizeof(actualLocale)/sizeof(actualLocale[0])); 99 100 UChar* extStart=u_strchr(brkfname, 0x002e); 101 int len = 0; 102 if(extStart!=NULL){ 103 len = (int)(extStart-brkfname); 104 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 105 u_UCharsToChars(brkfname, fnbuff, len); 106 } 107 fnbuff[len]=0; // nul terminate 108 } 109 } 110 111 ures_close(brkRules); 112 ures_close(brkName); 113 114 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 115 if (U_FAILURE(status)) { 116 ures_close(b); 117 return NULL; 118 } 119 120 // Create a RuleBasedBreakIterator 121 result = new RuleBasedBreakIterator(file, status); 122 123 // If there is a result, set the valid locale and actual locale, and the kind 124 if (U_SUCCESS(status) && result != NULL) { 125 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 126 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); 127 result->setBreakType(kind); 128 } 129 130 ures_close(b); 131 132 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 133 delete result; 134 return NULL; 135 } 136 137 if (result == NULL) { 138 udata_close(file); 139 if (U_SUCCESS(status)) { 140 status = U_MEMORY_ALLOCATION_ERROR; 141 } 142 } 143 144 return result; 145} 146 147// Creates a break iterator for word breaks. 148BreakIterator* U_EXPORT2 149BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 150{ 151 return createInstance(key, UBRK_WORD, status); 152} 153 154// ------------------------------------- 155 156// Creates a break iterator for line breaks. 157BreakIterator* U_EXPORT2 158BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 159{ 160 return createInstance(key, UBRK_LINE, status); 161} 162 163// ------------------------------------- 164 165// Creates a break iterator for character breaks. 166BreakIterator* U_EXPORT2 167BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 168{ 169 return createInstance(key, UBRK_CHARACTER, status); 170} 171 172// ------------------------------------- 173 174// Creates a break iterator for sentence breaks. 175BreakIterator* U_EXPORT2 176BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 177{ 178 return createInstance(key, UBRK_SENTENCE, status); 179} 180 181// ------------------------------------- 182 183// Creates a break iterator for title casing breaks. 184BreakIterator* U_EXPORT2 185BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 186{ 187 return createInstance(key, UBRK_TITLE, status); 188} 189 190// ------------------------------------- 191 192// Gets all the available locales that has localized text boundary data. 193const Locale* U_EXPORT2 194BreakIterator::getAvailableLocales(int32_t& count) 195{ 196 return Locale::getAvailableLocales(count); 197} 198 199// ------------------------------------------ 200// 201// Default constructor and destructor 202// 203//------------------------------------------- 204 205BreakIterator::BreakIterator() 206{ 207 *validLocale = *actualLocale = 0; 208} 209 210BreakIterator::~BreakIterator() 211{ 212} 213 214// ------------------------------------------ 215// 216// Registration 217// 218//------------------------------------------- 219#if !UCONFIG_NO_SERVICE 220 221// ------------------------------------- 222 223class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 224public: 225 virtual ~ICUBreakIteratorFactory(); 226protected: 227 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 228 return BreakIterator::makeInstance(loc, kind, status); 229 } 230}; 231 232ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 233 234// ------------------------------------- 235 236class ICUBreakIteratorService : public ICULocaleService { 237public: 238 ICUBreakIteratorService() 239 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 240 { 241 UErrorCode status = U_ZERO_ERROR; 242 registerFactory(new ICUBreakIteratorFactory(), status); 243 } 244 245 virtual ~ICUBreakIteratorService(); 246 247 virtual UObject* cloneInstance(UObject* instance) const { 248 return ((BreakIterator*)instance)->clone(); 249 } 250 251 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 252 LocaleKey& lkey = (LocaleKey&)key; 253 int32_t kind = lkey.kind(); 254 Locale loc; 255 lkey.currentLocale(loc); 256 return BreakIterator::makeInstance(loc, kind, status); 257 } 258 259 virtual UBool isDefault() const { 260 return countFactories() == 1; 261 } 262}; 263 264ICUBreakIteratorService::~ICUBreakIteratorService() {} 265 266// ------------------------------------- 267 268// defined in ucln_cmn.h 269U_NAMESPACE_END 270 271static icu::UInitOnce gInitOnce; 272static icu::ICULocaleService* gService = NULL; 273 274 275 276/** 277 * Release all static memory held by breakiterator. 278 */ 279U_CDECL_BEGIN 280static UBool U_CALLCONV breakiterator_cleanup(void) { 281#if !UCONFIG_NO_SERVICE 282 if (gService) { 283 delete gService; 284 gService = NULL; 285 } 286 gInitOnce.reset(); 287#endif 288 return TRUE; 289} 290U_CDECL_END 291U_NAMESPACE_BEGIN 292 293static void U_CALLCONV 294initService(void) { 295 gService = new ICUBreakIteratorService(); 296 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 297} 298 299static ICULocaleService* 300getService(void) 301{ 302 umtx_initOnce(gInitOnce, &initService); 303 return gService; 304} 305 306 307// ------------------------------------- 308 309static inline UBool 310hasService(void) 311{ 312 return !gInitOnce.isReset() && getService() != NULL; 313} 314 315// ------------------------------------- 316 317URegistryKey U_EXPORT2 318BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 319{ 320 ICULocaleService *service = getService(); 321 if (service == NULL) { 322 status = U_MEMORY_ALLOCATION_ERROR; 323 return NULL; 324 } 325 return service->registerInstance(toAdopt, locale, kind, status); 326} 327 328// ------------------------------------- 329 330UBool U_EXPORT2 331BreakIterator::unregister(URegistryKey key, UErrorCode& status) 332{ 333 if (U_SUCCESS(status)) { 334 if (hasService()) { 335 return gService->unregister(key, status); 336 } 337 status = U_MEMORY_ALLOCATION_ERROR; 338 } 339 return FALSE; 340} 341 342// ------------------------------------- 343 344StringEnumeration* U_EXPORT2 345BreakIterator::getAvailableLocales(void) 346{ 347 ICULocaleService *service = getService(); 348 if (service == NULL) { 349 return NULL; 350 } 351 return service->getAvailableLocales(); 352} 353#endif /* UCONFIG_NO_SERVICE */ 354 355// ------------------------------------- 356 357BreakIterator* 358BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 359{ 360 if (U_FAILURE(status)) { 361 return NULL; 362 } 363 364#if !UCONFIG_NO_SERVICE 365 if (hasService()) { 366 Locale actualLoc(""); 367 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 368 // TODO: The way the service code works in ICU 2.8 is that if 369 // there is a real registered break iterator, the actualLoc 370 // will be populated, but if the handleDefault path is taken 371 // (because nothing is registered that can handle the 372 // requested locale) then the actualLoc comes back empty. In 373 // that case, the returned object already has its actual/valid 374 // locale data populated (by makeInstance, which is what 375 // handleDefault calls), so we don't touch it. YES, A COMMENT 376 // THIS LONG is a sign of bad code -- so the action item is to 377 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 378 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 379 U_LOCALE_BASED(locBased, *result); 380 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 381 } 382 return result; 383 } 384 else 385#endif 386 { 387 return makeInstance(loc, kind, status); 388 } 389} 390 391// ------------------------------------- 392 393BreakIterator* 394BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 395{ 396 397 if (U_FAILURE(status)) { 398 return NULL; 399 } 400 401 BreakIterator *result = NULL; 402 switch (kind) { 403 case UBRK_CHARACTER: 404 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 405 break; 406 case UBRK_WORD: 407 result = BreakIterator::buildInstance(loc, "word", kind, status); 408 break; 409 case UBRK_LINE: 410 result = BreakIterator::buildInstance(loc, "line", kind, status); 411 break; 412 case UBRK_SENTENCE: 413 result = BreakIterator::buildInstance(loc, "sentence", kind, status); 414 break; 415 case UBRK_TITLE: 416 result = BreakIterator::buildInstance(loc, "title", kind, status); 417 break; 418 default: 419 status = U_ILLEGAL_ARGUMENT_ERROR; 420 } 421 422 if (U_FAILURE(status)) { 423 return NULL; 424 } 425 426 return result; 427} 428 429Locale 430BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 431 U_LOCALE_BASED(locBased, *this); 432 return locBased.getLocale(type, status); 433} 434 435const char * 436BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 437 U_LOCALE_BASED(locBased, *this); 438 return locBased.getLocaleID(type, status); 439} 440 441 442// This implementation of getRuleStatus is a do-nothing stub, here to 443// provide a default implementation for any derived BreakIterator classes that 444// do not implement it themselves. 445int32_t BreakIterator::getRuleStatus() const { 446 return 0; 447} 448 449// This implementation of getRuleStatusVec is a do-nothing stub, here to 450// provide a default implementation for any derived BreakIterator classes that 451// do not implement it themselves. 452int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 453 if (U_FAILURE(status)) { 454 return 0; 455 } 456 if (capacity < 1) { 457 status = U_BUFFER_OVERFLOW_ERROR; 458 return 1; 459 } 460 *fillInVec = 0; 461 return 1; 462} 463 464BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { 465 U_LOCALE_BASED(locBased, (*this)); 466 locBased.setLocaleIDs(valid, actual); 467} 468 469U_NAMESPACE_END 470 471#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 472 473//eof 474