1/* 2******************************************************************************* 3* Copyright (C) 1997-2015, International Business Machines Corporation and 4* others. All Rights Reserved. 5******************************************************************************* 6* 7* File brkiter.cpp 8* 9* Modification History: 10* 11* Date Name Description 12* 02/18/97 aliu Converted from OpenClass. Added DONE. 13* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 14***************************************************************************************** 15*/ 16 17// ***************************************************************************** 18// This file was generated from the java source file BreakIterator.java 19// ***************************************************************************** 20 21#include "unicode/utypes.h" 22 23#if !UCONFIG_NO_BREAK_ITERATION 24 25#include "unicode/rbbi.h" 26#include "unicode/brkiter.h" 27#include "unicode/udata.h" 28#include "unicode/ures.h" 29#include "unicode/ustring.h" 30#include "ucln_cmn.h" 31#include "cstring.h" 32#include "umutex.h" 33#include "servloc.h" 34#include "locbased.h" 35#include "uresimp.h" 36#include "uassert.h" 37#include "ubrkimpl.h" 38#include "charstr.h" 39 40// ***************************************************************************** 41// class BreakIterator 42// This class implements methods for finding the location of boundaries in text. 43// Instances of BreakIterator maintain a current position and scan over text 44// returning the index of characters where boundaries occur. 45// ***************************************************************************** 46 47U_NAMESPACE_BEGIN 48 49// ------------------------------------- 50 51BreakIterator* 52BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 53{ 54 char fnbuff[256]; 55 char ext[4]={'\0'}; 56 CharString actualLocale; 57 int32_t size; 58 const UChar* brkfname = NULL; 59 UResourceBundle brkRulesStack; 60 UResourceBundle brkNameStack; 61 UResourceBundle *brkRules = &brkRulesStack; 62 UResourceBundle *brkName = &brkNameStack; 63 RuleBasedBreakIterator *result = NULL; 64 65 if (U_FAILURE(status)) 66 return NULL; 67 68 ures_initStackObject(brkRules); 69 ures_initStackObject(brkName); 70 71 // Get the locale 72 UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); 73 74 // Get the "boundaries" array. 75 if (U_SUCCESS(status)) { 76 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 77 // Get the string object naming the rules file 78 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 79 // Get the actual string 80 brkfname = ures_getString(brkName, &size, &status); 81 U_ASSERT((size_t)size<sizeof(fnbuff)); 82 if ((size_t)size>=sizeof(fnbuff)) { 83 size=0; 84 if (U_SUCCESS(status)) { 85 status = U_BUFFER_OVERFLOW_ERROR; 86 } 87 } 88 89 // Use the string if we found it 90 if (U_SUCCESS(status) && brkfname) { 91 actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); 92 93 UChar* extStart=u_strchr(brkfname, 0x002e); 94 int len = 0; 95 if(extStart!=NULL){ 96 len = (int)(extStart-brkfname); 97 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 98 u_UCharsToChars(brkfname, fnbuff, len); 99 } 100 fnbuff[len]=0; // nul terminate 101 } 102 } 103 104 ures_close(brkRules); 105 ures_close(brkName); 106 107 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 108 if (U_FAILURE(status)) { 109 ures_close(b); 110 return NULL; 111 } 112 113 // Create a RuleBasedBreakIterator 114 result = new RuleBasedBreakIterator(file, status); 115 116 // If there is a result, set the valid locale and actual locale, and the kind 117 if (U_SUCCESS(status) && result != NULL) { 118 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 119 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), 120 actualLocale.data()); 121 result->setBreakType(kind); 122 } 123 124 ures_close(b); 125 126 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 127 delete result; 128 return NULL; 129 } 130 131 if (result == NULL) { 132 udata_close(file); 133 if (U_SUCCESS(status)) { 134 status = U_MEMORY_ALLOCATION_ERROR; 135 } 136 } 137 138 return result; 139} 140 141// Creates a break iterator for word breaks. 142BreakIterator* U_EXPORT2 143BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 144{ 145 return createInstance(key, UBRK_WORD, status); 146} 147 148// ------------------------------------- 149 150// Creates a break iterator for line breaks. 151BreakIterator* U_EXPORT2 152BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 153{ 154 return createInstance(key, UBRK_LINE, status); 155} 156 157// ------------------------------------- 158 159// Creates a break iterator for character breaks. 160BreakIterator* U_EXPORT2 161BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 162{ 163 return createInstance(key, UBRK_CHARACTER, status); 164} 165 166// ------------------------------------- 167 168// Creates a break iterator for sentence breaks. 169BreakIterator* U_EXPORT2 170BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 171{ 172 return createInstance(key, UBRK_SENTENCE, status); 173} 174 175// ------------------------------------- 176 177// Creates a break iterator for title casing breaks. 178BreakIterator* U_EXPORT2 179BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 180{ 181 return createInstance(key, UBRK_TITLE, status); 182} 183 184// ------------------------------------- 185 186// Gets all the available locales that has localized text boundary data. 187const Locale* U_EXPORT2 188BreakIterator::getAvailableLocales(int32_t& count) 189{ 190 return Locale::getAvailableLocales(count); 191} 192 193// ------------------------------------------ 194// 195// Default constructor and destructor 196// 197//------------------------------------------- 198 199BreakIterator::BreakIterator() 200{ 201 *validLocale = *actualLocale = 0; 202} 203 204BreakIterator::~BreakIterator() 205{ 206} 207 208// ------------------------------------------ 209// 210// Registration 211// 212//------------------------------------------- 213#if !UCONFIG_NO_SERVICE 214 215// ------------------------------------- 216 217class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 218public: 219 virtual ~ICUBreakIteratorFactory(); 220protected: 221 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 222 return BreakIterator::makeInstance(loc, kind, status); 223 } 224}; 225 226ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 227 228// ------------------------------------- 229 230class ICUBreakIteratorService : public ICULocaleService { 231public: 232 ICUBreakIteratorService() 233 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 234 { 235 UErrorCode status = U_ZERO_ERROR; 236 registerFactory(new ICUBreakIteratorFactory(), status); 237 } 238 239 virtual ~ICUBreakIteratorService(); 240 241 virtual UObject* cloneInstance(UObject* instance) const { 242 return ((BreakIterator*)instance)->clone(); 243 } 244 245 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 246 LocaleKey& lkey = (LocaleKey&)key; 247 int32_t kind = lkey.kind(); 248 Locale loc; 249 lkey.currentLocale(loc); 250 return BreakIterator::makeInstance(loc, kind, status); 251 } 252 253 virtual UBool isDefault() const { 254 return countFactories() == 1; 255 } 256}; 257 258ICUBreakIteratorService::~ICUBreakIteratorService() {} 259 260// ------------------------------------- 261 262// defined in ucln_cmn.h 263U_NAMESPACE_END 264 265static icu::UInitOnce gInitOnce; 266static icu::ICULocaleService* gService = NULL; 267 268 269 270/** 271 * Release all static memory held by breakiterator. 272 */ 273U_CDECL_BEGIN 274static UBool U_CALLCONV breakiterator_cleanup(void) { 275#if !UCONFIG_NO_SERVICE 276 if (gService) { 277 delete gService; 278 gService = NULL; 279 } 280 gInitOnce.reset(); 281#endif 282 return TRUE; 283} 284U_CDECL_END 285U_NAMESPACE_BEGIN 286 287static void U_CALLCONV 288initService(void) { 289 gService = new ICUBreakIteratorService(); 290 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 291} 292 293static ICULocaleService* 294getService(void) 295{ 296 umtx_initOnce(gInitOnce, &initService); 297 return gService; 298} 299 300 301// ------------------------------------- 302 303static inline UBool 304hasService(void) 305{ 306 return !gInitOnce.isReset() && getService() != NULL; 307} 308 309// ------------------------------------- 310 311URegistryKey U_EXPORT2 312BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 313{ 314 ICULocaleService *service = getService(); 315 if (service == NULL) { 316 status = U_MEMORY_ALLOCATION_ERROR; 317 return NULL; 318 } 319 return service->registerInstance(toAdopt, locale, kind, status); 320} 321 322// ------------------------------------- 323 324UBool U_EXPORT2 325BreakIterator::unregister(URegistryKey key, UErrorCode& status) 326{ 327 if (U_SUCCESS(status)) { 328 if (hasService()) { 329 return gService->unregister(key, status); 330 } 331 status = U_MEMORY_ALLOCATION_ERROR; 332 } 333 return FALSE; 334} 335 336// ------------------------------------- 337 338StringEnumeration* U_EXPORT2 339BreakIterator::getAvailableLocales(void) 340{ 341 ICULocaleService *service = getService(); 342 if (service == NULL) { 343 return NULL; 344 } 345 return service->getAvailableLocales(); 346} 347#endif /* UCONFIG_NO_SERVICE */ 348 349// ------------------------------------- 350 351BreakIterator* 352BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 353{ 354 if (U_FAILURE(status)) { 355 return NULL; 356 } 357 358#if !UCONFIG_NO_SERVICE 359 if (hasService()) { 360 Locale actualLoc(""); 361 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 362 // TODO: The way the service code works in ICU 2.8 is that if 363 // there is a real registered break iterator, the actualLoc 364 // will be populated, but if the handleDefault path is taken 365 // (because nothing is registered that can handle the 366 // requested locale) then the actualLoc comes back empty. In 367 // that case, the returned object already has its actual/valid 368 // locale data populated (by makeInstance, which is what 369 // handleDefault calls), so we don't touch it. YES, A COMMENT 370 // THIS LONG is a sign of bad code -- so the action item is to 371 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 372 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 373 U_LOCALE_BASED(locBased, *result); 374 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 375 } 376 return result; 377 } 378 else 379#endif 380 { 381 return makeInstance(loc, kind, status); 382 } 383} 384 385// ------------------------------------- 386enum { kLBTypeLenMax = 32 }; 387 388BreakIterator* 389BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 390{ 391 392 if (U_FAILURE(status)) { 393 return NULL; 394 } 395 char lbType[kLBTypeLenMax]; 396 397 BreakIterator *result = NULL; 398 switch (kind) { 399 case UBRK_CHARACTER: 400 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 401 break; 402 case UBRK_WORD: 403 result = BreakIterator::buildInstance(loc, "word", kind, status); 404 break; 405 case UBRK_LINE: 406 uprv_strcpy(lbType, "line"); 407 { 408 char lbKeyValue[kLBTypeLenMax] = {0}; 409 UErrorCode kvStatus = U_ZERO_ERROR; 410 int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kLBTypeLenMax, kvStatus); 411 if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { 412 uprv_strcat(lbType, "_"); 413 uprv_strcat(lbType, lbKeyValue); 414 } 415 } 416 result = BreakIterator::buildInstance(loc, lbType, kind, status); 417 break; 418 case UBRK_SENTENCE: 419 result = BreakIterator::buildInstance(loc, "sentence", kind, status); 420 break; 421 case UBRK_TITLE: 422 result = BreakIterator::buildInstance(loc, "title", kind, status); 423 break; 424 default: 425 status = U_ILLEGAL_ARGUMENT_ERROR; 426 } 427 428 if (U_FAILURE(status)) { 429 return NULL; 430 } 431 432 return result; 433} 434 435Locale 436BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 437 U_LOCALE_BASED(locBased, *this); 438 return locBased.getLocale(type, status); 439} 440 441const char * 442BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 443 U_LOCALE_BASED(locBased, *this); 444 return locBased.getLocaleID(type, status); 445} 446 447 448// This implementation of getRuleStatus is a do-nothing stub, here to 449// provide a default implementation for any derived BreakIterator classes that 450// do not implement it themselves. 451int32_t BreakIterator::getRuleStatus() const { 452 return 0; 453} 454 455// This implementation of getRuleStatusVec is a do-nothing stub, here to 456// provide a default implementation for any derived BreakIterator classes that 457// do not implement it themselves. 458int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 459 if (U_FAILURE(status)) { 460 return 0; 461 } 462 if (capacity < 1) { 463 status = U_BUFFER_OVERFLOW_ERROR; 464 return 1; 465 } 466 *fillInVec = 0; 467 return 1; 468} 469 470BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { 471 U_LOCALE_BASED(locBased, (*this)); 472 locBased.setLocaleIDs(valid, actual); 473} 474 475U_NAMESPACE_END 476 477#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 478 479//eof 480