1/*
2*******************************************************************************
3* Copyright (C) 1997-2015, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File brkiter.cpp
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   02/18/97    aliu        Converted from OpenClass.  Added DONE.
13*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
14*****************************************************************************************
15*/
16
17// *****************************************************************************
18// This file was generated from the java source file BreakIterator.java
19// *****************************************************************************
20
21#include "unicode/utypes.h"
22
23#if !UCONFIG_NO_BREAK_ITERATION
24
25#include "unicode/rbbi.h"
26#include "unicode/brkiter.h"
27#include "unicode/udata.h"
28#include "unicode/ures.h"
29#include "unicode/ustring.h"
30#include "ucln_cmn.h"
31#include "cstring.h"
32#include "umutex.h"
33#include "servloc.h"
34#include "locbased.h"
35#include "uresimp.h"
36#include "uassert.h"
37#include "ubrkimpl.h"
38#include "charstr.h"
39
40// *****************************************************************************
41// class BreakIterator
42// This class implements methods for finding the location of boundaries in text.
43// Instances of BreakIterator maintain a current position and scan over text
44// returning the index of characters where boundaries occur.
45// *****************************************************************************
46
47U_NAMESPACE_BEGIN
48
49// -------------------------------------
50
51BreakIterator*
52BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
53{
54    char fnbuff[256];
55    char ext[4]={'\0'};
56    CharString actualLocale;
57    int32_t size;
58    const UChar* brkfname = NULL;
59    UResourceBundle brkRulesStack;
60    UResourceBundle brkNameStack;
61    UResourceBundle *brkRules = &brkRulesStack;
62    UResourceBundle *brkName  = &brkNameStack;
63    RuleBasedBreakIterator *result = NULL;
64
65    if (U_FAILURE(status))
66        return NULL;
67
68    ures_initStackObject(brkRules);
69    ures_initStackObject(brkName);
70
71    // Get the locale
72    UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status);
73
74    // Get the "boundaries" array.
75    if (U_SUCCESS(status)) {
76        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
77        // Get the string object naming the rules file
78        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
79        // Get the actual string
80        brkfname = ures_getString(brkName, &size, &status);
81        U_ASSERT((size_t)size<sizeof(fnbuff));
82        if ((size_t)size>=sizeof(fnbuff)) {
83            size=0;
84            if (U_SUCCESS(status)) {
85                status = U_BUFFER_OVERFLOW_ERROR;
86            }
87        }
88
89        // Use the string if we found it
90        if (U_SUCCESS(status) && brkfname) {
91            actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
92
93            UChar* extStart=u_strchr(brkfname, 0x002e);
94            int len = 0;
95            if(extStart!=NULL){
96                len = (int)(extStart-brkfname);
97                u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
98                u_UCharsToChars(brkfname, fnbuff, len);
99            }
100            fnbuff[len]=0; // nul terminate
101        }
102    }
103
104    ures_close(brkRules);
105    ures_close(brkName);
106
107    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
108    if (U_FAILURE(status)) {
109        ures_close(b);
110        return NULL;
111    }
112
113    // Create a RuleBasedBreakIterator
114    result = new RuleBasedBreakIterator(file, status);
115
116    // If there is a result, set the valid locale and actual locale, and the kind
117    if (U_SUCCESS(status) && result != NULL) {
118        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
119        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
120                              actualLocale.data());
121        result->setBreakType(kind);
122    }
123
124    ures_close(b);
125
126    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
127        delete result;
128        return NULL;
129    }
130
131    if (result == NULL) {
132        udata_close(file);
133        if (U_SUCCESS(status)) {
134            status = U_MEMORY_ALLOCATION_ERROR;
135        }
136    }
137
138    return result;
139}
140
141// Creates a break iterator for word breaks.
142BreakIterator* U_EXPORT2
143BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
144{
145    return createInstance(key, UBRK_WORD, status);
146}
147
148// -------------------------------------
149
150// Creates a break iterator  for line breaks.
151BreakIterator* U_EXPORT2
152BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
153{
154    return createInstance(key, UBRK_LINE, status);
155}
156
157// -------------------------------------
158
159// Creates a break iterator  for character breaks.
160BreakIterator* U_EXPORT2
161BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
162{
163    return createInstance(key, UBRK_CHARACTER, status);
164}
165
166// -------------------------------------
167
168// Creates a break iterator  for sentence breaks.
169BreakIterator* U_EXPORT2
170BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
171{
172    return createInstance(key, UBRK_SENTENCE, status);
173}
174
175// -------------------------------------
176
177// Creates a break iterator for title casing breaks.
178BreakIterator* U_EXPORT2
179BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
180{
181    return createInstance(key, UBRK_TITLE, status);
182}
183
184// -------------------------------------
185
186// Gets all the available locales that has localized text boundary data.
187const Locale* U_EXPORT2
188BreakIterator::getAvailableLocales(int32_t& count)
189{
190    return Locale::getAvailableLocales(count);
191}
192
193// ------------------------------------------
194//
195// Default constructor and destructor
196//
197//-------------------------------------------
198
199BreakIterator::BreakIterator()
200{
201    *validLocale = *actualLocale = 0;
202}
203
204BreakIterator::~BreakIterator()
205{
206}
207
208// ------------------------------------------
209//
210// Registration
211//
212//-------------------------------------------
213#if !UCONFIG_NO_SERVICE
214
215// -------------------------------------
216
217class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
218public:
219    virtual ~ICUBreakIteratorFactory();
220protected:
221    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
222        return BreakIterator::makeInstance(loc, kind, status);
223    }
224};
225
226ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
227
228// -------------------------------------
229
230class ICUBreakIteratorService : public ICULocaleService {
231public:
232    ICUBreakIteratorService()
233        : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
234    {
235        UErrorCode status = U_ZERO_ERROR;
236        registerFactory(new ICUBreakIteratorFactory(), status);
237    }
238
239    virtual ~ICUBreakIteratorService();
240
241    virtual UObject* cloneInstance(UObject* instance) const {
242        return ((BreakIterator*)instance)->clone();
243    }
244
245    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
246        LocaleKey& lkey = (LocaleKey&)key;
247        int32_t kind = lkey.kind();
248        Locale loc;
249        lkey.currentLocale(loc);
250        return BreakIterator::makeInstance(loc, kind, status);
251    }
252
253    virtual UBool isDefault() const {
254        return countFactories() == 1;
255    }
256};
257
258ICUBreakIteratorService::~ICUBreakIteratorService() {}
259
260// -------------------------------------
261
262// defined in ucln_cmn.h
263U_NAMESPACE_END
264
265static icu::UInitOnce gInitOnce;
266static icu::ICULocaleService* gService = NULL;
267
268
269
270/**
271 * Release all static memory held by breakiterator.
272 */
273U_CDECL_BEGIN
274static UBool U_CALLCONV breakiterator_cleanup(void) {
275#if !UCONFIG_NO_SERVICE
276    if (gService) {
277        delete gService;
278        gService = NULL;
279    }
280    gInitOnce.reset();
281#endif
282    return TRUE;
283}
284U_CDECL_END
285U_NAMESPACE_BEGIN
286
287static void U_CALLCONV
288initService(void) {
289    gService = new ICUBreakIteratorService();
290    ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
291}
292
293static ICULocaleService*
294getService(void)
295{
296    umtx_initOnce(gInitOnce, &initService);
297    return gService;
298}
299
300
301// -------------------------------------
302
303static inline UBool
304hasService(void)
305{
306    return !gInitOnce.isReset() && getService() != NULL;
307}
308
309// -------------------------------------
310
311URegistryKey U_EXPORT2
312BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
313{
314    ICULocaleService *service = getService();
315    if (service == NULL) {
316        status = U_MEMORY_ALLOCATION_ERROR;
317        return NULL;
318    }
319    return service->registerInstance(toAdopt, locale, kind, status);
320}
321
322// -------------------------------------
323
324UBool U_EXPORT2
325BreakIterator::unregister(URegistryKey key, UErrorCode& status)
326{
327    if (U_SUCCESS(status)) {
328        if (hasService()) {
329            return gService->unregister(key, status);
330        }
331        status = U_MEMORY_ALLOCATION_ERROR;
332    }
333    return FALSE;
334}
335
336// -------------------------------------
337
338StringEnumeration* U_EXPORT2
339BreakIterator::getAvailableLocales(void)
340{
341    ICULocaleService *service = getService();
342    if (service == NULL) {
343        return NULL;
344    }
345    return service->getAvailableLocales();
346}
347#endif /* UCONFIG_NO_SERVICE */
348
349// -------------------------------------
350
351BreakIterator*
352BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
353{
354    if (U_FAILURE(status)) {
355        return NULL;
356    }
357
358#if !UCONFIG_NO_SERVICE
359    if (hasService()) {
360        Locale actualLoc("");
361        BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
362        // TODO: The way the service code works in ICU 2.8 is that if
363        // there is a real registered break iterator, the actualLoc
364        // will be populated, but if the handleDefault path is taken
365        // (because nothing is registered that can handle the
366        // requested locale) then the actualLoc comes back empty.  In
367        // that case, the returned object already has its actual/valid
368        // locale data populated (by makeInstance, which is what
369        // handleDefault calls), so we don't touch it.  YES, A COMMENT
370        // THIS LONG is a sign of bad code -- so the action item is to
371        // revisit this in ICU 3.0 and clean it up/fix it/remove it.
372        if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
373            U_LOCALE_BASED(locBased, *result);
374            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
375        }
376        return result;
377    }
378    else
379#endif
380    {
381        return makeInstance(loc, kind, status);
382    }
383}
384
385// -------------------------------------
386enum { kLBTypeLenMax = 32 };
387
388BreakIterator*
389BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
390{
391
392    if (U_FAILURE(status)) {
393        return NULL;
394    }
395    char lbType[kLBTypeLenMax];
396
397    BreakIterator *result = NULL;
398    switch (kind) {
399    case UBRK_CHARACTER:
400        result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
401        break;
402    case UBRK_WORD:
403        result = BreakIterator::buildInstance(loc, "word", kind, status);
404        break;
405    case UBRK_LINE:
406        uprv_strcpy(lbType, "line");
407        {
408            char lbKeyValue[kLBTypeLenMax] = {0};
409            UErrorCode kvStatus = U_ZERO_ERROR;
410            int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kLBTypeLenMax, kvStatus);
411            if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
412                uprv_strcat(lbType, "_");
413                uprv_strcat(lbType, lbKeyValue);
414            }
415        }
416        result = BreakIterator::buildInstance(loc, lbType, kind, status);
417        break;
418    case UBRK_SENTENCE:
419        result = BreakIterator::buildInstance(loc, "sentence", kind, status);
420        break;
421    case UBRK_TITLE:
422        result = BreakIterator::buildInstance(loc, "title", kind, status);
423        break;
424    default:
425        status = U_ILLEGAL_ARGUMENT_ERROR;
426    }
427
428    if (U_FAILURE(status)) {
429        return NULL;
430    }
431
432    return result;
433}
434
435Locale
436BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
437    U_LOCALE_BASED(locBased, *this);
438    return locBased.getLocale(type, status);
439}
440
441const char *
442BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
443    U_LOCALE_BASED(locBased, *this);
444    return locBased.getLocaleID(type, status);
445}
446
447
448// This implementation of getRuleStatus is a do-nothing stub, here to
449// provide a default implementation for any derived BreakIterator classes that
450// do not implement it themselves.
451int32_t BreakIterator::getRuleStatus() const {
452    return 0;
453}
454
455// This implementation of getRuleStatusVec is a do-nothing stub, here to
456// provide a default implementation for any derived BreakIterator classes that
457// do not implement it themselves.
458int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
459    if (U_FAILURE(status)) {
460        return 0;
461    }
462    if (capacity < 1) {
463        status = U_BUFFER_OVERFLOW_ERROR;
464        return 1;
465    }
466    *fillInVec = 0;
467    return 1;
468}
469
470BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
471  U_LOCALE_BASED(locBased, (*this));
472  locBased.setLocaleIDs(valid, actual);
473}
474
475U_NAMESPACE_END
476
477#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
478
479//eof
480