filteredbrk.cpp revision c14898b482f76ecab9026615e2e4c6fe78358bdc
1/*
2*******************************************************************************
3* Copyright (C) 2014-2015, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*/
7
8#include "unicode/utypes.h"
9#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
10
11#include "cmemory.h"
12
13#include "unicode/filteredbrk.h"
14#include "unicode/ucharstriebuilder.h"
15#include "unicode/ures.h"
16
17#include "uresimp.h" // ures_getByKeyWithFallback
18#include "ubrkimpl.h" // U_ICUDATA_BRKITR
19#include "uvector.h"
20#include "cmemory.h"
21
22U_NAMESPACE_BEGIN
23
24#ifndef FB_DEBUG
25#define FB_DEBUG 0
26#endif
27
28#if FB_DEBUG
29#include <stdio.h>
30static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
31  char buf[2048];
32  if(s) {
33    s->extract(0,s->length(),buf,2048);
34  } else {
35    strcpy(buf,"NULL");
36  }
37  fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
38          f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
39}
40
41#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
42#else
43#define FB_TRACE(m,s,b,d)
44#endif
45
46/**
47 * Used with sortedInsert()
48 */
49static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
50    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
51    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
52    return a.compare(b);
53}
54
55/**
56 * A UVector which implements a set of strings.
57 */
58class U_COMMON_API UStringSet : public UVector {
59 public:
60  UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
61                                           uhash_compareUnicodeString,
62                                           1,
63                                           status) {}
64  virtual ~UStringSet();
65  /**
66   * Is this UnicodeSet contained?
67   */
68  inline UBool contains(const UnicodeString& s) {
69    return contains((void*) &s);
70  }
71  using UVector::contains;
72  /**
73   * Return the ith UnicodeString alias
74   */
75  inline const UnicodeString* getStringAt(int32_t i) const {
76    return (const UnicodeString*)elementAt(i);
77  }
78  /**
79   * Adopt the UnicodeString if not already contained.
80   * Caller no longer owns the pointer in any case.
81   * @return true if adopted successfully, false otherwise (error, or else duplicate)
82   */
83  inline UBool adopt(UnicodeString *str, UErrorCode &status) {
84    if(U_FAILURE(status) || contains(*str)) {
85      delete str;
86      return false;
87    } else {
88      sortedInsert(str, compareUnicodeString, status);
89      if(U_FAILURE(status)) {
90        delete str;
91        return false;
92      }
93      return true;
94    }
95  }
96  /**
97   * Add by value.
98   * @return true if successfully adopted.
99   */
100  inline UBool add(const UnicodeString& str, UErrorCode &status) {
101    if(U_FAILURE(status)) return false;
102    UnicodeString *t = new UnicodeString(str);
103    if(t==NULL) {
104      status = U_MEMORY_ALLOCATION_ERROR; return false;
105    }
106    return adopt(t, status);
107  }
108  /**
109   * Remove this string.
110   * @return true if successfully removed, false otherwise (error, or else it wasn't there)
111   */
112  inline UBool remove(const UnicodeString &s, UErrorCode &status) {
113    if(U_FAILURE(status)) return false;
114    return removeElement((void*) &s);
115  }
116};
117
118/**
119 * Virtual, won't be inlined
120 */
121UStringSet::~UStringSet() {}
122
123/* ----------------------------------------------------------- */
124
125
126/* Filtered Break constants */
127static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
128static const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
129static const int32_t kSuppressInReverse = (1<<0);
130static const int32_t kAddToForward = (1<<1);
131static const UChar   kFULLSTOP = 0x002E; // '.'
132
133/**
134 * Shared data for SimpleFilteredSentenceBreakIterator
135 */
136class SimpleFilteredSentenceBreakData : public UMemory {
137public:
138  SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
139      : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
140  SimpleFilteredSentenceBreakData *incr() { refcount++;  return this; }
141  SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
142  virtual ~SimpleFilteredSentenceBreakData();
143
144  LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
145  LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
146  int32_t                     refcount;
147};
148
149SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
150
151/**
152 * Concrete implementation
153 */
154class SimpleFilteredSentenceBreakIterator : public BreakIterator {
155public:
156  SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
157  SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
158  virtual ~SimpleFilteredSentenceBreakIterator();
159private:
160  SimpleFilteredSentenceBreakData *fData;
161  LocalPointer<BreakIterator> fDelegate;
162  LocalUTextPointer           fText;
163
164  /* -- subclass interface -- */
165public:
166  /* -- cloning and other subclass stuff -- */
167  virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
168                                             int32_t &/*BufferSize*/,
169                                             UErrorCode &status) {
170    // for now - always deep clone
171    status = U_SAFECLONE_ALLOCATED_WARNING;
172    return clone();
173  }
174  virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
175  virtual UClassID getDynamicClassID(void) const { return NULL; }
176  virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
177
178  /* -- text modifying -- */
179  virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
180  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
181  virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
182  virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
183
184  /* -- other functions that are just delegated -- */
185  virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
186  virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
187
188  /* -- ITERATION -- */
189  virtual int32_t first(void);
190  virtual int32_t preceding(int32_t offset);
191  virtual int32_t previous(void);
192  virtual UBool isBoundary(int32_t offset);
193  virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
194
195  virtual int32_t next(void);
196
197  virtual int32_t next(int32_t n);
198  virtual int32_t following(int32_t offset);
199  virtual int32_t last(void);
200
201private:
202    /**
203     * Given that the fDelegate has already given its "initial" answer,
204     * find the NEXT actual (non-excepted) break.
205     * @param n initial position from delegate
206     * @return new break position or UBRK_DONE
207     */
208    int32_t internalNext(int32_t n);
209    /**
210     * Given that the fDelegate has already given its "initial" answer,
211     * find the PREV actual (non-excepted) break.
212     * @param n initial position from delegate
213     * @return new break position or UBRK_DONE
214     */
215    int32_t internalPrev(int32_t n);
216    /**
217     * set up the UText with the value of the fDelegate.
218     * Call this before calling breakExceptionAt.
219     * May be able to avoid excess calls
220     */
221    void resetState(UErrorCode &status);
222    /**
223     * Is there a match  (exception) at this spot?
224     */
225    enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
226    /**
227     * Determine if there is an exception at this spot
228     * @param n spot to check
229     * @return kNoExceptionHere or kExceptionHere
230     **/
231    enum EFBMatchResult breakExceptionAt(int32_t n);
232};
233
234SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
235  : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
236{
237}
238
239
240SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
241  BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
242  fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
243  fDelegate(adopt)
244{
245  // all set..
246}
247
248SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
249    fData = fData->decr();
250}
251
252void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
253  fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
254}
255
256SimpleFilteredSentenceBreakIterator::EFBMatchResult
257SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
258    int64_t bestPosn = -1;
259    int32_t bestValue = -1;
260    // loops while 'n' points to an exception.
261    utext_setNativeIndex(fText.getAlias(), n); // from n..
262    fData->fBackwardsTrie->reset();
263    UChar32 uch;
264
265    //if(debug2) u_printf(" n@ %d\n", n);
266    // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
267    if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) {  // TODO: skip a class of chars here??
268      // TODO only do this the 1st time?
269      //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
270    } else {
271      //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
272      uch = utext_next32(fText.getAlias());
273      //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
274    }
275
276    UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
277
278    while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&   // more to consume backwards and..
279          USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
280      if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
281        bestPosn = utext_getNativeIndex(fText.getAlias());
282        bestValue = fData->fBackwardsTrie->getValue();
283      }
284      //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
285    }
286
287    if(USTRINGTRIE_MATCHES(r)) { // exact match?
288      //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
289      bestValue = fData->fBackwardsTrie->getValue();
290      bestPosn = utext_getNativeIndex(fText.getAlias());
291      //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
292    }
293
294    if(bestPosn>=0) {
295      //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
296
297      //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
298      //int32_t bestValue = fBackwardsTrie->getValue();
299      ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (UChar)uch, r, bestValue);
300
301      if(bestValue == kMATCH) { // exact match!
302        //if(debug2) u_printf(" exact backward match\n");
303        return kExceptionHere; // See if the next is another exception.
304      } else if(bestValue == kPARTIAL
305                && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
306        //if(debug2) u_printf(" partial backward match\n");
307        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
308        // to see if it matches something going forward.
309        fData->fForwardsPartialTrie->reset();
310        UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
311        utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
312        //if(debug2) u_printf("Retrying at %d\n", bestPosn);
313        while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
314              USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
315          //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
316        }
317        if(USTRINGTRIE_MATCHES(rfwd)) {
318          //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
319          // only full matches here, nothing to check
320          // skip the next:
321            return kExceptionHere;
322        } else {
323          //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
324          // no match (no exception) -return the 'underlying' break
325          return kNoExceptionHere;
326        }
327      } else {
328        return kNoExceptionHere; // internal error and/or no forwards trie
329      }
330    } else {
331      //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r);  // no best match
332      return kNoExceptionHere; // No match - so exit. Not an exception.
333    }
334}
335
336// the workhorse single next.
337int32_t
338SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
339  if(n == UBRK_DONE || // at end  or
340    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
341      return n;
342  }
343  // OK, do we need to break here?
344  UErrorCode status = U_ZERO_ERROR;
345  // refresh text
346  resetState(status);
347  if(U_FAILURE(status)) return UBRK_DONE; // bail out
348  int64_t utextLen = utext_nativeLength(fText.getAlias());
349
350  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
351  while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
352    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
353
354    switch(m) {
355    case kExceptionHere:
356      n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
357      continue;
358
359    default:
360    case kNoExceptionHere:
361      return n;
362    }
363  }
364  return n;
365}
366
367int32_t
368SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
369  if(n == 0 || n == UBRK_DONE || // at end  or
370    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
371      return n;
372  }
373  // OK, do we need to break here?
374  UErrorCode status = U_ZERO_ERROR;
375  // refresh text
376  resetState(status);
377  if(U_FAILURE(status)) return UBRK_DONE; // bail out
378
379  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
380  while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
381    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
382
383    switch(m) {
384    case kExceptionHere:
385      n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
386      continue;
387
388    default:
389    case kNoExceptionHere:
390      return n;
391    }
392  }
393  return n;
394}
395
396
397int32_t
398SimpleFilteredSentenceBreakIterator::next() {
399  return internalNext(fDelegate->next());
400}
401
402int32_t
403SimpleFilteredSentenceBreakIterator::first(void) {
404  return internalNext(fDelegate->first());
405}
406
407int32_t
408SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
409  return internalPrev(fDelegate->preceding(offset));
410}
411
412int32_t
413SimpleFilteredSentenceBreakIterator::previous(void) {
414  return internalPrev(fDelegate->previous());
415}
416
417UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
418  if(!fDelegate->isBoundary(offset)) return false; // no break to suppress
419
420  UErrorCode status = U_ZERO_ERROR;
421  resetState(status);
422
423  SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
424
425  switch(m) {
426  case kExceptionHere:
427    return false;
428  default:
429  case kNoExceptionHere:
430    return true;
431  }
432}
433
434int32_t
435SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
436  return internalNext(fDelegate->next(offset));
437}
438
439int32_t
440SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
441  return internalNext(fDelegate->following(offset));
442}
443
444int32_t
445SimpleFilteredSentenceBreakIterator::last(void) {
446  // Don't suppress a break opportunity at the end of text.
447  return fDelegate->last();
448}
449
450
451/**
452 * Concrete implementation of builder class.
453 */
454class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
455public:
456  virtual ~SimpleFilteredBreakIteratorBuilder();
457  SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
458  SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
459  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
460  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
461  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
462private:
463  UStringSet fSet;
464};
465
466SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
467{
468}
469
470SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
471  : fSet(status)
472{
473}
474
475SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
476  : fSet(status)
477{
478  if(U_SUCCESS(status)) {
479    LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status));
480    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status));
481    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status));
482    if(U_FAILURE(status)) return; // leaves the builder empty, if you try to use it.
483
484    LocalUResourceBundlePointer strs;
485    UErrorCode subStatus = status;
486    do {
487      strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
488      if(strs.isValid() && U_SUCCESS(subStatus)) {
489        UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
490        suppressBreakAfter(str, status); // load the string
491      }
492    } while (strs.isValid() && U_SUCCESS(subStatus));
493    if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
494      status = subStatus;
495    }
496  }
497}
498
499UBool
500SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
501{
502  UBool r = fSet.add(exception, status);
503  FB_TRACE("suppressBreakAfter",&exception,r,0);
504  return r;
505}
506
507UBool
508SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
509{
510  UBool r = fSet.remove(exception, status);
511  FB_TRACE("unsuppressBreakAfter",&exception,r,0);
512  return r;
513}
514
515/**
516 * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
517 * Work around this.
518 *
519 * Note: "new UnicodeString[subCount]" ends up calling global operator new
520 * on MSVC2012 for some reason.
521 */
522static inline UnicodeString* newUnicodeStringArray(size_t count) {
523    return new UnicodeString[count ? count : 1];
524}
525
526BreakIterator *
527SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
528  LocalPointer<BreakIterator> adopt(adoptBreakIterator);
529
530  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
531  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
532  if(U_FAILURE(status)) {
533    return NULL;
534  }
535
536  int32_t revCount = 0;
537  int32_t fwdCount = 0;
538
539  int32_t subCount = fSet.size();
540
541  UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
542
543  LocalArray<UnicodeString> ustrs(ustrs_ptr);
544
545  LocalMemory<int> partials;
546  partials.allocateInsteadAndReset(subCount);
547
548  LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
549  LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."
550
551  int n=0;
552  for ( int32_t i = 0;
553        i<fSet.size();
554        i++) {
555    const UnicodeString *abbr = fSet.getStringAt(i);
556    if(abbr) {
557      FB_TRACE("build",abbr,TRUE,i);
558      ustrs[n] = *abbr; // copy by value
559      FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
560    } else {
561      FB_TRACE("build",abbr,FALSE,i);
562      status = U_MEMORY_ALLOCATION_ERROR;
563      return NULL;
564    }
565    partials[n] = 0; // default: not partial
566    n++;
567  }
568  // first pass - find partials.
569  for(int i=0;i<subCount;i++) {
570    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
571    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
572      FB_TRACE("partial",&ustrs[i],FALSE,i);
573      // is partial.
574      // is it unique?
575      int sameAs = -1;
576      for(int j=0;j<subCount;j++) {
577        if(j==i) continue;
578        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
579          FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
580          //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
581          if(partials[j]==0) { // hasn't been processed yet
582            partials[j] = kSuppressInReverse | kAddToForward;
583            FB_TRACE("suppressing",&ustrs[j],FALSE,j);
584          } else if(partials[j] & kSuppressInReverse) {
585            sameAs = j; // the other entry is already in the reverse table.
586          }
587        }
588      }
589      FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
590      FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
591      UnicodeString prefix(ustrs[i], 0, nn+1);
592      if(sameAs == -1 && partials[i] == 0) {
593        // first one - add the prefix to the reverse table.
594        prefix.reverse();
595        builder->add(prefix, kPARTIAL, status);
596        revCount++;
597        FB_TRACE("Added partial",&prefix,FALSE, i);
598        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
599        partials[i] = kSuppressInReverse | kAddToForward;
600      } else {
601        FB_TRACE("NOT adding partial",&prefix,FALSE, i);
602        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
603      }
604    }
605  }
606  for(int i=0;i<subCount;i++) {
607    if(partials[i]==0) {
608      ustrs[i].reverse();
609      builder->add(ustrs[i], kMATCH, status);
610      revCount++;
611      FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
612    } else {
613      FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
614
615      // an optimization would be to only add the portion after the '.'
616      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
617      // instead of "Ph.D." since we already know the "Ph." part is a match.
618      // would need the trie to be able to hold 0-length strings, though.
619      builder2->add(ustrs[i], kMATCH, status); // forward
620      fwdCount++;
621      //ustrs[i].reverse();
622      ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
623    }
624  }
625  FB_TRACE("AbbrCount",NULL,FALSE, subCount);
626
627  if(revCount>0) {
628    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
629    if(U_FAILURE(status)) {
630      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
631      return NULL;
632    }
633  }
634
635  if(fwdCount>0) {
636    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
637    if(U_FAILURE(status)) {
638      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
639      return NULL;
640    }
641  }
642
643  return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
644}
645
646
647// ----------- Base class implementation
648
649FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
650}
651
652FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
653}
654
655FilteredBreakIteratorBuilder *
656FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
657  if(U_FAILURE(status)) return NULL;
658  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
659  return (U_SUCCESS(status))? ret.orphan(): NULL;
660}
661
662FilteredBreakIteratorBuilder *
663FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
664  if(U_FAILURE(status)) return NULL;
665  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
666  return (U_SUCCESS(status))? ret.orphan(): NULL;
667}
668
669U_NAMESPACE_END
670
#endif // !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
672