// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
10f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/utypes.h"
11f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
13f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "cmemory.h"
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
15f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/filteredbrk.h"
16f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/ucharstriebuilder.h"
17f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/ures.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "uresimp.h" // ures_getByKeyWithFallback
20f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "ubrkimpl.h" // U_ICUDATA_BRKITR
21f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "uvector.h"
22f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "cmemory.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
// Debug tracing switch: define FB_DEBUG to a non-zero value to enable FB_TRACE output.
#ifndef FB_DEBUG
#define FB_DEBUG 0
#endif

#if FB_DEBUG
// NOTE(review): this include sits after U_NAMESPACE_BEGIN; confirm <stdio.h> has already
// been pulled in at global scope, otherwise consider moving it above the namespace.
#include <stdio.h>
/**
 * Debug-only helper behind FB_TRACE: writes one diagnostic line to stderr.
 * @param m message text
 * @param s string to dump; may be NULL, in which case "NULL" is printed
 * @param b flag, printed as 'T' or 'F'
 * @param d integer, printed in decimal
 * @param f source file name of the call site
 * @param l source line number of the call site
 */
static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
  char buf[2048];
  if(s) {
    // extract() NUL-terminates its output only when there is room for it, so keep the
    // last byte in reserve and terminate explicitly; otherwise a long string would make
    // the "%s" below read past the end of buf.
    s->extract(0,s->length(),buf,(uint32_t)(sizeof(buf)-1));
    buf[sizeof(buf)-1] = 0;
  } else {
    strcpy(buf,"NULL");
  }
  fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
          f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
}

// Forwards to _fb_trace(), adding the call site's file and line.
#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
#else
// Tracing disabled: FB_TRACE expands to nothing.
#define FB_TRACE(m,s,b,d)
#endif
47f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
48c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert/**
49c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Used with sortedInsert()
50c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert */
51f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
52f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
53f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
54f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return a.compare(b);
55f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
56f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
57f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/**
58f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * A UVector which implements a set of strings.
59f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */
60c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertclass U_COMMON_API UStringSet : public UVector {
61f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius public:
62f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
63f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                           uhash_compareUnicodeString,
64f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                           1,
65f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                                           status) {}
66f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  virtual ~UStringSet();
67f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  /**
68f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Is this UnicodeSet contained?
69f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   */
70f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  inline UBool contains(const UnicodeString& s) {
71f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return contains((void*) &s);
72f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  }
73f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  using UVector::contains;
74f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  /**
75f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Return the ith UnicodeString alias
76f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   */
77f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  inline const UnicodeString* getStringAt(int32_t i) const {
78f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return (const UnicodeString*)elementAt(i);
79f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  }
80f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  /**
81f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Adopt the UnicodeString if not already contained.
82f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Caller no longer owns the pointer in any case.
83f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * @return true if adopted successfully, false otherwise (error, or else duplicate)
84f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   */
85f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  inline UBool adopt(UnicodeString *str, UErrorCode &status) {
86f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(U_FAILURE(status) || contains(*str)) {
87f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      delete str;
88f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      return false;
89f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    } else {
90f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      sortedInsert(str, compareUnicodeString, status);
91f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      if(U_FAILURE(status)) {
92f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        delete str;
93f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return false;
94f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      }
95f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      return true;
96f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
97f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  }
98f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  /**
99f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Add by value.
100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * @return true if successfully adopted.
101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   */
102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  inline UBool add(const UnicodeString& str, UErrorCode &status) {
103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(U_FAILURE(status)) return false;
104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UnicodeString *t = new UnicodeString(str);
105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(t==NULL) {
106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      status = U_MEMORY_ALLOCATION_ERROR; return false;
107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return adopt(t, status);
109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  }
110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  /**
111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * Remove this string.
112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   * @return true if successfully removed, false otherwise (error, or else it wasn't there)
113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius   */
114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  inline UBool remove(const UnicodeString &s, UErrorCode &status) {
115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(U_FAILURE(status)) return false;
116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return removeElement((void*) &s);
117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  }
118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius};
119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/**
121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Virtual, won't be inlined
122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */
123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUStringSet::~UStringSet() {}
124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
125c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert/* ----------------------------------------------------------- */
126c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
128c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert/* Filtered Break constants */
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const int32_t kSuppressInReverse = (1<<0);
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const int32_t kAddToForward = (1<<1);
133c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertstatic const UChar   kFULLSTOP = 0x002E; // '.'
134c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
135c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert/**
136c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Shared data for SimpleFilteredSentenceBreakIterator
137c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert */
138c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertclass SimpleFilteredSentenceBreakData : public UMemory {
139c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertpublic:
140c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
141c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
142c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  SimpleFilteredSentenceBreakData *incr() { refcount++;  return this; }
143c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
144c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual ~SimpleFilteredSentenceBreakData();
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
146c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
147c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
148c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  int32_t                     refcount;
149c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert};
150c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
151c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
152c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
153c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert/**
154c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Concrete implementation
155c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert */
156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusclass SimpleFilteredSentenceBreakIterator : public BreakIterator {
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  virtual ~SimpleFilteredSentenceBreakIterator();
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
162c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  SimpleFilteredSentenceBreakData *fData;
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  LocalPointer<BreakIterator> fDelegate;
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  LocalUTextPointer           fText;
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  /* -- subclass interface -- */
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  /* -- cloning and other subclass stuff -- */
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                             int32_t &/*BufferSize*/,
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                             UErrorCode &status) {
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // for now - always deep clone
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    status = U_SAFECLONE_ALLOCATED_WARNING;
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return clone();
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual UClassID getDynamicClassID(void) const { return NULL; }
1781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  /* -- text modifying -- */
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  /* -- other functions that are just delegated -- */
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  /* -- ITERATION -- */
191c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t first(void);
192c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t preceding(int32_t offset);
193c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t previous(void);
194c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual UBool isBoundary(int32_t offset);
195c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual int32_t next(void);
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
199c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t next(int32_t n);
200c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t following(int32_t offset);
201c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  virtual int32_t last(void);
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
203c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertprivate:
204c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    /**
205c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * Given that the fDelegate has already given its "initial" answer,
206c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * find the NEXT actual (non-excepted) break.
207c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @param n initial position from delegate
208c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @return new break position or UBRK_DONE
209c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     */
210c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    int32_t internalNext(int32_t n);
211c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    /**
212c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * Given that the fDelegate has already given its "initial" answer,
213c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * find the PREV actual (non-excepted) break.
214c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @param n initial position from delegate
215c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @return new break position or UBRK_DONE
216c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     */
217c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    int32_t internalPrev(int32_t n);
218c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    /**
219c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * set up the UText with the value of the fDelegate.
220c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * Call this before calling breakExceptionAt.
221c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * May be able to avoid excess calls
222c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     */
223c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    void resetState(UErrorCode &status);
224c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    /**
225c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * Is there a match  (exception) at this spot?
226c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     */
227c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
228c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    /**
229c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * Determine if there is an exception at this spot
230c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @param n spot to check
231c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     * @return kNoExceptionHere or kExceptionHere
232c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert     **/
233c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    enum EFBMatchResult breakExceptionAt(int32_t n);
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusSimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
237c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusSimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
244c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
245c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  fDelegate(adopt)
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  // all set..
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
250c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
251c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    fData = fData->decr();
252c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
254c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertvoid SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
256c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
257c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
258c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::EFBMatchResult
259c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
260c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    int64_t bestPosn = -1;
261c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    int32_t bestValue = -1;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // loops while 'n' points to an exception.
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utext_setNativeIndex(fText.getAlias(), n); // from n..
264c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    fData->fBackwardsTrie->reset();
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 uch;
266c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //if(debug2) u_printf(" n@ %d\n", n);
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) {  // TODO: skip a class of chars here??
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // TODO only do this the 1st time?
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      uch = utext_next32(fText.getAlias());
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
278c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&   // more to consume backwards and..
281c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        bestPosn = utext_getNativeIndex(fText.getAlias());
284c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        bestValue = fData->fBackwardsTrie->getValue();
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      }
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(USTRINGTRIE_MATCHES(r)) { // exact match?
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
291c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      bestValue = fData->fBackwardsTrie->getValue();
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      bestPosn = utext_getNativeIndex(fText.getAlias());
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(bestPosn>=0) {
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //int32_t bestValue = fBackwardsTrie->getValue();
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (UChar)uch, r, bestValue);
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      if(bestValue == kMATCH) { // exact match!
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        //if(debug2) u_printf(" exact backward match\n");
305c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        return kExceptionHere; // See if the next is another exception.
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      } else if(bestValue == kPARTIAL
307c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert                && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        //if(debug2) u_printf(" partial backward match\n");
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // to see if it matches something going forward.
311c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        fData->fForwardsPartialTrie->reset();
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        //if(debug2) u_printf("Retrying at %d\n", bestPosn);
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
316c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert              USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(USTRINGTRIE_MATCHES(rfwd)) {
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          // only full matches here, nothing to check
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          // skip the next:
323c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            return kExceptionHere;
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          // no match (no exception) -return the 'underlying' break
327c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          return kNoExceptionHere;
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      } else {
330c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        return kNoExceptionHere; // internal error and/or no forwards trie
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      }
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r);  // no best match
334c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      return kNoExceptionHere; // No match - so exit. Not an exception.
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
336c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
337c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
338c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert// the workhorse single next.
339c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
340c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
341c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  if(n == UBRK_DONE || // at end  or
342c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
343c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      return n;
344c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  }
345c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  // OK, do we need to break here?
346c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  UErrorCode status = U_ZERO_ERROR;
347c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  // refresh text
348c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  resetState(status);
349c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  if(U_FAILURE(status)) return UBRK_DONE; // bail out
350c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  int64_t utextLen = utext_nativeLength(fText.getAlias());
351c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
352c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
353c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
354c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
355c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
356c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    switch(m) {
357c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    case kExceptionHere:
358c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
359c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      continue;
360c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
361c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    default:
362c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    case kNoExceptionHere:
363c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      return n;
364c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    }
365c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  }
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  return n;
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
369c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
370c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
371c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  if(n == 0 || n == UBRK_DONE || // at end  or
372c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
373c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      return n;
374c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  }
375c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  // OK, do we need to break here?
376c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  UErrorCode status = U_ZERO_ERROR;
377c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  // refresh text
378c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  resetState(status);
379c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  if(U_FAILURE(status)) return UBRK_DONE; // bail out
380c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
381c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
382c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
383c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
384c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
385c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    switch(m) {
386c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    case kExceptionHere:
387c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
388c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      continue;
389c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
390c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    default:
391c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    case kNoExceptionHere:
392c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert      return n;
393c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    }
394c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  }
395c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return n;
396c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
397c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
398c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
399c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
400c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::next() {
401c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return internalNext(fDelegate->next());
402c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
403c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
404c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
405c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::first(void) {
40664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert  // Don't suppress a break opportunity at the beginning of text.
40764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert  return fDelegate->first();
408c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
409c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
410c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
411c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
412c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return internalPrev(fDelegate->preceding(offset));
413c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
414c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
415c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
416c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::previous(void) {
417c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return internalPrev(fDelegate->previous());
418c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
419c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
420c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertUBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
42164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert  if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
42264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert
42364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert  if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
424c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
425c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  UErrorCode status = U_ZERO_ERROR;
426c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  resetState(status);
427c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
428c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
429c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
430c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  switch(m) {
431c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  case kExceptionHere:
432c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    return false;
433c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  default:
434c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  case kNoExceptionHere:
435c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    return true;
436c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  }
437c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
438c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
439c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
440c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::next(int32_t offset) {
441c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return internalNext(fDelegate->next(offset));
442c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
443c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
444c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
445c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::following(int32_t offset) {
446c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return internalNext(fDelegate->following(offset));
447c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
448c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
449c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertint32_t
450c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik RoubertSimpleFilteredSentenceBreakIterator::last(void) {
451c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  // Don't suppress a break opportunity at the end of text.
452c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return fDelegate->last();
453c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert}
454c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
455c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert
456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/**
457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Concrete implementation of builder class.
458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */
459c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubertclass U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual ~SimpleFilteredBreakIteratorBuilder();
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  UStringSet fSet;
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusSimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  : fSet(status)
477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius{
478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  : fSet(status)
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(U_SUCCESS(status)) {
48464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    UErrorCode subStatus = U_ZERO_ERROR;
48564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
48664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
48764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      status = subStatus; // copy the failing status
48864339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#if FB_DEBUG
48964339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
49064339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#endif
49164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      return;  // leaves the builder empty, if you try to use it.
49264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    }
49364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
49464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
49564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      status = subStatus; // copy the failing status
49664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#if FB_DEBUG
49764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
49864339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#endif
49964339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      return;  // leaves the builder empty, if you try to use it.
50064339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    }
50164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));
50264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert
50364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#if FB_DEBUG
50464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    {
50564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      UErrorCode subsub = subStatus;
50664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
50764339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    }
50864339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#endif
50964339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert
51064339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {
51164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      status = subStatus; // copy the failing status
51264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#if FB_DEBUG
51364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
51464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#endif
51564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert      return;  // leaves the builder empty, if you try to use it.
51664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    }
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalUResourceBundlePointer strs;
51964339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert    subStatus = status; // Pick up inherited warning status now
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    do {
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      if(strs.isValid() && U_SUCCESS(subStatus)) {
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        suppressBreakAfter(str, status); // load the string
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      }
526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } while (strs.isValid() && U_SUCCESS(subStatus));
527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      status = subStatus;
529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool
534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  UBool r = fSet.add(exception, status);
537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  FB_TRACE("suppressBreakAfter",&exception,r,0);
538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  return r;
539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool
542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius{
544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  UBool r = fSet.remove(exception, status);
545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  FB_TRACE("unsuppressBreakAfter",&exception,r,0);
546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  return r;
547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/**
550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Work around this.
552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *
553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note: "new UnicodeString[subCount]" ends up calling global operator new
554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * on MSVC2012 for some reason.
555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */
556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic inline UnicodeString* newUnicodeStringArray(size_t count) {
557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return new UnicodeString[count ? count : 1];
558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusBreakIterator *
561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  LocalPointer<BreakIterator> adopt(adoptBreakIterator);
563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
5651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(U_FAILURE(status)) {
567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return NULL;
568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  int32_t revCount = 0;
571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  int32_t fwdCount = 0;
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  int32_t subCount = fSet.size();
574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  LocalArray<UnicodeString> ustrs(ustrs_ptr);
578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  LocalMemory<int> partials;
580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  partials.allocateInsteadAndReset(subCount);
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."
584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  int n=0;
586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  for ( int32_t i = 0;
587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        i<fSet.size();
588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i++) {
589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    const UnicodeString *abbr = fSet.getStringAt(i);
590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if(abbr) {
591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("build",abbr,TRUE,i);
592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      ustrs[n] = *abbr; // copy by value
593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    } else {
595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("build",abbr,FALSE,i);
596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      status = U_MEMORY_ALLOCATION_ERROR;
597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      return NULL;
598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    partials[n] = 0; // default: not partial
600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    n++;
601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  // first pass - find partials.
603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  for(int i=0;i<subCount;i++) {
604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("partial",&ustrs[i],FALSE,i);
607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // is partial.
608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // is it unique?
609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      int sameAs = -1;
610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      for(int j=0;j<subCount;j++) {
611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(j==i) continue;
612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius          FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          if(partials[j]==0) { // hasn't been processed yet
616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            partials[j] = kSuppressInReverse | kAddToForward;
617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            FB_TRACE("suppressing",&ustrs[j],FALSE,j);
618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          } else if(partials[j] & kSuppressInReverse) {
619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sameAs = j; // the other entry is already in the reverse table.
620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius          }
621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      }
623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      UnicodeString prefix(ustrs[i], 0, nn+1);
626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      if(sameAs == -1 && partials[i] == 0) {
627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // first one - add the prefix to the reverse table.
628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prefix.reverse();
629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        builder->add(prefix, kPARTIAL, status);
630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        revCount++;
631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        FB_TRACE("Added partial",&prefix,FALSE, i);
632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        partials[i] = kSuppressInReverse | kAddToForward;
634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      } else {
635f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        FB_TRACE("NOT adding partial",&prefix,FALSE, i);
636f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      }
638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  for(int i=0;i<subCount;i++) {
641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(partials[i]==0) {
642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      ustrs[i].reverse();
643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      builder->add(ustrs[i], kMATCH, status);
644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      revCount++;
645f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
647f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // an optimization would be to only add the portion after the '.'
650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // instead of "Ph.D." since we already know the "Ph." part is a match.
652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      // would need the trie to be able to hold 0-length strings, though.
653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      builder2->add(ustrs[i], kMATCH, status); // forward
654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      fwdCount++;
655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      //ustrs[i].reverse();
656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
659f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  FB_TRACE("AbbrCount",NULL,FALSE, subCount);
660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(revCount>0) {
662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(status)) {
664f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      return NULL;
666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(fwdCount>0) {
670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(status)) {
672f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      return NULL;
674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  }
676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
677f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius  return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// ----------- Base class implementation
682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusFilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusFilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusFilteredBreakIteratorBuilder *
690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusFilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(U_FAILURE(status)) return NULL;
6921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
693c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return (U_SUCCESS(status))? ret.orphan(): NULL;
694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusFilteredBreakIteratorBuilder *
697ffdc27edd5503111189fc11165c5a11289a71f79Fredrik RoubertFilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius  if(U_FAILURE(status)) return NULL;
6991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
700c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert  return (U_SUCCESS(status))? ret.orphan(): NULL;
701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
7050596faeddefbf198de137d5e893708495ab1584cFredrik Roubert#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
706