1/*
2**********************************************************************
3*   Copyright (C) 2008-2010, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   05/11/2008  Andy Heninger  Port from Java
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
14
15#include "unicode/unifilt.h"
16#include "unicode/uchar.h"
17#include "unicode/uniset.h"
18#include "unicode/brkiter.h"
19#include "brktrans.h"
20#include "unicode/uchar.h"
21#include "cmemory.h"
22#include "uprops.h"
23#include "uinvchar.h"
24#include "util.h"
25#include "uvectr32.h"
26
27U_NAMESPACE_BEGIN
28
29UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
30
31static const UChar SPACE       = 32;  // ' '
32
33
34/**
35 * Constructs a transliterator with the default delimiters '{' and
36 * '}'.
37 */
38BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
39    Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
40    fInsertion(SPACE) {
41        bi = NULL;
42        UErrorCode status = U_ZERO_ERROR;
43        boundaries = new UVector32(status);
44    }
45
46
47/**
48 * Destructor.
49 */
50BreakTransliterator::~BreakTransliterator() {
51    delete bi;
52    bi = NULL;
53    delete boundaries;
54    boundaries = NULL;
55}
56
57/**
58 * Copy constructor.
59 */
60BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
61    Transliterator(o) {
62        bi = NULL;
63        if (o.bi != NULL) {
64            bi = o.bi->clone();
65        }
66        fInsertion = o.fInsertion;
67        UErrorCode status = U_ZERO_ERROR;
68        boundaries = new UVector32(status);
69    }
70
71
72/**
73 * Transliterator API.
74 */
75Transliterator* BreakTransliterator::clone(void) const {
76    return new BreakTransliterator(*this);
77}
78
79/**
80 * Implements {@link Transliterator#handleTransliterate}.
81 */
82void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
83                                                    UBool isIncremental ) const {
84
85        UErrorCode status = U_ZERO_ERROR;
86        boundaries->removeAllElements();
87        BreakTransliterator *nonConstThis = (BreakTransliterator *)this;
88        nonConstThis->getBreakIterator(); // Lazy-create it if necessary
89        UnicodeString sText = replaceableAsString(text);
90        bi->setText(sText);
91        bi->preceding(offsets.start);
92
93        // To make things much easier, we will stack the boundaries, and then insert at the end.
94        // generally, we won't need too many, since we will be filtered.
95
96        int32_t boundary;
97        for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
98            if (boundary == 0) continue;
99            // HACK: Check to see that preceeding item was a letter
100
101            UChar32 cp = sText.char32At(boundary-1);
102            int type = u_charType(cp);
103            //System.out.println(Integer.toString(cp,16) + " (before): " + type);
104            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
105
106            cp = sText.char32At(boundary);
107            type = u_charType(cp);
108            //System.out.println(Integer.toString(cp,16) + " (after): " + type);
109            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
110
111            boundaries->addElement(boundary, status);
112            // printf("Boundary at %d\n", boundary);
113        }
114
115        int delta = 0;
116        int lastBoundary = 0;
117
118        if (boundaries->size() != 0) { // if we found something, adjust
119            delta = boundaries->size() * fInsertion.length();
120            lastBoundary = boundaries->lastElementi();
121
122            // we do this from the end backwards, so that we don't have to keep updating.
123
124            while (boundaries->size() > 0) {
125                boundary = boundaries->popi();
126                text.handleReplaceBetween(boundary, boundary, fInsertion);
127            }
128        }
129
130        // Now fix up the return values
131        offsets.contextLimit += delta;
132        offsets.limit += delta;
133        offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
134
135        // TODO:  do something with U_FAILURE(status);
136        //        (need to look at transliterators overall, not just here.)
137}
138
139//
140//  getInsertion()
141//
142const UnicodeString &BreakTransliterator::getInsertion() const {
143    return fInsertion;
144}
145
146//
147//  setInsertion()
148//
149void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
150    this->fInsertion = insertion;
151}
152
153//
154//  getBreakIterator     Lazily create the break iterator if it does
155//                       not already exist.  Copied from Java, probably
156//                       better to just create it in the constructor.
157//
158BreakIterator *BreakTransliterator::getBreakIterator() {
159    UErrorCode status = U_ZERO_ERROR;
160    if (bi == NULL) {
161        // Note:  Thai breaking behavior is universal, it is not
162        //        tied to the Thai locale.
163        bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
164    }
165    return bi;
166}
167
168//
169//   replaceableAsString   Hack to let break iterators work
170//                         on the replaceable text from transliterators.
171//                         In practice, the only real Replaceable type that we
172//                         will be seeing is UnicodeString, so this function
173//                         will normally be efficient.
174//
175UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
176    UnicodeString s;
177    UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
178    if (rs != NULL) {
179        s = *rs;
180    } else {
181        r.extractBetween(0, r.length(), s);
182    }
183    return s;
184}
185
186U_NAMESPACE_END
187
188#endif /* #if !UCONFIG_NO_TRANSLITERATION */
189