1/* 2********************************************************************** 3* Copyright (C) 2008-2010, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 05/11/2008 Andy Heninger Port from Java 8********************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION 14 15#include "unicode/unifilt.h" 16#include "unicode/uchar.h" 17#include "unicode/uniset.h" 18#include "unicode/brkiter.h" 19#include "brktrans.h" 20#include "unicode/uchar.h" 21#include "cmemory.h" 22#include "uprops.h" 23#include "uinvchar.h" 24#include "util.h" 25#include "uvectr32.h" 26 27U_NAMESPACE_BEGIN 28 29UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) 30 31static const UChar SPACE = 32; // ' ' 32 33 34/** 35 * Constructs a transliterator with the default delimiters '{' and 36 * '}'. 37 */ 38BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : 39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), 40 fInsertion(SPACE) { 41 bi = NULL; 42 UErrorCode status = U_ZERO_ERROR; 43 boundaries = new UVector32(status); 44 } 45 46 47/** 48 * Destructor. 49 */ 50BreakTransliterator::~BreakTransliterator() { 51 delete bi; 52 bi = NULL; 53 delete boundaries; 54 boundaries = NULL; 55} 56 57/** 58 * Copy constructor. 59 */ 60BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : 61 Transliterator(o) { 62 bi = NULL; 63 if (o.bi != NULL) { 64 bi = o.bi->clone(); 65 } 66 fInsertion = o.fInsertion; 67 UErrorCode status = U_ZERO_ERROR; 68 boundaries = new UVector32(status); 69 } 70 71 72/** 73 * Transliterator API. 74 */ 75Transliterator* BreakTransliterator::clone(void) const { 76 return new BreakTransliterator(*this); 77} 78 79/** 80 * Implements {@link Transliterator#handleTransliterate}. 81 */ 82void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, 83 UBool isIncremental ) const { 84 85 UErrorCode status = U_ZERO_ERROR; 86 boundaries->removeAllElements(); 87 BreakTransliterator *nonConstThis = (BreakTransliterator *)this; 88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary 89 UnicodeString sText = replaceableAsString(text); 90 bi->setText(sText); 91 bi->preceding(offsets.start); 92 93 // To make things much easier, we will stack the boundaries, and then insert at the end. 94 // generally, we won't need too many, since we will be filtered. 95 96 int32_t boundary; 97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { 98 if (boundary == 0) continue; 99 // HACK: Check to see that preceeding item was a letter 100 101 UChar32 cp = sText.char32At(boundary-1); 102 int type = u_charType(cp); 103 //System.out.println(Integer.toString(cp,16) + " (before): " + type); 104 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; 105 106 cp = sText.char32At(boundary); 107 type = u_charType(cp); 108 //System.out.println(Integer.toString(cp,16) + " (after): " + type); 109 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; 110 111 boundaries->addElement(boundary, status); 112 // printf("Boundary at %d\n", boundary); 113 } 114 115 int delta = 0; 116 int lastBoundary = 0; 117 118 if (boundaries->size() != 0) { // if we found something, adjust 119 delta = boundaries->size() * fInsertion.length(); 120 lastBoundary = boundaries->lastElementi(); 121 122 // we do this from the end backwards, so that we don't have to keep updating. 123 124 while (boundaries->size() > 0) { 125 boundary = boundaries->popi(); 126 text.handleReplaceBetween(boundary, boundary, fInsertion); 127 } 128 } 129 130 // Now fix up the return values 131 offsets.contextLimit += delta; 132 offsets.limit += delta; 133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; 134 135 // TODO: do something with U_FAILURE(status); 136 // (need to look at transliterators overall, not just here.) 137} 138 139// 140// getInsertion() 141// 142const UnicodeString &BreakTransliterator::getInsertion() const { 143 return fInsertion; 144} 145 146// 147// setInsertion() 148// 149void BreakTransliterator::setInsertion(const UnicodeString &insertion) { 150 this->fInsertion = insertion; 151} 152 153// 154// getBreakIterator Lazily create the break iterator if it does 155// not already exist. Copied from Java, probably 156// better to just create it in the constructor. 157// 158BreakIterator *BreakTransliterator::getBreakIterator() { 159 UErrorCode status = U_ZERO_ERROR; 160 if (bi == NULL) { 161 // Note: Thai breaking behavior is universal, it is not 162 // tied to the Thai locale. 163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); 164 } 165 return bi; 166} 167 168// 169// replaceableAsString Hack to let break iterators work 170// on the replaceable text from transliterators. 171// In practice, the only real Replaceable type that we 172// will be seeing is UnicodeString, so this function 173// will normally be efficient. 174// 175UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { 176 UnicodeString s; 177 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); 178 if (rs != NULL) { 179 s = *rs; 180 } else { 181 r.extractBetween(0, r.length(), s); 182 } 183 return s; 184} 185 186U_NAMESPACE_END 187 188#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 189