12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 6f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert * Copyright (C) 2012-2016, International Business Machines Corporation and * 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved. * 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.DONE32; 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.current32; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.next32; 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.CharacterIterator; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Assert; 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass CjkBreakEngine extends DictionaryBreakEngine { 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final UnicodeSet fHangulWordSet = new UnicodeSet(); 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final UnicodeSet fHanWordSet = new UnicodeSet(); 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final UnicodeSet fKatakanaWordSet = new UnicodeSet(); 252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final UnicodeSet fHiraganaWordSet = new UnicodeSet(); 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static { 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHangulWordSet.applyPattern("[\\uac00-\\ud7a3]"); 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHanWordSet.applyPattern("[:Han:]"); 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fKatakanaWordSet.applyPattern("[[:Katakana:]\\uff9e\\uff9f]"); 302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHiraganaWordSet.applyPattern("[:Hiragana:]"); 31f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // freeze them all 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHangulWordSet.freeze(); 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHanWordSet.freeze(); 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fKatakanaWordSet.freeze(); 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fHiraganaWordSet.freeze(); 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private DictionaryMatcher fDictionary = null; 40f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public CjkBreakEngine(boolean korean) throws IOException { 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller super(BreakIterator.KIND_WORD); 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fDictionary = DictionaryData.loadDictionaryFor("Hira"); 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (korean) { 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setCharacters(fHangulWordSet); 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { //Chinese and Japanese 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet cjSet = new UnicodeSet(); 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cjSet.addAll(fHanWordSet); 492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cjSet.addAll(fKatakanaWordSet); 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cjSet.addAll(fHiraganaWordSet); 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK 532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setCharacters(cjSet); 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 57f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public boolean equals(Object obj) { 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (obj instanceof CjkBreakEngine) { 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CjkBreakEngine other = (CjkBreakEngine)obj; 612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return this.fSet.equals(other.fSet); 622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return false; 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 66f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int hashCode() { 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return getClass().hashCode(); 692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 70f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int kMaxKatakanaLength = 8; 722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int kMaxKatakanaGroupLength = 20; 732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int maxSnlp = 255; 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static final int kint32max = Integer.MAX_VALUE; 752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static int getKatakanaCost(int wordlength) { 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int katakanaCost[] = new int[] { 8192, 984, 408, 240, 204, 252, 300, 372, 480 }; 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (wordlength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordlength]; 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 79f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private static boolean isKatakana(int value) { 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) || 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller (value >= 0xFF66 && value <= 0xFF9F); 832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 84f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 85f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert @Override 862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller public int divideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos, 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller DequeI foundBreaks) { 882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (startPos >= endPos) { 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return 0; 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inText.setIndex(startPos); 932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int inputLength = endPos - startPos; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] charPositions = new int[inputLength + 1]; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuffer s = new StringBuffer(""); 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inText.setIndex(startPos); 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (inText.getIndex() < endPos) { 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller s.append(inText.current()); 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inText.next(); 1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String prenormstr = s.toString(); 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean isNormalized = Normalizer.quickCheck(prenormstr, Normalizer.NFKC) == Normalizer.YES || 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Normalizer.isNormalized(prenormstr, Normalizer.NFKC, 0); 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller CharacterIterator text; 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numChars = 0; 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (isNormalized) { 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text = new java.text.StringCharacterIterator(prenormstr); 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = 0; 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charPositions[0] = 0; 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (index < prenormstr.length()) { 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int codepoint = prenormstr.codePointAt(index); 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index += Character.charCount(codepoint); 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numChars++; 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charPositions[numChars] = index; 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String normStr = Normalizer.normalize(prenormstr, Normalizer.NFKC); 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text = new java.text.StringCharacterIterator(normStr); 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charPositions = new int[normStr.length() + 1]; 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Normalizer normalizer = new Normalizer(prenormstr, Normalizer.NFKC, 0); 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int index = 0; 1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charPositions[0] = 0; 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (index < normalizer.endIndex()) { 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller normalizer.next(); 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numChars++; 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller index = normalizer.getIndex(); 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller charPositions[numChars] = index; 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 131f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // From here on out, do the algorithm. Note that our indices 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // refer to indices within the normalized string. 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] bestSnlp = new int[numChars + 1]; 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bestSnlp[0] = 0; 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 1; i <= numChars; i++) { 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bestSnlp[i] = kint32max; 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] prev = new int[numChars + 1]; 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i <= numChars; i++) { 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev[i] = -1; 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 144f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller final int maxWordSize = 20; 1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int values[] = new int[numChars]; 1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int lengths[] = new int[numChars]; 1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // dynamic programming to find the best segmentation 1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean is_prev_katakana = false; 1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = 0; i < numChars; i++) { 1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller text.setIndex(i); 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bestSnlp[i] == kint32max) { 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller continue; 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 155f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i); 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] count_ = new int[1]; 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fDictionary.matches(text, maxSearchLength, lengths, count_, maxSearchLength, values); 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int count = count_[0]; 160f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 161f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // if there are no single character matches found in the dictionary 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // starting with this character, treat character as a 1-character word 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with the highest value possible (i.e. the least likely to occur). 164f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // Exclude Korean characters from this treatment, as they should be 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // left together by default. 166f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert text.setIndex(i); // fDictionary.matches() advances the text position; undo that. 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((count == 0 || lengths[0] != 1) && current32(text) != DONE32 && !fHangulWordSet.contains(current32(text))) { 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller values[count] = maxSnlp; 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller lengths[count] = 1; 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller count++; 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int j = 0; j < count; j++) { 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newSnlp = bestSnlp[i] + values[j]; 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (newSnlp < bestSnlp[lengths[j] + i]) { 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bestSnlp[lengths[j] + i] = newSnlp; 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev[lengths[j] + i] = i; 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 180f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // In Japanese, single-character Katakana words are pretty rare. 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // So we apply the following heuristic to Katakana: any continuous 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // run of Katakana characters is considered a candidate word with 184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // a default cost specified in the katakanaCost table according 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to its length. 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean is_katakana = isKatakana(current32(text)); 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!is_prev_katakana && is_katakana) { 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int j = i + 1; 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller next32(text); 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (j < numChars && (j - i) < kMaxKatakanaGroupLength && isKatakana(current32(text))) { 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller next32(text); 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ++j; 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 194f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if ((j - i) < kMaxKatakanaGroupLength) { 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int newSnlp = bestSnlp[i] + getKatakanaCost(j - i); 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (newSnlp < bestSnlp[j]) { 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller bestSnlp[j] = newSnlp; 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prev[j] = i; 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller is_prev_katakana = is_katakana; 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int t_boundary[] = new int[numChars + 1]; 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int numBreaks = 0; 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (bestSnlp[numChars] == kint32max) { 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t_boundary[numBreaks] = numChars; 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numBreaks++; 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = numChars; i > 0; i = prev[i]) { 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t_boundary[numBreaks] = i; 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller numBreaks++; 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Assert.assrt(prev[t_boundary[numBreaks - 1]] == 0); 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (foundBreaks.size() == 0 || foundBreaks.peek() < startPos) { 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller t_boundary[numBreaks++] = 0; 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int correctedNumBreaks = 0; 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int i = numBreaks - 1; i >= 0; i--) { 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int pos = charPositions[t_boundary[i]] + startPos; 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!(foundBreaks.contains(pos) || pos == startPos)) { 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foundBreaks.push(charPositions[t_boundary[i]] + startPos); 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller correctedNumBreaks++; 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!foundBreaks.isEmpty() && foundBreaks.peek() == endPos) { 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller foundBreaks.pop(); 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller correctedNumBreaks--; 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 236f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (!foundBreaks.isEmpty()) 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller inText.setIndex(foundBreaks.peek()); 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return correctedNumBreaks; 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 241