/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2012-2016, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.DONE32;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.current32;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport static android.icu.impl.CharacterIteration.next32;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.CharacterIterator;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Assert;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass CjkBreakEngine extends DictionaryBreakEngine {
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final UnicodeSet fHangulWordSet = new UnicodeSet();
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final UnicodeSet fHanWordSet = new UnicodeSet();
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final UnicodeSet fKatakanaWordSet = new UnicodeSet();
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final UnicodeSet fHiraganaWordSet = new UnicodeSet();
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static {
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHangulWordSet.applyPattern("[\\uac00-\\ud7a3]");
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHanWordSet.applyPattern("[:Han:]");
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fKatakanaWordSet.applyPattern("[[:Katakana:]\\uff9e\\uff9f]");
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHiraganaWordSet.applyPattern("[:Hiragana:]");
31f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // freeze them all
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHangulWordSet.freeze();
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHanWordSet.freeze();
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fKatakanaWordSet.freeze();
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fHiraganaWordSet.freeze();
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private DictionaryMatcher fDictionary = null;
40f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public CjkBreakEngine(boolean korean) throws IOException {
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        super(BreakIterator.KIND_WORD);
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fDictionary = DictionaryData.loadDictionaryFor("Hira");
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (korean) {
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setCharacters(fHangulWordSet);
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else { //Chinese and Japanese
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet cjSet = new UnicodeSet();
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cjSet.addAll(fHanWordSet);
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cjSet.addAll(fKatakanaWordSet);
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cjSet.addAll(fHiraganaWordSet);
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            setCharacters(cjSet);
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
57f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean equals(Object obj) {
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (obj instanceof CjkBreakEngine) {
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            CjkBreakEngine other = (CjkBreakEngine)obj;
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this.fSet.equals(other.fSet);
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return false;
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
66f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int hashCode() {
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getClass().hashCode();
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
70f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int kMaxKatakanaLength = 8;
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int kMaxKatakanaGroupLength = 20;
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int maxSnlp = 255;
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int kint32max = Integer.MAX_VALUE;
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int getKatakanaCost(int wordlength) {
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int katakanaCost[] =  new int[] { 8192, 984, 408, 240, 204, 252, 300, 372, 480 };
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (wordlength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordlength];
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
79f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isKatakana(int value) {
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (value >= 0xFF66 && value <= 0xFF9F);
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
84f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
85f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int divideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos,
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            DequeI foundBreaks) {
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (startPos >= endPos) {
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        inText.setIndex(startPos);
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int inputLength = endPos - startPos;
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] charPositions = new int[inputLength + 1];
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuffer s = new StringBuffer("");
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        inText.setIndex(startPos);
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (inText.getIndex() < endPos) {
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            s.append(inText.current());
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            inText.next();
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String prenormstr = s.toString();
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean isNormalized = Normalizer.quickCheck(prenormstr, Normalizer.NFKC) == Normalizer.YES ||
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                               Normalizer.isNormalized(prenormstr, Normalizer.NFKC, 0);
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CharacterIterator text;
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int numChars = 0;
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (isNormalized) {
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            text = new java.text.StringCharacterIterator(prenormstr);
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = 0;
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            charPositions[0] = 0;
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (index < prenormstr.length()) {
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int codepoint = prenormstr.codePointAt(index);
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index += Character.charCount(codepoint);
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                numChars++;
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                charPositions[numChars] = index;
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String normStr = Normalizer.normalize(prenormstr, Normalizer.NFKC);
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            text = new java.text.StringCharacterIterator(normStr);
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            charPositions = new int[normStr.length() + 1];
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Normalizer normalizer = new Normalizer(prenormstr, Normalizer.NFKC, 0);
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = 0;
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            charPositions[0] = 0;
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (index < normalizer.endIndex()) {
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                normalizer.next();
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                numChars++;
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = normalizer.getIndex();
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                charPositions[numChars] = index;
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
131f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // From here on out, do the algorithm. Note that our indices
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // refer to indices within the normalized string.
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] bestSnlp = new int[numChars + 1];
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        bestSnlp[0] = 0;
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 1; i <= numChars; i++) {
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            bestSnlp[i] = kint32max;
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int[] prev = new int[numChars + 1];
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i <= numChars; i++) {
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prev[i] = -1;
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
144f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        final int maxWordSize = 20;
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int values[] = new int[numChars];
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lengths[] = new int[numChars];
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // dynamic programming to find the best segmentation
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean is_prev_katakana = false;
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < numChars; i++) {
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            text.setIndex(i);
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (bestSnlp[i] == kint32max) {
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
155f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i);
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] count_ = new int[1];
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fDictionary.matches(text, maxSearchLength, lengths, count_, maxSearchLength, values);
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int count = count_[0];
160f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
161f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            // if there are no single character matches found in the dictionary
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // starting with this character, treat character as a 1-character word
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // with the highest value possible (i.e. the least likely to occur).
164f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            // Exclude Korean characters from this treatment, as they should be
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // left together by default.
166f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            text.setIndex(i);  // fDictionary.matches() advances the text position; undo that.
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if ((count == 0 || lengths[0] != 1) && current32(text) != DONE32 && !fHangulWordSet.contains(current32(text))) {
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                values[count] = maxSnlp;
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lengths[count] = 1;
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                count++;
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int j = 0; j < count; j++) {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int newSnlp = bestSnlp[i] + values[j];
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (newSnlp < bestSnlp[lengths[j] + i]) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    bestSnlp[lengths[j] + i] = newSnlp;
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prev[lengths[j] + i] = i;
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
180f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // In Japanese, single-character Katakana words are pretty rare.
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // So we apply the following heuristic to Katakana: any continuous
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // run of Katakana characters is considered a candidate word with
184f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            // a default cost specified in the katakanaCost table according
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // to its length.
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean is_katakana = isKatakana(current32(text));
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!is_prev_katakana && is_katakana) {
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int j = i + 1;
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                next32(text);
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (j < numChars && (j - i) < kMaxKatakanaGroupLength && isKatakana(current32(text))) {
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    next32(text);
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++j;
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
194f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if ((j - i) < kMaxKatakanaGroupLength) {
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int newSnlp = bestSnlp[i] + getKatakanaCost(j - i);
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (newSnlp < bestSnlp[j]) {
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        bestSnlp[j] = newSnlp;
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prev[j] = i;
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            is_prev_katakana = is_katakana;
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int t_boundary[] = new int[numChars + 1];
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int numBreaks = 0;
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (bestSnlp[numChars] == kint32max) {
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            t_boundary[numBreaks] = numChars;
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            numBreaks++;
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i = numChars; i > 0; i = prev[i]) {
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                t_boundary[numBreaks] = i;
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                numBreaks++;
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Assert.assrt(prev[t_boundary[numBreaks - 1]] == 0);
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (foundBreaks.size() == 0 || foundBreaks.peek() < startPos) {
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            t_boundary[numBreaks++] = 0;
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int correctedNumBreaks = 0;
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = numBreaks - 1; i >= 0; i--) {
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int pos = charPositions[t_boundary[i]] + startPos;
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!(foundBreaks.contains(pos) || pos == startPos)) {
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                foundBreaks.push(charPositions[t_boundary[i]] + startPos);
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                correctedNumBreaks++;
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!foundBreaks.isEmpty() && foundBreaks.peek() == endPos) {
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            foundBreaks.pop();
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            correctedNumBreaks--;
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
236f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        if (!foundBreaks.isEmpty())
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            inText.setIndex(foundBreaks.peek());
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return correctedNumBreaks;
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
241