163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer// © 2017 and later: Unicode, Inc. and others.
263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer// License & terms of use: http://www.unicode.org/copyright.html#License
363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererpackage com.ibm.icu.text;
463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererimport java.nio.BufferOverflowException;
663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererimport java.util.Arrays;
763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer/**
963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Records lengths of string edits but not replacement text.
1063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Supports replacements, insertions, deletions in linear progression.
1163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Does not support moving/reordering of text.
1263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer *
1363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59
1463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release.
1563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */
1663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererpublic final class Edits {
1763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // 0000uuuuuuuuuuuu records u+1 unchanged text units.
1863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int MAX_UNCHANGED_LENGTH = 0x1000;
1963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
2063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
21fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
22fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    private static final int MAX_SHORT_CHANGE_OLD_LENGTH = 6;
23fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    private static final int MAX_SHORT_CHANGE_NEW_LENGTH = 7;
24fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    private static final int SHORT_CHANGE_NUM_MASK = 0x1ff;
2563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int MAX_SHORT_CHANGE = 0x6fff;
2663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
2763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // 0111mmmmmmnnnnnn records a replacement of m text units with n.
2863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // m or n = 61: actual length follows in the next edits array unit.
2963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // m or n = 62..63: actual length follows in the next two edits array units.
3063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // Bit 30 of the actual length is in the head unit.
3163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    // Trailing units have bit 15 set.
3263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int LENGTH_IN_1TRAIL = 61;
3363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int LENGTH_IN_2TRAIL = 62;
3463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
3563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private static final int STACK_CAPACITY = 100;
3663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private char[] array;
3763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private int length;
3863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private int delta;
39fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    private int numChanges;
4063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
4163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
4263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Constructs an empty object.
4363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
4463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
4563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
4663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public Edits() {
4763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        array = new char[STACK_CAPACITY];
4863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
4963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
5063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
5163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Resets the data but may not release memory.
5263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
5363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
5463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
5563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public void reset() {
56fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        length = delta = numChanges = 0;
5763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
5863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
5963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private void setLastUnit(int last) {
6063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        array[length - 1] = (char)last;
6163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
6263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private int lastUnit() {
6363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return length > 0 ? array[length - 1] : 0xffff;
6463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
6563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
6663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
6763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Adds a record for an unchanged segment of text.
6863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Normally called from inside ICU string transformation functions, not user code.
6963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
7063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
7163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
7263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public void addUnchanged(int unchangedLength) {
7363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if(unchangedLength < 0) {
7463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            throw new IllegalArgumentException(
7563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    "addUnchanged(" + unchangedLength + "): length must not be negative");
7663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
7763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        // Merge into previous unchanged-text record, if any.
7863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        int last = lastUnit();
7963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if(last < MAX_UNCHANGED) {
8063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            int remaining = MAX_UNCHANGED - last;
8163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (remaining >= unchangedLength) {
8263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                setLastUnit(last + unchangedLength);
8363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                return;
8463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
8563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            setLastUnit(MAX_UNCHANGED);
8663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            unchangedLength -= remaining;
8763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
8863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        // Split large lengths into multiple units.
8963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
9063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            append(MAX_UNCHANGED);
9163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            unchangedLength -= MAX_UNCHANGED_LENGTH;
9263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
9363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        // Write a small (remaining) length.
9463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if(unchangedLength > 0) {
9563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            append(unchangedLength - 1);
9663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
9763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
9863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
9963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
10063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Adds a record for a text replacement/insertion/deletion.
10163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Normally called from inside ICU string transformation functions, not user code.
10263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
10363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
10463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
10563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public void addReplace(int oldLength, int newLength) {
10663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if(oldLength < 0 || newLength < 0) {
10763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            throw new IllegalArgumentException(
10863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    "addReplace(" + oldLength + ", " + newLength +
10963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    "): both lengths must be non-negative");
11063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
11163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if (oldLength == 0 && newLength == 0) {
11263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            return;
11363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
114fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        ++numChanges;
11563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        int newDelta = newLength - oldLength;
11663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if (newDelta != 0) {
11763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
11863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
11963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                // Integer overflow or underflow.
12063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                throw new IndexOutOfBoundsException();
12163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
12263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            delta += newDelta;
12363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
12463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
125fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
126fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
127fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Merge into previous same-lengths short-replacement record, if any.
128fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int u = (oldLength << 12) | (newLength << 9);
129fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int last = lastUnit();
130fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
131fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    (last & ~SHORT_CHANGE_NUM_MASK) == u &&
132fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
133fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                setLastUnit(last + 1);
134fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return;
135fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
136fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            append(u);
137fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            return;
138fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
139fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
14063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        int head = 0x7000;
14163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
14263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            head |= oldLength << 6;
14363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            head |= newLength;
14463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            append(head);
14563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        } else if ((array.length - length) >= 5 || growArray()) {
14663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            int limit = length + 1;
14763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if(oldLength < LENGTH_IN_1TRAIL) {
14863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= oldLength << 6;
14963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else if(oldLength <= 0x7fff) {
15063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= LENGTH_IN_1TRAIL << 6;
15163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | oldLength);
15263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else {
15363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
15463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | (oldLength >> 15));
15563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | oldLength);
15663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
15763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if(newLength < LENGTH_IN_1TRAIL) {
15863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= newLength;
15963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else if(newLength <= 0x7fff) {
16063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= LENGTH_IN_1TRAIL;
16163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | newLength);
16263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else {
16363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                head |= LENGTH_IN_2TRAIL + (newLength >> 30);
16463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | (newLength >> 15));
16563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                array[limit++] = (char)(0x8000 | newLength);
16663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
16763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            array[length] = (char)head;
16863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            length = limit;
16963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
17063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
17163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
17263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private void append(int r) {
17363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if(length < array.length || growArray()) {
17463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            array[length++] = (char)r;
17563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
17663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
17763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
17863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private boolean growArray() {
17963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        int newCapacity;
18063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if (array.length == STACK_CAPACITY) {
18163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            newCapacity = 2000;
18263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        } else if (array.length == Integer.MAX_VALUE) {
18363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            throw new BufferOverflowException();
18463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        } else if (array.length >= (Integer.MAX_VALUE / 2)) {
18563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            newCapacity = Integer.MAX_VALUE;
18663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        } else {
18763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            newCapacity = 2 * array.length;
18863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
18963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        // Grow by at least 5 units so that a maximal change record will fit.
19063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        if ((newCapacity - array.length) < 5) {
19163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            throw new BufferOverflowException();
19263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
19363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        array = Arrays.copyOf(array, newCapacity);
19463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return true;
19563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
19663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
19763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
19863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * How much longer is the new text compared with the old text?
19963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return new length minus old length
20063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
20163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
20263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
20363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public int lengthDelta() { return delta; }
20463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
20563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return true if there are any change edits
20663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
20763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
20863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
209fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    public boolean hasChanges()  { return numChanges != 0; }
210fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
211fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    /**
212fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @return the number of change edits
213fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @draft ICU 60
214fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @provisional This API might change or be removed in a future release.
215fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     */
216fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    public int numberOfChanges() { return numChanges; }
21763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
21863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
21963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Access to the list of edits.
22063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @see #getCoarseIterator
22163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @see #getFineIterator
22263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
22363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
22463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
22563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public static final class Iterator {
22663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private final char[] array;
22763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private int index;
22863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private final int length;
229fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        /**
230fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * 0 if we are not within compressed equal-length changes.
231fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * Otherwise the number of remaining changes, including the current one.
232fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         */
23363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private int remaining;
23463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private final boolean onlyChanges_, coarse;
23563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
236fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        private int dir;  // iteration direction: back(<0), initial(0), forward(>0)
23763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private boolean changed;
23863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private int oldLength_, newLength_;
23963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private int srcIndex, replIndex, destIndex;
24063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
24163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private Iterator(char[] a, int len, boolean oc, boolean crs) {
24263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            array = a;
24363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            length = len;
24463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            onlyChanges_ = oc;
24563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            coarse = crs;
24663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
24763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
24863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private int readLength(int head) {
24963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (head < LENGTH_IN_1TRAIL) {
25063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                return head;
25163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else if (head < LENGTH_IN_2TRAIL) {
25263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert(index < length);
25363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert(array[index] >= 0x8000);
25463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                return array[index++] & 0x7fff;
25563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else {
25663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert((index + 2) <= length);
25763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert(array[index] >= 0x8000);
25863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert(array[index + 1] >= 0x8000);
25963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                int len = ((head & 1) << 30) |
26063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                        ((array[index] & 0x7fff) << 15) |
26163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                        (array[index + 1] & 0x7fff);
26263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                index += 2;
26363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                return len;
26463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
26563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
26663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
267fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        private void updateNextIndexes() {
26863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            srcIndex += oldLength_;
26963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (changed) {
27063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                replIndex += newLength_;
27163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
27263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            destIndex += newLength_;
27363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
27463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
275fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        private void updatePreviousIndexes() {
276fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            srcIndex -= oldLength_;
277fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (changed) {
278fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                replIndex -= newLength_;
279fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
280fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            destIndex -= newLength_;
281fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
282fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
28363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private boolean noNext() {
284fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // No change before or beyond the string.
285fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            dir = 0;
28663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            changed = false;
28763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            oldLength_ = newLength_ = 0;
28863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            return false;
28963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
29063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
29163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
29263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * Advances to the next edit.
29363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return true if there is another edit
29463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
29563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
29663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
29763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public boolean next() {
29863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            return next(onlyChanges_);
29963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
30063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
30163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        private boolean next(boolean onlyChanges) {
302fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Forward iteration: Update the string indexes to the limit of the current span,
303fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // and post-increment-read array units to assemble a new span.
304fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Leaves the array index one after the last unit of that span.
305fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (dir > 0) {
306fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                updateNextIndexes();
307fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {
308fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (dir < 0) {
309fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Turn around from previous() to next().
310fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Post-increment-read the same span again.
311fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (remaining > 0) {
312fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // Fine-grained iterator:
313fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // Stay on the current one of a sequence of compressed changes.
314fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        ++index;  // next() rests on the index after the sequence unit.
315fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        dir = 1;
316fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        return true;
317fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
318fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
319fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                dir = 1;
320fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
321fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (remaining >= 1) {
322fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Fine-grained iterator: Continue a sequence of compressed changes.
323fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (remaining > 1) {
324fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    --remaining;
325fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    return true;
326fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
327fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                remaining = 0;
32863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
32963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (index >= length) {
33063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                return noNext();
33163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
33263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            int u = array[index++];
33363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (u <= MAX_UNCHANGED) {
33463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                // Combine adjacent unchanged ranges.
33563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                changed = false;
33663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                oldLength_ = u + 1;
33763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
33863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    ++index;
33963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    oldLength_ += u + 1;
34063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
34163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                newLength_ = oldLength_;
34263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                if (onlyChanges) {
343fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    updateNextIndexes();
34463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    if (index >= length) {
34563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                        return noNext();
34663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    }
34763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    // already fetched u > MAX_UNCHANGED at index
34863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    ++index;
34963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                } else {
35063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    return true;
35163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
35263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
35363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            changed = true;
35463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            if (u <= MAX_SHORT_CHANGE) {
355fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int oldLen = u >> 12;
356fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
357fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
35863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                if (coarse) {
359fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = num * oldLen;
360fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = num * newLen;
36163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                } else {
362fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Split a sequence of changes that was compressed into one unit.
363fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = oldLen;
364fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = newLen;
365fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (num > 1) {
366fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        remaining = num;  // This is the first of two or more changes.
367fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
36863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    return true;
36963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
37063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            } else {
37163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                assert(u <= 0x7fff);
37263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                oldLength_ = readLength((u >> 6) & 0x3f);
37363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                newLength_ = readLength(u & 0x3f);
37463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                if (!coarse) {
37563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    return true;
37663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
37763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
37863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            // Combine adjacent changes.
37963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            while (index < length && (u = array[index]) > MAX_UNCHANGED) {
38063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                ++index;
38163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                if (u <= MAX_SHORT_CHANGE) {
382fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
383fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ += (u >> 12) * num;
384fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
38563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                } else {
38663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    assert(u <= 0x7fff);
387fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ += readLength((u >> 6) & 0x3f);
388fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ += readLength(u & 0x3f);
389fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
390fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
391fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            return true;
392fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
393fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
394fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        private boolean previous() {
395fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Backward iteration: Pre-decrement-read array units to assemble a new span,
396fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // then update the string indexes to the start of that span.
397fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Leaves the array index on the head unit of that span.
398fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (dir >= 0) {
399fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (dir > 0) {
400fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Turn around from next() to previous().
401fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Set the string indexes to the span limit and
402fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // pre-decrement-read the same span again.
403fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (remaining > 0) {
404fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // Fine-grained iterator:
405fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // Stay on the current one of a sequence of compressed changes.
406fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        --index;  // previous() rests on the sequence unit.
407fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        dir = -1;
408fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        return true;
409fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
410fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    updateNextIndexes();
411fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
412fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                dir = -1;
413fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
414fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (remaining > 0) {
415fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Fine-grained iterator: Continue a sequence of compressed changes.
416fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int u = array[index];
417fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
418fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
419fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    ++remaining;
420fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    updatePreviousIndexes();
421fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    return true;
422fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
423fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                remaining = 0;
424fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
425fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (index <= 0) {
426fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return noNext();
427fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
428fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int u = array[--index];
429fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (u <= MAX_UNCHANGED) {
430fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Combine adjacent unchanged ranges.
431fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                changed = false;
432fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                oldLength_ = u + 1;
433fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
434fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    --index;
435fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ += u + 1;
436fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
437fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                newLength_ = oldLength_;
438fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // No need to handle onlyChanges as long as previous() is called only from findIndex().
439fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                updatePreviousIndexes();
440fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return true;
441fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
442fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            changed = true;
443fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (u <= MAX_SHORT_CHANGE) {
444fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int oldLen = u >> 12;
445fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
446fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
447fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (coarse) {
448fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = num * oldLen;
449fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = num * newLen;
450fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else {
451fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Split a sequence of changes that was compressed into one unit.
452fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = oldLen;
453fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = newLen;
454fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (num > 1) {
455fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        remaining = 1;  // This is the last of two or more changes.
456fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
457fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    updatePreviousIndexes();
458fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    return true;
459fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
460fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {
461fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (u <= 0x7fff) {
462fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // The change is encoded in u alone.
463fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = readLength((u >> 6) & 0x3f);
464fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = readLength(u & 0x3f);
465fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else {
466fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Back up to the head of the change, read the lengths,
467fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // and reset the index to the head again.
468fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    assert(index > 0);
469fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    while ((u = array[--index]) > 0x7fff) {}
470fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    assert(u > MAX_SHORT_CHANGE);
471fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    int headIndex = index++;
472fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ = readLength((u >> 6) & 0x3f);
473fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ = readLength(u & 0x3f);
474fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    index = headIndex;
475fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
476fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (!coarse) {
477fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    updatePreviousIndexes();
478fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    return true;
479fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
480fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
481fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Combine adjacent changes.
482fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
483fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                --index;
484fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (u <= MAX_SHORT_CHANGE) {
485fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
486fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ += (u >> 12) * num;
487fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
488fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else if (u <= 0x7fff) {
489fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Read the lengths, and reset the index to the head again.
490fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    int headIndex = index++;
491fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ += readLength((u >> 6) & 0x3f);
492fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ += readLength(u & 0x3f);
493fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    index = headIndex;
49463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
49563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
496fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            updatePreviousIndexes();
49763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            return true;
49863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
49963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
50063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
50163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * Finds the edit that contains the source index.
50263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * The source index may be found in a non-change
50363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * even if normal iteration would skip non-changes.
50463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * Normal iteration can continue from a found edit.
50563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *
50663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * <p>The iterator state before this search logically does not matter.
50763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * (It may affect the performance of the search.)
50863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *
50963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * <p>The iterator state after this search is undefined
51063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * if the source index is out of bounds for the source string.
51163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *
51263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @param i source index
51363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return true if the edit for the source index was found
51463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
51563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
51663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
51763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public boolean findSourceIndex(int i) {
518fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            return findIndex(i, true) == 0;
519fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
520fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
521fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        /**
522fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * Finds the edit that contains the destination index.
523fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * The destination index may be found in a non-change
524fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * even if normal iteration would skip non-changes.
525fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * Normal iteration can continue from a found edit.
526fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
527fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>The iterator state before this search logically does not matter.
528fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * (It may affect the performance of the search.)
529fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
530fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>The iterator state after this search is undefined
531fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * if the source index is out of bounds for the source string.
532fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
533fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @param i destination index
534fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @return true if the edit for the destination index was found
535fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @draft ICU 60
536fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @provisional This API might change or be removed in a future release.
537fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         */
538fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        public boolean findDestinationIndex(int i) {
539fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            return findIndex(i, false) == 0;
540fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
541fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
542fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        /** @return -1: error or i<0; 0: found; 1: i>=string length */
543fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        private int findIndex(int i, boolean findSource) {
544fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (i < 0) { return -1; }
545fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int spanStart, spanLength;
546fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (findSource) {  // find source index
547fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                spanStart = srcIndex;
548fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                spanLength = oldLength_;
549fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {  // find destination index
550fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                spanStart = destIndex;
551fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                spanLength = newLength_;
552fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
553fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (i < spanStart) {
554fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (i >= (spanStart / 2)) {
555fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Search backwards.
556fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    for (;;) {
557fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        boolean hasPrevious = previous();
558fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        assert(hasPrevious);  // because i>=0 and the first span starts at 0
559fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        spanStart = findSource ? srcIndex : destIndex;
560fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        if (i >= spanStart) {
561fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            // The index is in the current span.
562fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            return 0;
563fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        }
564fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        if (remaining > 0) {
565fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            // Is the index in one of the remaining compressed edits?
566fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            // spanStart is the start of the current span, first of the remaining ones.
567fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            spanLength = findSource ? oldLength_ : newLength_;
568fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            int u = array[index];
569fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
570fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            int num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
571fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            int len = num * spanLength;
572fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            if (i >= (spanStart - len)) {
573fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                int n = ((spanStart - i - 1) / spanLength) + 1;
574fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                // 1 <= n <= num
575fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                srcIndex -= n * oldLength_;
576fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                replIndex -= n * newLength_;
577fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                destIndex -= n * newLength_;
578fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                remaining += n;
579fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                                return 0;
580fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            }
581fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            // Skip all of these edits at once.
582fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            srcIndex -= num * oldLength_;
583fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            replIndex -= num * newLength_;
584fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            destIndex -= num * newLength_;
585fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            remaining = 0;
586fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        }
587fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
588fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
58963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                // Reset the iterator to the start.
590fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                dir = 0;
59163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
592fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else if (i < (spanStart + spanLength)) {
59363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                // The index is in the current span.
594fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return 0;
59563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
59663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            while (next(false)) {
597fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (findSource) {
598fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    spanStart = srcIndex;
599fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    spanLength = oldLength_;
600fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else {
601fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    spanStart = destIndex;
602fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    spanLength = newLength_;
603fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
604fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (i < (spanStart + spanLength)) {
60563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    // The index is in the current span.
606fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    return 0;
60763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
608fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (remaining > 1) {
60963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    // Is the index in one of the remaining compressed edits?
610fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // spanStart is the start of the current span, first of the remaining ones.
611fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    int len = remaining * spanLength;
612fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (i < (spanStart + len)) {
613fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        int n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
614fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        srcIndex += n * oldLength_;
615fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        replIndex += n * newLength_;
616fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        destIndex += n * newLength_;
61763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                        remaining -= n;
618fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        return 0;
61963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    }
62063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    // Make next() skip all of these edits at once.
621fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    oldLength_ *= remaining;
622fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    newLength_ *= remaining;
62363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                    remaining = 0;
62463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer                }
62563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            }
626fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            return 1;
627fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
628fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
629fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        /**
630fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * Returns the destination index corresponding to the given source index.
631fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * If the source index is inside a change edit (not at its start),
632fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * then the destination index at the end of that edit is returned,
633fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * since there is no information about index mapping inside a change edit.
634fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
635fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>(This means that indexes to the start and middle of an edit,
636fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * for example around a grapheme cluster, are mapped to indexes
637fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * encompassing the entire edit.
638fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * The alternative, mapping an interior index to the start,
639fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * would map such an interval to an empty one.)
640fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
641fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>This operation will usually but not always modify this object.
642fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * The iterator state after this search is undefined.
643fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
644fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @param i source index
645fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @return destination index; undefined if i is not 0..string length
646fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @draft ICU 60
647fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @provisional This API might change or be removed in a future release.
648fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         */
649fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        public int destinationIndexFromSourceIndex(int i) {
650fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int where = findIndex(i, true);
651fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (where < 0) {
652fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Error or before the string.
653fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return 0;
654fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
655fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (where > 0 || i == srcIndex) {
656fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // At or after string length, or at start of the found span.
657fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return destIndex;
658fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
659fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (changed) {
660fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // In a change span, map to its end.
661fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return destIndex + newLength_;
662fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {
663fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // In an unchanged span, offset 1:1 within it.
664fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return destIndex + (i - srcIndex);
665fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
666fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
667fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
668fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        /**
669fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * Returns the source index corresponding to the given destination index.
670fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * If the destination index is inside a change edit (not at its start),
671fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * then the source index at the end of that edit is returned,
672fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * since there is no information about index mapping inside a change edit.
673fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
674fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>(This means that indexes to the start and middle of an edit,
675fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * for example around a grapheme cluster, are mapped to indexes
676fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * encompassing the entire edit.
677fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * The alternative, mapping an interior index to the start,
678fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * would map such an interval to an empty one.)
679fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
680fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * <p>This operation will usually but not always modify this object.
681fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * The iterator state after this search is undefined.
682fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         *
683fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @param i destination index
684fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @return source index; undefined if i is not 0..string length
685fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @draft ICU 60
686fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         * @provisional This API might change or be removed in a future release.
687fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert         */
688fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        public int sourceIndexFromDestinationIndex(int i) {
689fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            int where = findIndex(i, false);
690fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (where < 0) {
691fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Error or before the string.
692fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return 0;
693fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
694fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (where > 0 || i == destIndex) {
695fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // At or after string length, or at start of the found span.
696fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return srcIndex;
697fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
698fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (changed) {
699fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // In a change span, map to its end.
700fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return srcIndex + oldLength_;
701fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {
702fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // In an unchanged span, offset within it.
703fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                return srcIndex + (i - destIndex);
704fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
70563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        }
70663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
70763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
70863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return true if this edit replaces oldLength() units with newLength() different ones.
70963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *         false if oldLength units remain unchanged.
71063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
71163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
71263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
71363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public boolean hasChange() { return changed; }
71463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
71563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return the number of units in the original string which are replaced or remain unchanged.
71663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
71763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
71863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
71963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public int oldLength() { return oldLength_; }
72063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
72163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return the number of units in the modified string, if hasChange() is true.
72263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *         Same as oldLength if hasChange() is false.
72363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
72463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
72563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
72663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public int newLength() { return newLength_; }
72763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
72863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
72963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return the current index into the source string
73063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
73163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
73263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
73363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public int sourceIndex() { return srcIndex; }
73463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
73563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return the current index into the replacement-characters-only string,
73663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         *         not counting unchanged spans
73763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
73863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
73963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
74063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public int replacementIndex() { return replIndex; }
74163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        /**
74263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @return the current index into the full destination string
74363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @draft ICU 59
74463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         * @provisional This API might change or be removed in a future release.
74563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer         */
74663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        public int destinationIndex() { return destIndex; }
74763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    };
74863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
74963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
75063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Returns an Iterator for coarse-grained changes for simple string updates.
75163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Skips non-changes.
75263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return an Iterator that merges adjacent changes.
75363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
75463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
75563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
75663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public Iterator getCoarseChangesIterator() {
75763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return new Iterator(array, length, true, true);
75863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
75963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
76063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
76163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
76263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return an Iterator that merges adjacent changes.
76363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
76463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
76563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
76663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public Iterator getCoarseIterator() {
76763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return new Iterator(array, length, false, true);
76863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
76963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
77063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
77163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Returns an Iterator for fine-grained changes for modifying styled text.
77263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Skips non-changes.
77363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return an Iterator that separates adjacent changes.
77463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
77563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
77663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
77763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public Iterator getFineChangesIterator() {
77863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return new Iterator(array, length, true, false);
77963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
78063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer
78163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    /**
78263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
78363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @return an Iterator that separates adjacent changes.
78463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @draft ICU 59
78563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     * @provisional This API might change or be removed in a future release.
78663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer     */
78763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    public Iterator getFineIterator() {
78863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        return new Iterator(array, length, false, false);
78963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    }
790fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
791fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    /**
792fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * Merges the two input Edits and appends the result to this object.
793fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     *
794fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * <p>Consider two string transformations (for example, normalization and case mapping)
795fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * where each records Edits in addition to writing an output string.<br>
796fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * Edits ab reflect how substrings of input string a
797fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * map to substrings of intermediate string b.<br>
798fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * Edits bc reflect how substrings of intermediate string b
799fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * map to substrings of output string c.<br>
800fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * This function merges ab and bc such that the additional edits
801fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * recorded in this object reflect how substrings of input string a
802fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * map to substrings of output string c.
803fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     *
804fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * <p>If unrelated Edits are passed in where the output string of the first
805fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * has a different length than the input string of the second,
806fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * then an IllegalArgumentException is thrown.
807fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     *
808fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @param ab reflects how substrings of input string a
809fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     *     map to substrings of intermediate string b.
810fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @param bc reflects how substrings of intermediate string b
811fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     *     map to substrings of output string c.
812fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @return this, with the merged edits appended
813fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @draft ICU 60
814fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     * @provisional This API might change or be removed in a future release.
815fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert     */
816fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    public Edits mergeAndAppend(Edits ab, Edits bc) {
817fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
818fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        // Parallel iteration over both Edits.
819fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        Iterator abIter = ab.getFineIterator();
820fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        Iterator bcIter = bc.getFineIterator();
821fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        boolean abHasNext = true, bcHasNext = true;
822fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        // Copy iterator state into local variables, so that we can modify and subdivide spans.
823fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        // ab old & new length, bc old & new length
824fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        int aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
825fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        // When we have different-intermediate-length changes, we accumulate a larger change.
826fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        int pending_aLength = 0, pending_cLength = 0;
827fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        for (;;) {
828fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // At this point, for each of the two iterators:
829fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Either we are done with the locally cached current edit,
830fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // and its intermediate-string length has been reset,
831fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // or we will continue to work with a truncated remainder of this edit.
832fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            //
833fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // If the current edit is done, and the iterator has not yet reached the end,
834fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // then we fetch the next edit. This is true for at least one of the iterators.
835fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            //
836fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Normally it does not matter whether we fetch from ab and then bc or vice versa.
837fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // However, the result is observably different when
838fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // ab deletions meet bc insertions at the same intermediate-string index.
839fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Some users expect the bc insertions to come first, so we fetch from bc first.
840fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (bc_bLength == 0) {
841fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (bcHasNext && (bcHasNext = bcIter.next())) {
842fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    bc_bLength = bcIter.oldLength();
843fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    cLength = bcIter.newLength();
844fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (bc_bLength == 0) {
845fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // insertion
846fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        if (ab_bLength == 0 || !abIter.hasChange()) {
847fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            addReplace(pending_aLength, pending_cLength + cLength);
848fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            pending_aLength = pending_cLength = 0;
849fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        } else {
850fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            pending_cLength += cLength;
851fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        }
852fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        continue;
853fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
854fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
855fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // else see if the other iterator is done, too.
856fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
857fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (ab_bLength == 0) {
858fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (abHasNext && (abHasNext = abIter.next())) {
859fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    aLength = abIter.oldLength();
860fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    ab_bLength = abIter.newLength();
861fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    if (ab_bLength == 0) {
862fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        // deletion
863fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
864fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            addReplace(pending_aLength + aLength, pending_cLength);
865fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            pending_aLength = pending_cLength = 0;
866fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        } else {
867fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            pending_aLength += aLength;
868fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        }
869fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        continue;
870fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    }
871fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else if (bc_bLength == 0) {
872fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Both iterators are done at the same time:
873fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // The intermediate-string lengths match.
874fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    break;
875fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                } else {
876fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    throw new IllegalArgumentException(
877fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                            "The ab output string is shorter than the bc input string.");
878fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
879fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
880fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (bc_bLength == 0) {
881fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                throw new IllegalArgumentException(
882fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                        "The bc input string is shorter than the ab output string.");
883fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
884fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            //  Done fetching: ab_bLength > 0 && bc_bLength > 0
885fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
886fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // The current state has two parts:
887fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // - Past: We accumulate a longer ac edit in the "pending" variables.
888fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // - Current: We have copies of the current ab/bc edits in local variables.
889fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            //   At least one side is newly fetched.
890fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            //   One side might be a truncated remainder of an edit we fetched earlier.
891fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert
892fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (!abIter.hasChange() && !bcIter.hasChange()) {
893fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // An unchanged span all the way from string a to string c.
894fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (pending_aLength != 0 || pending_cLength != 0) {
895fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    addReplace(pending_aLength, pending_cLength);
896fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    pending_aLength = pending_cLength = 0;
897fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
898fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                int unchangedLength = aLength <= cLength ? aLength : cLength;
899fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                addUnchanged(unchangedLength);
900fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                ab_bLength = aLength -= unchangedLength;
901fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                bc_bLength = cLength -= unchangedLength;
902fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // At least one of the unchanged spans is now empty.
903fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                continue;
904fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
905fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (!abIter.hasChange() && bcIter.hasChange()) {
906fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Unchanged a->b but changed b->c.
907fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (ab_bLength >= bc_bLength) {
908fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Split the longer unchanged span into change + remainder.
909fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
910fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    pending_aLength = pending_cLength = 0;
911fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    aLength = ab_bLength -= bc_bLength;
912fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    bc_bLength = 0;
913fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    continue;
914fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
915fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Handle the shorter unchanged span below like a change.
916fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else if (abIter.hasChange() && !bcIter.hasChange()) {
917fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Changed a->b and then unchanged b->c.
918fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (ab_bLength <= bc_bLength) {
919fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Split the longer unchanged span into change + remainder.
920fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
921fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    pending_aLength = pending_cLength = 0;
922fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    cLength = bc_bLength -= ab_bLength;
923fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    ab_bLength = 0;
924fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    continue;
925fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
926fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                // Handle the shorter unchanged span below like a change.
927fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {  // both abIter.hasChange() && bcIter.hasChange()
928fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                if (ab_bLength == bc_bLength) {
929fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    // Changes on both sides up to the same position. Emit & reset.
930fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    addReplace(pending_aLength + aLength, pending_cLength + cLength);
931fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    pending_aLength = pending_cLength = 0;
932fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    ab_bLength = bc_bLength = 0;
933fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                    continue;
934fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                }
935fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
936fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // Accumulate the a->c change, reset the shorter side,
937fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            // keep a remainder of the longer one.
938fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            pending_aLength += aLength;
939fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            pending_cLength += cLength;
940fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            if (ab_bLength < bc_bLength) {
941fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                bc_bLength -= ab_bLength;
942fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                cLength = ab_bLength = 0;
943fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            } else {  // ab_bLength > bc_bLength
944fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                ab_bLength -= bc_bLength;
945fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert                aLength = bc_bLength = 0;
946fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            }
947fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
948fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        if (pending_aLength != 0 || pending_cLength != 0) {
949fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert            addReplace(pending_aLength, pending_cLength);
950fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        }
951fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert        return this;
952fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert    }
95363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer}
954