1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3package com.ibm.icu.text;
4
5import java.nio.BufferOverflowException;
6import java.util.Arrays;
7
8/**
9 * Records lengths of string edits but not replacement text.
10 * Supports replacements, insertions, deletions in linear progression.
11 * Does not support moving/reordering of text.
12 *
13 * @draft ICU 59
14 * @provisional This API might change or be removed in a future release.
15 */
16public final class Edits {
17    // 0000uuuuuuuuuuuu records u+1 unchanged text units.
18    private static final int MAX_UNCHANGED_LENGTH = 0x1000;
19    private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
20
21    // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
22    private static final int MAX_SHORT_CHANGE_OLD_LENGTH = 6;
23    private static final int MAX_SHORT_CHANGE_NEW_LENGTH = 7;
24    private static final int SHORT_CHANGE_NUM_MASK = 0x1ff;
25    private static final int MAX_SHORT_CHANGE = 0x6fff;
26
27    // 0111mmmmmmnnnnnn records a replacement of m text units with n.
28    // m or n = 61: actual length follows in the next edits array unit.
29    // m or n = 62..63: actual length follows in the next two edits array units.
30    // Bit 30 of the actual length is in the head unit.
31    // Trailing units have bit 15 set.
32    private static final int LENGTH_IN_1TRAIL = 61;
33    private static final int LENGTH_IN_2TRAIL = 62;
34
35    private static final int STACK_CAPACITY = 100;
36    private char[] array;
37    private int length;
38    private int delta;
39    private int numChanges;
40
41    /**
42     * Constructs an empty object.
43     * @draft ICU 59
44     * @provisional This API might change or be removed in a future release.
45     */
46    public Edits() {
47        array = new char[STACK_CAPACITY];
48    }
49
50    /**
51     * Resets the data but may not release memory.
52     * @draft ICU 59
53     * @provisional This API might change or be removed in a future release.
54     */
55    public void reset() {
56        length = delta = numChanges = 0;
57    }
58
59    private void setLastUnit(int last) {
60        array[length - 1] = (char)last;
61    }
62    private int lastUnit() {
63        return length > 0 ? array[length - 1] : 0xffff;
64    }
65
66    /**
67     * Adds a record for an unchanged segment of text.
68     * Normally called from inside ICU string transformation functions, not user code.
69     * @draft ICU 59
70     * @provisional This API might change or be removed in a future release.
71     */
72    public void addUnchanged(int unchangedLength) {
73        if(unchangedLength < 0) {
74            throw new IllegalArgumentException(
75                    "addUnchanged(" + unchangedLength + "): length must not be negative");
76        }
77        // Merge into previous unchanged-text record, if any.
78        int last = lastUnit();
79        if(last < MAX_UNCHANGED) {
80            int remaining = MAX_UNCHANGED - last;
81            if (remaining >= unchangedLength) {
82                setLastUnit(last + unchangedLength);
83                return;
84            }
85            setLastUnit(MAX_UNCHANGED);
86            unchangedLength -= remaining;
87        }
88        // Split large lengths into multiple units.
89        while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
90            append(MAX_UNCHANGED);
91            unchangedLength -= MAX_UNCHANGED_LENGTH;
92        }
93        // Write a small (remaining) length.
94        if(unchangedLength > 0) {
95            append(unchangedLength - 1);
96        }
97    }
98
99    /**
100     * Adds a record for a text replacement/insertion/deletion.
101     * Normally called from inside ICU string transformation functions, not user code.
102     * @draft ICU 59
103     * @provisional This API might change or be removed in a future release.
104     */
105    public void addReplace(int oldLength, int newLength) {
106        if(oldLength < 0 || newLength < 0) {
107            throw new IllegalArgumentException(
108                    "addReplace(" + oldLength + ", " + newLength +
109                    "): both lengths must be non-negative");
110        }
111        if (oldLength == 0 && newLength == 0) {
112            return;
113        }
114        ++numChanges;
115        int newDelta = newLength - oldLength;
116        if (newDelta != 0) {
117            if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
118                    (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
119                // Integer overflow or underflow.
120                throw new IndexOutOfBoundsException();
121            }
122            delta += newDelta;
123        }
124
125        if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
126                newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
127            // Merge into previous same-lengths short-replacement record, if any.
128            int u = (oldLength << 12) | (newLength << 9);
129            int last = lastUnit();
130            if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
131                    (last & ~SHORT_CHANGE_NUM_MASK) == u &&
132                    (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
133                setLastUnit(last + 1);
134                return;
135            }
136            append(u);
137            return;
138        }
139
140        int head = 0x7000;
141        if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
142            head |= oldLength << 6;
143            head |= newLength;
144            append(head);
145        } else if ((array.length - length) >= 5 || growArray()) {
146            int limit = length + 1;
147            if(oldLength < LENGTH_IN_1TRAIL) {
148                head |= oldLength << 6;
149            } else if(oldLength <= 0x7fff) {
150                head |= LENGTH_IN_1TRAIL << 6;
151                array[limit++] = (char)(0x8000 | oldLength);
152            } else {
153                head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
154                array[limit++] = (char)(0x8000 | (oldLength >> 15));
155                array[limit++] = (char)(0x8000 | oldLength);
156            }
157            if(newLength < LENGTH_IN_1TRAIL) {
158                head |= newLength;
159            } else if(newLength <= 0x7fff) {
160                head |= LENGTH_IN_1TRAIL;
161                array[limit++] = (char)(0x8000 | newLength);
162            } else {
163                head |= LENGTH_IN_2TRAIL + (newLength >> 30);
164                array[limit++] = (char)(0x8000 | (newLength >> 15));
165                array[limit++] = (char)(0x8000 | newLength);
166            }
167            array[length] = (char)head;
168            length = limit;
169        }
170    }
171
172    private void append(int r) {
173        if(length < array.length || growArray()) {
174            array[length++] = (char)r;
175        }
176    }
177
178    private boolean growArray() {
179        int newCapacity;
180        if (array.length == STACK_CAPACITY) {
181            newCapacity = 2000;
182        } else if (array.length == Integer.MAX_VALUE) {
183            throw new BufferOverflowException();
184        } else if (array.length >= (Integer.MAX_VALUE / 2)) {
185            newCapacity = Integer.MAX_VALUE;
186        } else {
187            newCapacity = 2 * array.length;
188        }
189        // Grow by at least 5 units so that a maximal change record will fit.
190        if ((newCapacity - array.length) < 5) {
191            throw new BufferOverflowException();
192        }
193        array = Arrays.copyOf(array, newCapacity);
194        return true;
195    }
196
197    /**
198     * How much longer is the new text compared with the old text?
199     * @return new length minus old length
200     * @draft ICU 59
201     * @provisional This API might change or be removed in a future release.
202     */
203    public int lengthDelta() { return delta; }
204    /**
205     * @return true if there are any change edits
206     * @draft ICU 59
207     * @provisional This API might change or be removed in a future release.
208     */
209    public boolean hasChanges()  { return numChanges != 0; }
210
211    /**
212     * @return the number of change edits
213     * @draft ICU 60
214     * @provisional This API might change or be removed in a future release.
215     */
216    public int numberOfChanges() { return numChanges; }
217
218    /**
219     * Access to the list of edits.
220     * @see #getCoarseIterator
221     * @see #getFineIterator
222     * @draft ICU 59
223     * @provisional This API might change or be removed in a future release.
224     */
225    public static final class Iterator {
226        private final char[] array;
227        private int index;
228        private final int length;
229        /**
230         * 0 if we are not within compressed equal-length changes.
231         * Otherwise the number of remaining changes, including the current one.
232         */
233        private int remaining;
234        private final boolean onlyChanges_, coarse;
235
236        private int dir;  // iteration direction: back(<0), initial(0), forward(>0)
237        private boolean changed;
238        private int oldLength_, newLength_;
239        private int srcIndex, replIndex, destIndex;
240
241        private Iterator(char[] a, int len, boolean oc, boolean crs) {
242            array = a;
243            length = len;
244            onlyChanges_ = oc;
245            coarse = crs;
246        }
247
248        private int readLength(int head) {
249            if (head < LENGTH_IN_1TRAIL) {
250                return head;
251            } else if (head < LENGTH_IN_2TRAIL) {
252                assert(index < length);
253                assert(array[index] >= 0x8000);
254                return array[index++] & 0x7fff;
255            } else {
256                assert((index + 2) <= length);
257                assert(array[index] >= 0x8000);
258                assert(array[index + 1] >= 0x8000);
259                int len = ((head & 1) << 30) |
260                        ((array[index] & 0x7fff) << 15) |
261                        (array[index + 1] & 0x7fff);
262                index += 2;
263                return len;
264            }
265        }
266
267        private void updateNextIndexes() {
268            srcIndex += oldLength_;
269            if (changed) {
270                replIndex += newLength_;
271            }
272            destIndex += newLength_;
273        }
274
275        private void updatePreviousIndexes() {
276            srcIndex -= oldLength_;
277            if (changed) {
278                replIndex -= newLength_;
279            }
280            destIndex -= newLength_;
281        }
282
283        private boolean noNext() {
284            // No change before or beyond the string.
285            dir = 0;
286            changed = false;
287            oldLength_ = newLength_ = 0;
288            return false;
289        }
290
291        /**
292         * Advances to the next edit.
293         * @return true if there is another edit
294         * @draft ICU 59
295         * @provisional This API might change or be removed in a future release.
296         */
297        public boolean next() {
298            return next(onlyChanges_);
299        }
300
301        private boolean next(boolean onlyChanges) {
302            // Forward iteration: Update the string indexes to the limit of the current span,
303            // and post-increment-read array units to assemble a new span.
304            // Leaves the array index one after the last unit of that span.
305            if (dir > 0) {
306                updateNextIndexes();
307            } else {
308                if (dir < 0) {
309                    // Turn around from previous() to next().
310                    // Post-increment-read the same span again.
311                    if (remaining > 0) {
312                        // Fine-grained iterator:
313                        // Stay on the current one of a sequence of compressed changes.
314                        ++index;  // next() rests on the index after the sequence unit.
315                        dir = 1;
316                        return true;
317                    }
318                }
319                dir = 1;
320            }
321            if (remaining >= 1) {
322                // Fine-grained iterator: Continue a sequence of compressed changes.
323                if (remaining > 1) {
324                    --remaining;
325                    return true;
326                }
327                remaining = 0;
328            }
329            if (index >= length) {
330                return noNext();
331            }
332            int u = array[index++];
333            if (u <= MAX_UNCHANGED) {
334                // Combine adjacent unchanged ranges.
335                changed = false;
336                oldLength_ = u + 1;
337                while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
338                    ++index;
339                    oldLength_ += u + 1;
340                }
341                newLength_ = oldLength_;
342                if (onlyChanges) {
343                    updateNextIndexes();
344                    if (index >= length) {
345                        return noNext();
346                    }
347                    // already fetched u > MAX_UNCHANGED at index
348                    ++index;
349                } else {
350                    return true;
351                }
352            }
353            changed = true;
354            if (u <= MAX_SHORT_CHANGE) {
355                int oldLen = u >> 12;
356                int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
357                int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
358                if (coarse) {
359                    oldLength_ = num * oldLen;
360                    newLength_ = num * newLen;
361                } else {
362                    // Split a sequence of changes that was compressed into one unit.
363                    oldLength_ = oldLen;
364                    newLength_ = newLen;
365                    if (num > 1) {
366                        remaining = num;  // This is the first of two or more changes.
367                    }
368                    return true;
369                }
370            } else {
371                assert(u <= 0x7fff);
372                oldLength_ = readLength((u >> 6) & 0x3f);
373                newLength_ = readLength(u & 0x3f);
374                if (!coarse) {
375                    return true;
376                }
377            }
378            // Combine adjacent changes.
379            while (index < length && (u = array[index]) > MAX_UNCHANGED) {
380                ++index;
381                if (u <= MAX_SHORT_CHANGE) {
382                    int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
383                    oldLength_ += (u >> 12) * num;
384                    newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
385                } else {
386                    assert(u <= 0x7fff);
387                    oldLength_ += readLength((u >> 6) & 0x3f);
388                    newLength_ += readLength(u & 0x3f);
389                }
390            }
391            return true;
392        }
393
394        private boolean previous() {
395            // Backward iteration: Pre-decrement-read array units to assemble a new span,
396            // then update the string indexes to the start of that span.
397            // Leaves the array index on the head unit of that span.
398            if (dir >= 0) {
399                if (dir > 0) {
400                    // Turn around from next() to previous().
401                    // Set the string indexes to the span limit and
402                    // pre-decrement-read the same span again.
403                    if (remaining > 0) {
404                        // Fine-grained iterator:
405                        // Stay on the current one of a sequence of compressed changes.
406                        --index;  // previous() rests on the sequence unit.
407                        dir = -1;
408                        return true;
409                    }
410                    updateNextIndexes();
411                }
412                dir = -1;
413            }
414            if (remaining > 0) {
415                // Fine-grained iterator: Continue a sequence of compressed changes.
416                int u = array[index];
417                assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
418                if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
419                    ++remaining;
420                    updatePreviousIndexes();
421                    return true;
422                }
423                remaining = 0;
424            }
425            if (index <= 0) {
426                return noNext();
427            }
428            int u = array[--index];
429            if (u <= MAX_UNCHANGED) {
430                // Combine adjacent unchanged ranges.
431                changed = false;
432                oldLength_ = u + 1;
433                while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
434                    --index;
435                    oldLength_ += u + 1;
436                }
437                newLength_ = oldLength_;
438                // No need to handle onlyChanges as long as previous() is called only from findIndex().
439                updatePreviousIndexes();
440                return true;
441            }
442            changed = true;
443            if (u <= MAX_SHORT_CHANGE) {
444                int oldLen = u >> 12;
445                int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
446                int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
447                if (coarse) {
448                    oldLength_ = num * oldLen;
449                    newLength_ = num * newLen;
450                } else {
451                    // Split a sequence of changes that was compressed into one unit.
452                    oldLength_ = oldLen;
453                    newLength_ = newLen;
454                    if (num > 1) {
455                        remaining = 1;  // This is the last of two or more changes.
456                    }
457                    updatePreviousIndexes();
458                    return true;
459                }
460            } else {
461                if (u <= 0x7fff) {
462                    // The change is encoded in u alone.
463                    oldLength_ = readLength((u >> 6) & 0x3f);
464                    newLength_ = readLength(u & 0x3f);
465                } else {
466                    // Back up to the head of the change, read the lengths,
467                    // and reset the index to the head again.
468                    assert(index > 0);
469                    while ((u = array[--index]) > 0x7fff) {}
470                    assert(u > MAX_SHORT_CHANGE);
471                    int headIndex = index++;
472                    oldLength_ = readLength((u >> 6) & 0x3f);
473                    newLength_ = readLength(u & 0x3f);
474                    index = headIndex;
475                }
476                if (!coarse) {
477                    updatePreviousIndexes();
478                    return true;
479                }
480            }
481            // Combine adjacent changes.
482            while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
483                --index;
484                if (u <= MAX_SHORT_CHANGE) {
485                    int num = (u & SHORT_CHANGE_NUM_MASK) + 1;
486                    oldLength_ += (u >> 12) * num;
487                    newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
488                } else if (u <= 0x7fff) {
489                    // Read the lengths, and reset the index to the head again.
490                    int headIndex = index++;
491                    oldLength_ += readLength((u >> 6) & 0x3f);
492                    newLength_ += readLength(u & 0x3f);
493                    index = headIndex;
494                }
495            }
496            updatePreviousIndexes();
497            return true;
498        }
499
500        /**
501         * Finds the edit that contains the source index.
502         * The source index may be found in a non-change
503         * even if normal iteration would skip non-changes.
504         * Normal iteration can continue from a found edit.
505         *
506         * <p>The iterator state before this search logically does not matter.
507         * (It may affect the performance of the search.)
508         *
509         * <p>The iterator state after this search is undefined
510         * if the source index is out of bounds for the source string.
511         *
512         * @param i source index
513         * @return true if the edit for the source index was found
514         * @draft ICU 59
515         * @provisional This API might change or be removed in a future release.
516         */
517        public boolean findSourceIndex(int i) {
518            return findIndex(i, true) == 0;
519        }
520
521        /**
522         * Finds the edit that contains the destination index.
523         * The destination index may be found in a non-change
524         * even if normal iteration would skip non-changes.
525         * Normal iteration can continue from a found edit.
526         *
527         * <p>The iterator state before this search logically does not matter.
528         * (It may affect the performance of the search.)
529         *
530         * <p>The iterator state after this search is undefined
531         * if the source index is out of bounds for the source string.
532         *
533         * @param i destination index
534         * @return true if the edit for the destination index was found
535         * @draft ICU 60
536         * @provisional This API might change or be removed in a future release.
537         */
538        public boolean findDestinationIndex(int i) {
539            return findIndex(i, false) == 0;
540        }
541
542        /** @return -1: error or i<0; 0: found; 1: i>=string length */
543        private int findIndex(int i, boolean findSource) {
544            if (i < 0) { return -1; }
545            int spanStart, spanLength;
546            if (findSource) {  // find source index
547                spanStart = srcIndex;
548                spanLength = oldLength_;
549            } else {  // find destination index
550                spanStart = destIndex;
551                spanLength = newLength_;
552            }
553            if (i < spanStart) {
554                if (i >= (spanStart / 2)) {
555                    // Search backwards.
556                    for (;;) {
557                        boolean hasPrevious = previous();
558                        assert(hasPrevious);  // because i>=0 and the first span starts at 0
559                        spanStart = findSource ? srcIndex : destIndex;
560                        if (i >= spanStart) {
561                            // The index is in the current span.
562                            return 0;
563                        }
564                        if (remaining > 0) {
565                            // Is the index in one of the remaining compressed edits?
566                            // spanStart is the start of the current span, first of the remaining ones.
567                            spanLength = findSource ? oldLength_ : newLength_;
568                            int u = array[index];
569                            assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
570                            int num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
571                            int len = num * spanLength;
572                            if (i >= (spanStart - len)) {
573                                int n = ((spanStart - i - 1) / spanLength) + 1;
574                                // 1 <= n <= num
575                                srcIndex -= n * oldLength_;
576                                replIndex -= n * newLength_;
577                                destIndex -= n * newLength_;
578                                remaining += n;
579                                return 0;
580                            }
581                            // Skip all of these edits at once.
582                            srcIndex -= num * oldLength_;
583                            replIndex -= num * newLength_;
584                            destIndex -= num * newLength_;
585                            remaining = 0;
586                        }
587                    }
588                }
589                // Reset the iterator to the start.
590                dir = 0;
591                index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
592            } else if (i < (spanStart + spanLength)) {
593                // The index is in the current span.
594                return 0;
595            }
596            while (next(false)) {
597                if (findSource) {
598                    spanStart = srcIndex;
599                    spanLength = oldLength_;
600                } else {
601                    spanStart = destIndex;
602                    spanLength = newLength_;
603                }
604                if (i < (spanStart + spanLength)) {
605                    // The index is in the current span.
606                    return 0;
607                }
608                if (remaining > 1) {
609                    // Is the index in one of the remaining compressed edits?
610                    // spanStart is the start of the current span, first of the remaining ones.
611                    int len = remaining * spanLength;
612                    if (i < (spanStart + len)) {
613                        int n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
614                        srcIndex += n * oldLength_;
615                        replIndex += n * newLength_;
616                        destIndex += n * newLength_;
617                        remaining -= n;
618                        return 0;
619                    }
620                    // Make next() skip all of these edits at once.
621                    oldLength_ *= remaining;
622                    newLength_ *= remaining;
623                    remaining = 0;
624                }
625            }
626            return 1;
627        }
628
629        /**
630         * Returns the destination index corresponding to the given source index.
631         * If the source index is inside a change edit (not at its start),
632         * then the destination index at the end of that edit is returned,
633         * since there is no information about index mapping inside a change edit.
634         *
635         * <p>(This means that indexes to the start and middle of an edit,
636         * for example around a grapheme cluster, are mapped to indexes
637         * encompassing the entire edit.
638         * The alternative, mapping an interior index to the start,
639         * would map such an interval to an empty one.)
640         *
641         * <p>This operation will usually but not always modify this object.
642         * The iterator state after this search is undefined.
643         *
644         * @param i source index
645         * @return destination index; undefined if i is not 0..string length
646         * @draft ICU 60
647         * @provisional This API might change or be removed in a future release.
648         */
649        public int destinationIndexFromSourceIndex(int i) {
650            int where = findIndex(i, true);
651            if (where < 0) {
652                // Error or before the string.
653                return 0;
654            }
655            if (where > 0 || i == srcIndex) {
656                // At or after string length, or at start of the found span.
657                return destIndex;
658            }
659            if (changed) {
660                // In a change span, map to its end.
661                return destIndex + newLength_;
662            } else {
663                // In an unchanged span, offset 1:1 within it.
664                return destIndex + (i - srcIndex);
665            }
666        }
667
668        /**
669         * Returns the source index corresponding to the given destination index.
670         * If the destination index is inside a change edit (not at its start),
671         * then the source index at the end of that edit is returned,
672         * since there is no information about index mapping inside a change edit.
673         *
674         * <p>(This means that indexes to the start and middle of an edit,
675         * for example around a grapheme cluster, are mapped to indexes
676         * encompassing the entire edit.
677         * The alternative, mapping an interior index to the start,
678         * would map such an interval to an empty one.)
679         *
680         * <p>This operation will usually but not always modify this object.
681         * The iterator state after this search is undefined.
682         *
683         * @param i destination index
684         * @return source index; undefined if i is not 0..string length
685         * @draft ICU 60
686         * @provisional This API might change or be removed in a future release.
687         */
688        public int sourceIndexFromDestinationIndex(int i) {
689            int where = findIndex(i, false);
690            if (where < 0) {
691                // Error or before the string.
692                return 0;
693            }
694            if (where > 0 || i == destIndex) {
695                // At or after string length, or at start of the found span.
696                return srcIndex;
697            }
698            if (changed) {
699                // In a change span, map to its end.
700                return srcIndex + oldLength_;
701            } else {
702                // In an unchanged span, offset within it.
703                return srcIndex + (i - destIndex);
704            }
705        }
706
707        /**
708         * @return true if this edit replaces oldLength() units with newLength() different ones.
709         *         false if oldLength units remain unchanged.
710         * @draft ICU 59
711         * @provisional This API might change or be removed in a future release.
712         */
713        public boolean hasChange() { return changed; }
714        /**
715         * @return the number of units in the original string which are replaced or remain unchanged.
716         * @draft ICU 59
717         * @provisional This API might change or be removed in a future release.
718         */
719        public int oldLength() { return oldLength_; }
720        /**
721         * @return the number of units in the modified string, if hasChange() is true.
722         *         Same as oldLength if hasChange() is false.
723         * @draft ICU 59
724         * @provisional This API might change or be removed in a future release.
725         */
726        public int newLength() { return newLength_; }
727
728        /**
729         * @return the current index into the source string
730         * @draft ICU 59
731         * @provisional This API might change or be removed in a future release.
732         */
733        public int sourceIndex() { return srcIndex; }
734        /**
735         * @return the current index into the replacement-characters-only string,
736         *         not counting unchanged spans
737         * @draft ICU 59
738         * @provisional This API might change or be removed in a future release.
739         */
740        public int replacementIndex() { return replIndex; }
741        /**
742         * @return the current index into the full destination string
743         * @draft ICU 59
744         * @provisional This API might change or be removed in a future release.
745         */
746        public int destinationIndex() { return destIndex; }
747    };
748
749    /**
750     * Returns an Iterator for coarse-grained changes for simple string updates.
751     * Skips non-changes.
752     * @return an Iterator that merges adjacent changes.
753     * @draft ICU 59
754     * @provisional This API might change or be removed in a future release.
755     */
756    public Iterator getCoarseChangesIterator() {
757        return new Iterator(array, length, true, true);
758    }
759
760    /**
761     * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
762     * @return an Iterator that merges adjacent changes.
763     * @draft ICU 59
764     * @provisional This API might change or be removed in a future release.
765     */
766    public Iterator getCoarseIterator() {
767        return new Iterator(array, length, false, true);
768    }
769
770    /**
771     * Returns an Iterator for fine-grained changes for modifying styled text.
772     * Skips non-changes.
773     * @return an Iterator that separates adjacent changes.
774     * @draft ICU 59
775     * @provisional This API might change or be removed in a future release.
776     */
777    public Iterator getFineChangesIterator() {
778        return new Iterator(array, length, true, false);
779    }
780
781    /**
782     * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
783     * @return an Iterator that separates adjacent changes.
784     * @draft ICU 59
785     * @provisional This API might change or be removed in a future release.
786     */
787    public Iterator getFineIterator() {
788        return new Iterator(array, length, false, false);
789    }
790
791    /**
792     * Merges the two input Edits and appends the result to this object.
793     *
794     * <p>Consider two string transformations (for example, normalization and case mapping)
795     * where each records Edits in addition to writing an output string.<br>
796     * Edits ab reflect how substrings of input string a
797     * map to substrings of intermediate string b.<br>
798     * Edits bc reflect how substrings of intermediate string b
799     * map to substrings of output string c.<br>
800     * This function merges ab and bc such that the additional edits
801     * recorded in this object reflect how substrings of input string a
802     * map to substrings of output string c.
803     *
804     * <p>If unrelated Edits are passed in where the output string of the first
805     * has a different length than the input string of the second,
806     * then an IllegalArgumentException is thrown.
807     *
808     * @param ab reflects how substrings of input string a
809     *     map to substrings of intermediate string b.
810     * @param bc reflects how substrings of intermediate string b
811     *     map to substrings of output string c.
812     * @return this, with the merged edits appended
813     * @draft ICU 60
814     * @provisional This API might change or be removed in a future release.
815     */
816    public Edits mergeAndAppend(Edits ab, Edits bc) {
817        // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
818        // Parallel iteration over both Edits.
819        Iterator abIter = ab.getFineIterator();
820        Iterator bcIter = bc.getFineIterator();
821        boolean abHasNext = true, bcHasNext = true;
822        // Copy iterator state into local variables, so that we can modify and subdivide spans.
823        // ab old & new length, bc old & new length
824        int aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
825        // When we have different-intermediate-length changes, we accumulate a larger change.
826        int pending_aLength = 0, pending_cLength = 0;
827        for (;;) {
828            // At this point, for each of the two iterators:
829            // Either we are done with the locally cached current edit,
830            // and its intermediate-string length has been reset,
831            // or we will continue to work with a truncated remainder of this edit.
832            //
833            // If the current edit is done, and the iterator has not yet reached the end,
834            // then we fetch the next edit. This is true for at least one of the iterators.
835            //
836            // Normally it does not matter whether we fetch from ab and then bc or vice versa.
837            // However, the result is observably different when
838            // ab deletions meet bc insertions at the same intermediate-string index.
839            // Some users expect the bc insertions to come first, so we fetch from bc first.
840            if (bc_bLength == 0) {
841                if (bcHasNext && (bcHasNext = bcIter.next())) {
842                    bc_bLength = bcIter.oldLength();
843                    cLength = bcIter.newLength();
844                    if (bc_bLength == 0) {
845                        // insertion
846                        if (ab_bLength == 0 || !abIter.hasChange()) {
847                            addReplace(pending_aLength, pending_cLength + cLength);
848                            pending_aLength = pending_cLength = 0;
849                        } else {
850                            pending_cLength += cLength;
851                        }
852                        continue;
853                    }
854                }
855                // else see if the other iterator is done, too.
856            }
857            if (ab_bLength == 0) {
858                if (abHasNext && (abHasNext = abIter.next())) {
859                    aLength = abIter.oldLength();
860                    ab_bLength = abIter.newLength();
861                    if (ab_bLength == 0) {
862                        // deletion
863                        if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
864                            addReplace(pending_aLength + aLength, pending_cLength);
865                            pending_aLength = pending_cLength = 0;
866                        } else {
867                            pending_aLength += aLength;
868                        }
869                        continue;
870                    }
871                } else if (bc_bLength == 0) {
872                    // Both iterators are done at the same time:
873                    // The intermediate-string lengths match.
874                    break;
875                } else {
876                    throw new IllegalArgumentException(
877                            "The ab output string is shorter than the bc input string.");
878                }
879            }
880            if (bc_bLength == 0) {
881                throw new IllegalArgumentException(
882                        "The bc input string is shorter than the ab output string.");
883            }
884            //  Done fetching: ab_bLength > 0 && bc_bLength > 0
885
886            // The current state has two parts:
887            // - Past: We accumulate a longer ac edit in the "pending" variables.
888            // - Current: We have copies of the current ab/bc edits in local variables.
889            //   At least one side is newly fetched.
890            //   One side might be a truncated remainder of an edit we fetched earlier.
891
892            if (!abIter.hasChange() && !bcIter.hasChange()) {
893                // An unchanged span all the way from string a to string c.
894                if (pending_aLength != 0 || pending_cLength != 0) {
895                    addReplace(pending_aLength, pending_cLength);
896                    pending_aLength = pending_cLength = 0;
897                }
898                int unchangedLength = aLength <= cLength ? aLength : cLength;
899                addUnchanged(unchangedLength);
900                ab_bLength = aLength -= unchangedLength;
901                bc_bLength = cLength -= unchangedLength;
902                // At least one of the unchanged spans is now empty.
903                continue;
904            }
905            if (!abIter.hasChange() && bcIter.hasChange()) {
906                // Unchanged a->b but changed b->c.
907                if (ab_bLength >= bc_bLength) {
908                    // Split the longer unchanged span into change + remainder.
909                    addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
910                    pending_aLength = pending_cLength = 0;
911                    aLength = ab_bLength -= bc_bLength;
912                    bc_bLength = 0;
913                    continue;
914                }
915                // Handle the shorter unchanged span below like a change.
916            } else if (abIter.hasChange() && !bcIter.hasChange()) {
917                // Changed a->b and then unchanged b->c.
918                if (ab_bLength <= bc_bLength) {
919                    // Split the longer unchanged span into change + remainder.
920                    addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
921                    pending_aLength = pending_cLength = 0;
922                    cLength = bc_bLength -= ab_bLength;
923                    ab_bLength = 0;
924                    continue;
925                }
926                // Handle the shorter unchanged span below like a change.
927            } else {  // both abIter.hasChange() && bcIter.hasChange()
928                if (ab_bLength == bc_bLength) {
929                    // Changes on both sides up to the same position. Emit & reset.
930                    addReplace(pending_aLength + aLength, pending_cLength + cLength);
931                    pending_aLength = pending_cLength = 0;
932                    ab_bLength = bc_bLength = 0;
933                    continue;
934                }
935            }
936            // Accumulate the a->c change, reset the shorter side,
937            // keep a remainder of the longer one.
938            pending_aLength += aLength;
939            pending_cLength += cLength;
940            if (ab_bLength < bc_bLength) {
941                bc_bLength -= ab_bLength;
942                cLength = ab_bLength = 0;
943            } else {  // ab_bLength > bc_bLength
944                ab_bLength -= bc_bLength;
945                aLength = bc_bLength = 0;
946            }
947        }
948        if (pending_aLength != 0 || pending_cLength != 0) {
949            addReplace(pending_aLength, pending_cLength);
950        }
951        return this;
952    }
953}
954