163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer// © 2017 and later: Unicode, Inc. and others. 263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer// License & terms of use: http://www.unicode.org/copyright.html#License 363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererpackage com.ibm.icu.text; 463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererimport java.nio.BufferOverflowException; 663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererimport java.util.Arrays; 763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer/** 963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Records lengths of string edits but not replacement text. 1063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Supports replacements, insertions, deletions in linear progression. 1163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Does not support moving/reordering of text. 1263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * 1363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 1463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 1563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 1663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Schererpublic final class Edits { 1763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // 0000uuuuuuuuuuuu records u+1 unchanged text units. 1863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int MAX_UNCHANGED_LENGTH = 0x1000; 1963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; 2063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 21fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units. 22fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private static final int MAX_SHORT_CHANGE_OLD_LENGTH = 6; 23fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private static final int MAX_SHORT_CHANGE_NEW_LENGTH = 7; 24fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private static final int SHORT_CHANGE_NUM_MASK = 0x1ff; 2563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int MAX_SHORT_CHANGE = 0x6fff; 2663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 2763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // 0111mmmmmmnnnnnn records a replacement of m text units with n. 2863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // m or n = 61: actual length follows in the next edits array unit. 2963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // m or n = 62..63: actual length follows in the next two edits array units. 3063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Bit 30 of the actual length is in the head unit. 3163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Trailing units have bit 15 set. 3263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int LENGTH_IN_1TRAIL = 61; 3363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int LENGTH_IN_2TRAIL = 62; 3463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 3563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private static final int STACK_CAPACITY = 100; 3663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private char[] array; 3763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int length; 3863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int delta; 39fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private int numChanges; 4063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 4163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 4263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Constructs an empty object. 4363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 4463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 4563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 4663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public Edits() { 4763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array = new char[STACK_CAPACITY]; 4863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 4963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 5063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 5163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Resets the data but may not release memory. 5263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 5363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 5463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 5563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public void reset() { 56fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert length = delta = numChanges = 0; 5763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 5863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 5963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private void setLastUnit(int last) { 6063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[length - 1] = (char)last; 6163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 6263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int lastUnit() { 6363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return length > 0 ? array[length - 1] : 0xffff; 6463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 6563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 6663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 6763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Adds a record for an unchanged segment of text. 6863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Normally called from inside ICU string transformation functions, not user code. 6963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 7063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 7163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 7263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public void addUnchanged(int unchangedLength) { 7363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(unchangedLength < 0) { 7463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer throw new IllegalArgumentException( 7563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer "addUnchanged(" + unchangedLength + "): length must not be negative"); 7663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 7763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Merge into previous unchanged-text record, if any. 7863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int last = lastUnit(); 7963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(last < MAX_UNCHANGED) { 8063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int remaining = MAX_UNCHANGED - last; 8163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (remaining >= unchangedLength) { 8263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer setLastUnit(last + unchangedLength); 8363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return; 8463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 8563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer setLastUnit(MAX_UNCHANGED); 8663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer unchangedLength -= remaining; 8763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 8863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Split large lengths into multiple units. 8963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer while(unchangedLength >= MAX_UNCHANGED_LENGTH) { 9063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer append(MAX_UNCHANGED); 9163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer unchangedLength -= MAX_UNCHANGED_LENGTH; 9263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 9363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Write a small (remaining) length. 9463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(unchangedLength > 0) { 9563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer append(unchangedLength - 1); 9663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 9763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 9863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 9963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 10063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Adds a record for a text replacement/insertion/deletion. 10163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Normally called from inside ICU string transformation functions, not user code. 10263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 10363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 10463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 10563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public void addReplace(int oldLength, int newLength) { 10663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(oldLength < 0 || newLength < 0) { 10763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer throw new IllegalArgumentException( 10863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer "addReplace(" + oldLength + ", " + newLength + 10963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer "): both lengths must be non-negative"); 11063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 11163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (oldLength == 0 && newLength == 0) { 11263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return; 11363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 114fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ++numChanges; 11563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int newDelta = newLength - oldLength; 11663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (newDelta != 0) { 11763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) || 11863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) { 11963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Integer overflow or underflow. 12063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer throw new IndexOutOfBoundsException(); 12163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 12263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer delta += newDelta; 12363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 12463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 125fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH && 126fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) { 127fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Merge into previous same-lengths short-replacement record, if any. 128fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int u = (oldLength << 12) | (newLength << 9); 129fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int last = lastUnit(); 130fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE && 131fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert (last & ~SHORT_CHANGE_NUM_MASK) == u && 132fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) { 133fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert setLastUnit(last + 1); 134fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return; 135fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 136fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert append(u); 137fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return; 138fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 139fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 14063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int head = 0x7000; 14163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { 14263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= oldLength << 6; 14363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= newLength; 14463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer append(head); 14563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if ((array.length - length) >= 5 || growArray()) { 14663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int limit = length + 1; 14763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(oldLength < LENGTH_IN_1TRAIL) { 14863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= oldLength << 6; 14963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if(oldLength <= 0x7fff) { 15063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= LENGTH_IN_1TRAIL << 6; 15163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | oldLength); 15263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 15363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6; 15463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | (oldLength >> 15)); 15563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | oldLength); 15663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 15763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(newLength < LENGTH_IN_1TRAIL) { 15863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= newLength; 15963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if(newLength <= 0x7fff) { 16063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= LENGTH_IN_1TRAIL; 16163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | newLength); 16263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 16363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer head |= LENGTH_IN_2TRAIL + (newLength >> 30); 16463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | (newLength >> 15)); 16563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[limit++] = (char)(0x8000 | newLength); 16663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 16763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[length] = (char)head; 16863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer length = limit; 16963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 17063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 17163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 17263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private void append(int r) { 17363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if(length < array.length || growArray()) { 17463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array[length++] = (char)r; 17563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 17663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 17763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 17863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private boolean growArray() { 17963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int newCapacity; 18063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (array.length == STACK_CAPACITY) { 18163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer newCapacity = 2000; 18263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if (array.length == Integer.MAX_VALUE) { 18363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer throw new BufferOverflowException(); 18463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if (array.length >= (Integer.MAX_VALUE / 2)) { 18563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer newCapacity = Integer.MAX_VALUE; 18663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 18763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer newCapacity = 2 * array.length; 18863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 18963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Grow by at least 5 units so that a maximal change record will fit. 19063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if ((newCapacity - array.length) < 5) { 19163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer throw new BufferOverflowException(); 19263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 19363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array = Arrays.copyOf(array, newCapacity); 19463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return true; 19563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 19663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 19763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 19863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * How much longer is the new text compared with the old text? 19963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return new length minus old length 20063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 20163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 20263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 20363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int lengthDelta() { return delta; } 20463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 20563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return true if there are any change edits 20663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 20763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 20863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 209fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public boolean hasChanges() { return numChanges != 0; } 210fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 211fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 212fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @return the number of change edits 213fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @draft ICU 60 214fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @provisional This API might change or be removed in a future release. 215fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 216fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public int numberOfChanges() { return numChanges; } 21763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 21863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 21963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Access to the list of edits. 22063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @see #getCoarseIterator 22163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @see #getFineIterator 22263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 22363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 22463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 22563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public static final class Iterator { 22663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private final char[] array; 22763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int index; 22863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private final int length; 229fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 230fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 0 if we are not within compressed equal-length changes. 231fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Otherwise the number of remaining changes, including the current one. 232fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 23363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int remaining; 23463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private final boolean onlyChanges_, coarse; 23563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 236fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private int dir; // iteration direction: back(<0), initial(0), forward(>0) 23763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private boolean changed; 23863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int oldLength_, newLength_; 23963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int srcIndex, replIndex, destIndex; 24063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 24163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private Iterator(char[] a, int len, boolean oc, boolean crs) { 24263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer array = a; 24363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer length = len; 24463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer onlyChanges_ = oc; 24563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer coarse = crs; 24663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 24763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 24863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int readLength(int head) { 24963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (head < LENGTH_IN_1TRAIL) { 25063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return head; 25163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else if (head < LENGTH_IN_2TRAIL) { 25263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(index < length); 25363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(array[index] >= 0x8000); 25463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return array[index++] & 0x7fff; 25563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 25663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert((index + 2) <= length); 25763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(array[index] >= 0x8000); 25863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(array[index + 1] >= 0x8000); 25963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int len = ((head & 1) << 30) | 26063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer ((array[index] & 0x7fff) << 15) | 26163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer (array[index + 1] & 0x7fff); 26263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer index += 2; 26363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return len; 26463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 26563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 26663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 267fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private void updateNextIndexes() { 26863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer srcIndex += oldLength_; 26963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (changed) { 27063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer replIndex += newLength_; 27163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 27263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer destIndex += newLength_; 27363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 27463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 275fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private void updatePreviousIndexes() { 276fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert srcIndex -= oldLength_; 277fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (changed) { 278fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert replIndex -= newLength_; 279fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 280fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert destIndex -= newLength_; 281fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 282fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 28363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private boolean noNext() { 284fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // No change before or beyond the string. 285fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = 0; 28663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer changed = false; 28763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer oldLength_ = newLength_ = 0; 28863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return false; 28963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 29063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 29163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 29263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Advances to the next edit. 29363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return true if there is another edit 29463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 29563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 29663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 29763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public boolean next() { 29863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return next(onlyChanges_); 29963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 30063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 30163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private boolean next(boolean onlyChanges) { 302fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Forward iteration: Update the string indexes to the limit of the current span, 303fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // and post-increment-read array units to assemble a new span. 304fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Leaves the array index one after the last unit of that span. 305fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (dir > 0) { 306fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updateNextIndexes(); 307fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 308fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (dir < 0) { 309fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Turn around from previous() to next(). 310fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Post-increment-read the same span again. 311fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 0) { 312fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Fine-grained iterator: 313fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Stay on the current one of a sequence of compressed changes. 314fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ++index; // next() rests on the index after the sequence unit. 315fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = 1; 316fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 317fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 318fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 319fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = 1; 320fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 321fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining >= 1) { 322fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Fine-grained iterator: Continue a sequence of compressed changes. 323fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 1) { 324fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert --remaining; 325fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 326fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 327fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining = 0; 32863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 32963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (index >= length) { 33063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return noNext(); 33163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 33263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer int u = array[index++]; 33363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (u <= MAX_UNCHANGED) { 33463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Combine adjacent unchanged ranges. 33563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer changed = false; 33663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer oldLength_ = u + 1; 33763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer while (index < length && (u = array[index]) <= MAX_UNCHANGED) { 33863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer ++index; 33963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer oldLength_ += u + 1; 34063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 34163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer newLength_ = oldLength_; 34263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (onlyChanges) { 343fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updateNextIndexes(); 34463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (index >= length) { 34563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return noNext(); 34663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 34763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // already fetched u > MAX_UNCHANGED at index 34863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer ++index; 34963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 35063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return true; 35163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 35263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 35363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer changed = true; 35463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (u <= MAX_SHORT_CHANGE) { 355fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int oldLen = u >> 12; 356fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; 357fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int num = (u & SHORT_CHANGE_NUM_MASK) + 1; 35863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (coarse) { 359fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = num * oldLen; 360fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = num * newLen; 36163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 362fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Split a sequence of changes that was compressed into one unit. 363fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = oldLen; 364fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = newLen; 365fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (num > 1) { 366fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining = num; // This is the first of two or more changes. 367fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 36863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return true; 36963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 37063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 37163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(u <= 0x7fff); 37263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer oldLength_ = readLength((u >> 6) & 0x3f); 37363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer newLength_ = readLength(u & 0x3f); 37463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (!coarse) { 37563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return true; 37663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 37763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 37863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Combine adjacent changes. 37963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer while (index < length && (u = array[index]) > MAX_UNCHANGED) { 38063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer ++index; 38163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer if (u <= MAX_SHORT_CHANGE) { 382fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int num = (u & SHORT_CHANGE_NUM_MASK) + 1; 383fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ += (u >> 12) * num; 384fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; 38563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } else { 38663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer assert(u <= 0x7fff); 387fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ += readLength((u >> 6) & 0x3f); 388fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ += readLength(u & 0x3f); 389fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 390fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 391fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 392fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 393fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 394fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private boolean previous() { 395fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Backward iteration: Pre-decrement-read array units to assemble a new span, 396fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // then update the string indexes to the start of that span. 397fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Leaves the array index on the head unit of that span. 398fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (dir >= 0) { 399fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (dir > 0) { 400fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Turn around from next() to previous(). 401fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Set the string indexes to the span limit and 402fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // pre-decrement-read the same span again. 403fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 0) { 404fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Fine-grained iterator: 405fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Stay on the current one of a sequence of compressed changes. 406fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert --index; // previous() rests on the sequence unit. 407fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = -1; 408fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 409fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 410fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updateNextIndexes(); 411fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 412fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = -1; 413fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 414fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 0) { 415fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Fine-grained iterator: Continue a sequence of compressed changes. 416fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int u = array[index]; 417fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); 418fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) { 419fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ++remaining; 420fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updatePreviousIndexes(); 421fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 422fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 423fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining = 0; 424fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 425fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (index <= 0) { 426fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return noNext(); 427fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 428fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int u = array[--index]; 429fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (u <= MAX_UNCHANGED) { 430fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Combine adjacent unchanged ranges. 431fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert changed = false; 432fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = u + 1; 433fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) { 434fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert --index; 435fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ += u + 1; 436fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 437fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = oldLength_; 438fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // No need to handle onlyChanges as long as previous() is called only from findIndex(). 439fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updatePreviousIndexes(); 440fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 441fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 442fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert changed = true; 443fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (u <= MAX_SHORT_CHANGE) { 444fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int oldLen = u >> 12; 445fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; 446fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int num = (u & SHORT_CHANGE_NUM_MASK) + 1; 447fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (coarse) { 448fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = num * oldLen; 449fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = num * newLen; 450fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 451fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Split a sequence of changes that was compressed into one unit. 452fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = oldLen; 453fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = newLen; 454fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (num > 1) { 455fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining = 1; // This is the last of two or more changes. 456fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 457fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updatePreviousIndexes(); 458fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 459fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 460fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 461fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (u <= 0x7fff) { 462fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // The change is encoded in u alone. 463fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = readLength((u >> 6) & 0x3f); 464fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = readLength(u & 0x3f); 465fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 466fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Back up to the head of the change, read the lengths, 467fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // and reset the index to the head again. 468fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert assert(index > 0); 469fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert while ((u = array[--index]) > 0x7fff) {} 470fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert assert(u > MAX_SHORT_CHANGE); 471fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int headIndex = index++; 472fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ = readLength((u >> 6) & 0x3f); 473fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ = readLength(u & 0x3f); 474fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert index = headIndex; 475fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 476fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (!coarse) { 477fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updatePreviousIndexes(); 478fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return true; 479fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 480fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 481fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Combine adjacent changes. 482fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) { 483fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert --index; 484fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (u <= MAX_SHORT_CHANGE) { 485fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int num = (u & SHORT_CHANGE_NUM_MASK) + 1; 486fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ += (u >> 12) * num; 487fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; 488fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else if (u <= 0x7fff) { 489fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Read the lengths, and reset the index to the head again. 490fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int headIndex = index++; 491fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ += readLength((u >> 6) & 0x3f); 492fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ += readLength(u & 0x3f); 493fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert index = headIndex; 49463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 49563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 496fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert updatePreviousIndexes(); 49763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return true; 49863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 49963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 50063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 50163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Finds the edit that contains the source index. 50263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * The source index may be found in a non-change 50363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * even if normal iteration would skip non-changes. 50463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Normal iteration can continue from a found edit. 50563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * 50663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * <p>The iterator state before this search logically does not matter. 50763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * (It may affect the performance of the search.) 50863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * 50963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * <p>The iterator state after this search is undefined 51063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * if the source index is out of bounds for the source string. 51163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * 51263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @param i source index 51363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return true if the edit for the source index was found 51463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 51563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 51663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 51763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public boolean findSourceIndex(int i) { 518fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return findIndex(i, true) == 0; 519fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 520fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 521fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 522fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Finds the edit that contains the destination index. 523fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * The destination index may be found in a non-change 524fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * even if normal iteration would skip non-changes. 525fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Normal iteration can continue from a found edit. 526fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 527fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>The iterator state before this search logically does not matter. 528fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * (It may affect the performance of the search.) 529fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 530fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>The iterator state after this search is undefined 531fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * if the source index is out of bounds for the source string. 532fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 533fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @param i destination index 534fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @return true if the edit for the destination index was found 535fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @draft ICU 60 536fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @provisional This API might change or be removed in a future release. 537fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 538fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public boolean findDestinationIndex(int i) { 539fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return findIndex(i, false) == 0; 540fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 541fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 542fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** @return -1: error or i<0; 0: found; 1: i>=string length */ 543fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert private int findIndex(int i, boolean findSource) { 544fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i < 0) { return -1; } 545fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int spanStart, spanLength; 546fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (findSource) { // find source index 547fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanStart = srcIndex; 548fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanLength = oldLength_; 549fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { // find destination index 550fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanStart = destIndex; 551fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanLength = newLength_; 552fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 553fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i < spanStart) { 554fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i >= (spanStart / 2)) { 555fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Search backwards. 556fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert for (;;) { 557fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert boolean hasPrevious = previous(); 558fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert assert(hasPrevious); // because i>=0 and the first span starts at 0 559fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanStart = findSource ? srcIndex : destIndex; 560fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i >= spanStart) { 561fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // The index is in the current span. 562fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 563fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 564fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 0) { 565fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Is the index in one of the remaining compressed edits? 566fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // spanStart is the start of the current span, first of the remaining ones. 567fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanLength = findSource ? oldLength_ : newLength_; 568fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int u = array[index]; 569fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert assert(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); 570fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining; 571fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int len = num * spanLength; 572fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i >= (spanStart - len)) { 573fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int n = ((spanStart - i - 1) / spanLength) + 1; 574fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // 1 <= n <= num 575fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert srcIndex -= n * oldLength_; 576fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert replIndex -= n * newLength_; 577fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert destIndex -= n * newLength_; 578fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining += n; 579fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 580fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 581fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Skip all of these edits at once. 582fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert srcIndex -= num * oldLength_; 583fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert replIndex -= num * newLength_; 584fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert destIndex -= num * newLength_; 585fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert remaining = 0; 586fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 587fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 588fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 58963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Reset the iterator to the start. 590fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert dir = 0; 59163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; 592fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else if (i < (spanStart + spanLength)) { 59363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // The index is in the current span. 594fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 59563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 59663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer while (next(false)) { 597fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (findSource) { 598fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanStart = srcIndex; 599fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanLength = oldLength_; 600fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 601fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanStart = destIndex; 602fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert spanLength = newLength_; 603fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 604fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i < (spanStart + spanLength)) { 60563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // The index is in the current span. 606fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 60763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 608fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (remaining > 1) { 60963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Is the index in one of the remaining compressed edits? 610fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // spanStart is the start of the current span, first of the remaining ones. 611fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int len = remaining * spanLength; 612fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (i < (spanStart + len)) { 613fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1 614fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert srcIndex += n * oldLength_; 615fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert replIndex += n * newLength_; 616fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert destIndex += n * newLength_; 61763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer remaining -= n; 618fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 61963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 62063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer // Make next() skip all of these edits at once. 621fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert oldLength_ *= remaining; 622fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert newLength_ *= remaining; 62363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer remaining = 0; 62463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 62563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 626fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 1; 627fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 628fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 629fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 630fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Returns the destination index corresponding to the given source index. 631fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * If the source index is inside a change edit (not at its start), 632fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * then the destination index at the end of that edit is returned, 633fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * since there is no information about index mapping inside a change edit. 634fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 635fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>(This means that indexes to the start and middle of an edit, 636fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * for example around a grapheme cluster, are mapped to indexes 637fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * encompassing the entire edit. 638fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * The alternative, mapping an interior index to the start, 639fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * would map such an interval to an empty one.) 640fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 641fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>This operation will usually but not always modify this object. 642fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * The iterator state after this search is undefined. 643fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 644fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @param i source index 645fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @return destination index; undefined if i is not 0..string length 646fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @draft ICU 60 647fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @provisional This API might change or be removed in a future release. 648fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 649fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public int destinationIndexFromSourceIndex(int i) { 650fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int where = findIndex(i, true); 651fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (where < 0) { 652fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Error or before the string. 653fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 654fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 655fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (where > 0 || i == srcIndex) { 656fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // At or after string length, or at start of the found span. 657fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return destIndex; 658fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 659fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (changed) { 660fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // In a change span, map to its end. 661fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return destIndex + newLength_; 662fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 663fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // In an unchanged span, offset 1:1 within it. 664fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return destIndex + (i - srcIndex); 665fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 666fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 667fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 668fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 669fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Returns the source index corresponding to the given destination index. 670fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * If the destination index is inside a change edit (not at its start), 671fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * then the source index at the end of that edit is returned, 672fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * since there is no information about index mapping inside a change edit. 673fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 674fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>(This means that indexes to the start and middle of an edit, 675fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * for example around a grapheme cluster, are mapped to indexes 676fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * encompassing the entire edit. 677fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * The alternative, mapping an interior index to the start, 678fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * would map such an interval to an empty one.) 679fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 680fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>This operation will usually but not always modify this object. 681fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * The iterator state after this search is undefined. 682fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 683fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @param i destination index 684fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @return source index; undefined if i is not 0..string length 685fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @draft ICU 60 686fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @provisional This API might change or be removed in a future release. 687fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 688fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public int sourceIndexFromDestinationIndex(int i) { 689fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int where = findIndex(i, false); 690fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (where < 0) { 691fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Error or before the string. 692fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return 0; 693fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 694fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (where > 0 || i == destIndex) { 695fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // At or after string length, or at start of the found span. 696fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return srcIndex; 697fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 698fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (changed) { 699fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // In a change span, map to its end. 700fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return srcIndex + oldLength_; 701fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 702fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // In an unchanged span, offset within it. 703fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return srcIndex + (i - destIndex); 704fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 70563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 70663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 70763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 70863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return true if this edit replaces oldLength() units with newLength() different ones. 70963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * false if oldLength units remain unchanged. 71063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 71163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 71263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 71363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public boolean hasChange() { return changed; } 71463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 71563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return the number of units in the original string which are replaced or remain unchanged. 71663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 71763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 71863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 71963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int oldLength() { return oldLength_; } 72063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 72163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return the number of units in the modified string, if hasChange() is true. 72263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Same as oldLength if hasChange() is false. 72363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 72463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 72563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 72663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int newLength() { return newLength_; } 72763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 72863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 72963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return the current index into the source string 73063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 73163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 73263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 73363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int sourceIndex() { return srcIndex; } 73463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 73563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return the current index into the replacement-characters-only string, 73663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * not counting unchanged spans 73763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 73863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 73963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 74063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int replacementIndex() { return replIndex; } 74163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 74263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return the current index into the full destination string 74363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 74463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 74563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 74663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public int destinationIndex() { return destIndex; } 74763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer }; 74863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 74963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 75063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Returns an Iterator for coarse-grained changes for simple string updates. 75163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Skips non-changes. 75263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return an Iterator that merges adjacent changes. 75363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 75463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 75563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 75663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public Iterator getCoarseChangesIterator() { 75763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return new Iterator(array, length, true, true); 75863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 75963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 76063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 76163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. 76263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return an Iterator that merges adjacent changes. 76363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 76463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 76563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 76663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public Iterator getCoarseIterator() { 76763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return new Iterator(array, length, false, true); 76863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 76963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 77063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 77163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Returns an Iterator for fine-grained changes for modifying styled text. 77263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Skips non-changes. 77363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return an Iterator that separates adjacent changes. 77463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 77563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 77663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 77763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public Iterator getFineChangesIterator() { 77863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return new Iterator(array, length, true, false); 77963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 78063cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer 78163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer /** 78263cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. 78363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @return an Iterator that separates adjacent changes. 78463cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @draft ICU 59 78563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer * @provisional This API might change or be removed in a future release. 78663cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer */ 78763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer public Iterator getFineIterator() { 78863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer return new Iterator(array, length, false, false); 78963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer } 790fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 791fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert /** 792fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Merges the two input Edits and appends the result to this object. 793fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 794fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>Consider two string transformations (for example, normalization and case mapping) 795fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * where each records Edits in addition to writing an output string.<br> 796fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Edits ab reflect how substrings of input string a 797fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * map to substrings of intermediate string b.<br> 798fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * Edits bc reflect how substrings of intermediate string b 799fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * map to substrings of output string c.<br> 800fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * This function merges ab and bc such that the additional edits 801fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * recorded in this object reflect how substrings of input string a 802fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * map to substrings of output string c. 803fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 804fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * <p>If unrelated Edits are passed in where the output string of the first 805fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * has a different length than the input string of the second, 806fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * then an IllegalArgumentException is thrown. 807fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * 808fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @param ab reflects how substrings of input string a 809fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * map to substrings of intermediate string b. 810fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @param bc reflects how substrings of intermediate string b 811fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * map to substrings of output string c. 812fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @return this, with the merged edits appended 813fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @draft ICU 60 814fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert * @provisional This API might change or be removed in a future release. 815fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert */ 816fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert public Edits mergeAndAppend(Edits ab, Edits bc) { 817fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c. 818fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Parallel iteration over both Edits. 819fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert Iterator abIter = ab.getFineIterator(); 820fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert Iterator bcIter = bc.getFineIterator(); 821fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert boolean abHasNext = true, bcHasNext = true; 822fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Copy iterator state into local variables, so that we can modify and subdivide spans. 823fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // ab old & new length, bc old & new length 824fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0; 825fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // When we have different-intermediate-length changes, we accumulate a larger change. 826fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int pending_aLength = 0, pending_cLength = 0; 827fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert for (;;) { 828fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // At this point, for each of the two iterators: 829fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Either we are done with the locally cached current edit, 830fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // and its intermediate-string length has been reset, 831fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // or we will continue to work with a truncated remainder of this edit. 832fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // 833fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // If the current edit is done, and the iterator has not yet reached the end, 834fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // then we fetch the next edit. This is true for at least one of the iterators. 835fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // 836fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Normally it does not matter whether we fetch from ab and then bc or vice versa. 837fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // However, the result is observably different when 838fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // ab deletions meet bc insertions at the same intermediate-string index. 839fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Some users expect the bc insertions to come first, so we fetch from bc first. 840fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (bc_bLength == 0) { 841fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (bcHasNext && (bcHasNext = bcIter.next())) { 842fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert bc_bLength = bcIter.oldLength(); 843fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert cLength = bcIter.newLength(); 844fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (bc_bLength == 0) { 845fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // insertion 846fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength == 0 || !abIter.hasChange()) { 847fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength, pending_cLength + cLength); 848fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 849fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 850fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_cLength += cLength; 851fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 852fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 853fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 854fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 855fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // else see if the other iterator is done, too. 856fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 857fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength == 0) { 858fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (abHasNext && (abHasNext = abIter.next())) { 859fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert aLength = abIter.oldLength(); 860fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ab_bLength = abIter.newLength(); 861fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength == 0) { 862fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // deletion 863fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) { 864fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength + aLength, pending_cLength); 865fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 866fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 867fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength += aLength; 868fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 869fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 870fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 871fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else if (bc_bLength == 0) { 872fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Both iterators are done at the same time: 873fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // The intermediate-string lengths match. 874fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert break; 875fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { 876fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert throw new IllegalArgumentException( 877fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert "The ab output string is shorter than the bc input string."); 878fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 879fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 880fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (bc_bLength == 0) { 881fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert throw new IllegalArgumentException( 882fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert "The bc input string is shorter than the ab output string."); 883fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 884fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Done fetching: ab_bLength > 0 && bc_bLength > 0 885fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 886fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // The current state has two parts: 887fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // - Past: We accumulate a longer ac edit in the "pending" variables. 888fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // - Current: We have copies of the current ab/bc edits in local variables. 889fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // At least one side is newly fetched. 890fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // One side might be a truncated remainder of an edit we fetched earlier. 891fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert 892fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (!abIter.hasChange() && !bcIter.hasChange()) { 893fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // An unchanged span all the way from string a to string c. 894fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (pending_aLength != 0 || pending_cLength != 0) { 895fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength, pending_cLength); 896fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 897fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 898fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert int unchangedLength = aLength <= cLength ? aLength : cLength; 899fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addUnchanged(unchangedLength); 900fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ab_bLength = aLength -= unchangedLength; 901fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert bc_bLength = cLength -= unchangedLength; 902fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // At least one of the unchanged spans is now empty. 903fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 904fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 905fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (!abIter.hasChange() && bcIter.hasChange()) { 906fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Unchanged a->b but changed b->c. 907fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength >= bc_bLength) { 908fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Split the longer unchanged span into change + remainder. 909fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength + bc_bLength, pending_cLength + cLength); 910fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 911fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert aLength = ab_bLength -= bc_bLength; 912fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert bc_bLength = 0; 913fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 914fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 915fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Handle the shorter unchanged span below like a change. 916fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else if (abIter.hasChange() && !bcIter.hasChange()) { 917fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Changed a->b and then unchanged b->c. 918fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength <= bc_bLength) { 919fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Split the longer unchanged span into change + remainder. 920fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength + aLength, pending_cLength + ab_bLength); 921fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 922fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert cLength = bc_bLength -= ab_bLength; 923fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ab_bLength = 0; 924fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 925fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 926fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Handle the shorter unchanged span below like a change. 927fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { // both abIter.hasChange() && bcIter.hasChange() 928fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength == bc_bLength) { 929fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Changes on both sides up to the same position. Emit & reset. 930fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength + aLength, pending_cLength + cLength); 931fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength = pending_cLength = 0; 932fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ab_bLength = bc_bLength = 0; 933fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert continue; 934fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 935fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 936fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // Accumulate the a->c change, reset the shorter side, 937fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert // keep a remainder of the longer one. 938fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_aLength += aLength; 939fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert pending_cLength += cLength; 940fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (ab_bLength < bc_bLength) { 941fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert bc_bLength -= ab_bLength; 942fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert cLength = ab_bLength = 0; 943fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } else { // ab_bLength > bc_bLength 944fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert ab_bLength -= bc_bLength; 945fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert aLength = bc_bLength = 0; 946fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 947fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 948fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert if (pending_aLength != 0 || pending_cLength != 0) { 949fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert addReplace(pending_aLength, pending_cLength); 950fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 951fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert return this; 952fe77e7203e518f62b5bd8e8c603bca361e9cf47bFredrik Roubert } 95363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer} 954