1ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/* 2ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Diff Match and Patch 3ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 4ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Copyright 2006 Google Inc. 5ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * http://code.google.com/p/google-diff-match-patch/ 6ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 7ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Licensed under the Apache License, Version 2.0 (the "License"); 8ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * you may not use this file except in compliance with the License. 9ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * You may obtain a copy of the License at 10ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 11ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * http://www.apache.org/licenses/LICENSE-2.0 12ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 13ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Unless required by applicable law or agreed to in writing, software 14ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * distributed under the License is distributed on an "AS IS" BASIS, 15ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * See the License for the specific language governing permissions and 17ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * limitations under the License. 18ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 19ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 20ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskipackage name.fraser.neil.plaintext; 21ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 22ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.io.UnsupportedEncodingException; 23ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.net.URLEncoder; 24ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.net.URLDecoder; 25ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.ArrayList; 26ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Arrays; 27ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.HashMap; 28ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.HashSet; 29ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.LinkedList; 30ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.List; 31ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.ListIterator; 32ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Map; 33ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Set; 34ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Stack; 35ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.regex.Matcher; 36ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.regex.Pattern; 37ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 38ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 39ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/* 40ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Functions for diff, match and patch. 41ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Computes the difference between two texts to create a patch. 42ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Applies the patch onto another text, allowing for errors. 43ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 44ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @author fraser@google.com (Neil Fraser) 45ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 46ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 47ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/** 48ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Class containing the diff, match and patch methods. 49ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Also contains the behaviour settings. 50ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 51ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskipublic class diff_match_patch { 52ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 53ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Defaults. 54ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Set these on your diff_match_patch instance to override the defaults. 55ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 56ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 57ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Number of seconds to map a diff before giving up (0 for infinity). 58ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 59ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public float Diff_Timeout = 1.0f; 60ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 61ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Cost of an empty edit operation in terms of edit characters. 62ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 63ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public short Diff_EditCost = 4; 64ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 65ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * The size beyond which the double-ended diff activates. 66ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Double-ending is twice as fast, but less accurate. 67ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 68ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public short Diff_DualThreshold = 32; 69ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 70ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * At what point is no match declared (0.0 = perfection, 1.0 = very loose). 71ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 72ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public float Match_Threshold = 0.5f; 73ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 74ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * How far to search for a match (0 = exact location, 1000+ = broad match). 75ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * A match this many characters away from the expected location will add 76ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 1.0 to the score (0.0 is a perfect match). 77ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 78ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int Match_Distance = 1000; 79ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 80ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * When deleting a large block of text (over ~64 characters), how close does 81ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * the contents have to match the expected contents. (0.0 = perfection, 82ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 1.0 = very loose). Note that Match_Threshold controls how closely the 83ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * end points of a delete need to match. 84ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 85ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public float Patch_DeleteThreshold = 0.5f; 86ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 87ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Chunk size for context length. 88ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 89ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public short Patch_Margin = 4; 90ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 91ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 92ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * The number of bits in an int. 93ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 94ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private int Match_MaxBits = 32; 95ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 96ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 97ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Internal class for returning results from diff_linesToChars(). 98ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Other less paranoid languages just use a three-element array. 99ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected static class LinesToCharsResult { 101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected String chars1; 102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected String chars2; 103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected List<String> lineArray; 104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinesToCharsResult(String chars1, String chars2, 106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<String> lineArray) { 107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.chars1 = chars1; 108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.chars2 = chars2; 109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.lineArray = lineArray; 110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // DIFF FUNCTIONS 115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 117ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 118ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * The data structure representing a diff is a Linked list of Diff objects: 119ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), 120ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Diff(Operation.EQUAL, " world.")} 121ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * which means: delete "Hello", add "Goodbye" and keep " world." 122ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 123ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public enum Operation { 124ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski DELETE, INSERT, EQUAL 125ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Find the differences between two texts. 130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Run a faster slightly less optimal diff 131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * This method allows the 'checklines' of diff_main() to be optional. 132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Most of the time checklines is wanted, so default to true. 133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string to be diffed. 134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string to be diffed. 135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Linked List of Diff objects. 136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Diff> diff_main(String text1, String text2) { 138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diff_main(text1, text2, true); 139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Find the differences between two texts. Simplifies the problem by 143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * stripping any common prefix or suffix off the texts before diffing. 144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string to be diffed. 145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string to be diffed. 146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param checklines Speedup flag. If false, then don't run a 147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * line-level diff first to identify the changed areas. 148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * If true, then run a faster slightly less optimal diff 149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Linked List of Diff objects. 150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Diff> diff_main(String text1, String text2, 152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean checklines) { 153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Check for null inputs. 154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1 == null || text2 == null) { 155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Null inputs. (diff_main)"); 156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Check for equality (speedup). 159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs; 160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.equals(text2)) { 161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = new LinkedList<Diff>(); 162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, text1)); 163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Trim off common prefix (speedup). 167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int commonlength = diff_commonPrefix(text1, text2); 168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String commonprefix = text1.substring(0, commonlength); 169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1 = text1.substring(commonlength); 170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2 = text2.substring(commonlength); 171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Trim off common suffix (speedup). 173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski commonlength = diff_commonSuffix(text1, text2); 174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String commonsuffix = text1.substring(text1.length() - commonlength); 175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1 = text1.substring(0, text1.length() - commonlength); 176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2 = text2.substring(0, text2.length() - commonlength); 177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Compute the diff on the middle block. 179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = diff_compute(text1, text2, checklines); 180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Restore the prefix and suffix. 182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (commonprefix.length() != 0) { 183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.addFirst(new Diff(Operation.EQUAL, commonprefix)); 184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (commonsuffix.length() != 0) { 186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.addLast(new Diff(Operation.EQUAL, commonsuffix)); 187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupMerge(diffs); 190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Find the differences between two texts. Assumes that the texts do not 196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * have any common prefix or suffix. 197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string to be diffed. 198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string to be diffed. 199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param checklines Speedup flag. If false, then don't run a 200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * line-level diff first to identify the changed areas. 201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * If true, then run a faster slightly less optimal diff 202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Linked List of Diff objects. 203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinkedList<Diff> diff_compute(String text1, String text2, 205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean checklines) { 206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs = new LinkedList<Diff>(); 207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.length() == 0) { 209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Just add some text (speedup). 210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.INSERT, text2)); 211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text2.length() == 0) { 215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Just delete some text (speedup). 216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.DELETE, text1)); 217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String longtext = text1.length() > text2.length() ? text1 : text2; 221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String shorttext = text1.length() > text2.length() ? text2 : text1; 222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int i = longtext.indexOf(shorttext); 223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (i != -1) { 224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Shorter text is inside the longer text (speedup). 225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Operation op = (text1.length() > text2.length()) ? 226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Operation.DELETE : Operation.INSERT; 227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(op, longtext.substring(0, i))); 228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, shorttext)); 229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(op, longtext.substring(i + shorttext.length()))); 230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski longtext = shorttext = null; // Garbage collect. 233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Check to see if the problem can be split in two. 235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String[] hm = diff_halfMatch(text1, text2); 236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (hm != null) { 237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // A half-match was found, sort out the return data. 238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1_a = hm[0]; 239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1_b = hm[1]; 240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text2_a = hm[2]; 241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text2_b = hm[3]; 242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String mid_common = hm[4]; 243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Send both pairs off for separate processing. 244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a, checklines); 245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b, checklines); 246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Merge the results. 247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = diffs_a; 248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, mid_common)); 249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.addAll(diffs_b); 250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Perform a real diff. 254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (checklines && (text1.length() < 100 || text2.length() < 100)) { 255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski checklines = false; // Too trivial for the overhead. 256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<String> linearray = null; 258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (checklines) { 259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Scan the text on a line-by-line basis first. 260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinesToCharsResult b = diff_linesToChars(text1, text2); 261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1 = b.chars1; 262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2 = b.chars2; 263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski linearray = b.lineArray; 264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = diff_map(text1, text2); 267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs == null) { 268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No acceptable result. 269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = new LinkedList<Diff>(); 270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.DELETE, text1)); 271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.INSERT, text2)); 272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (checklines) { 275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Convert the diff back to original text. 276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_charsToLines(diffs, linearray); 277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Eliminate freak matches (e.g. blank lines) 278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupSemantic(diffs); 279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Rediff any replacement blocks, this time character-by-character. 281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add a dummy entry at the end. 282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, "")); 283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int count_delete = 0; 284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int count_insert = 0; 285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text_delete = ""; 286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text_insert = ""; 287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Diff> pointer = diffs.listIterator(); 288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff thisDiff = pointer.next(); 289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != null) { 290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (thisDiff.operation) { 291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_insert++; 293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert += thisDiff.text; 294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_delete++; 297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete += thisDiff.text; 298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Upon reaching an equality, check for prior redundancies. 301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (count_delete >= 1 && count_insert >= 1) { 302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Delete the offending records and add the merged ones. 303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int j = 0; j < count_delete + count_insert; j++) { 305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); 307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff newDiff : diff_main(text_delete, text_insert, false)) { 309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(newDiff); 310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_insert = 0; 313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_delete = 0; 314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete = ""; 315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert = ""; 316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.removeLast(); // Remove the dummy entry at the end. 321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Split two texts into a list of strings. Reduce the texts to a string of 328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * hashes where each Unicode character represents one line. 329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 First string. 330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 Second string. 331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return An object containing the encoded text1, the encoded text2 and 332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * the List of unique strings. The zeroth element of the List of 333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * unique strings is intentionally blank. 334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinesToCharsResult diff_linesToChars(String text1, String text2) { 336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<String> lineArray = new ArrayList<String>(); 337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<String, Integer> lineHash = new HashMap<String, Integer>(); 338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // e.g. linearray[4] == "Hello\n" 339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // e.g. linehash.get("Hello\n") == 4 340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // "\x00" is a valid character, but various debuggers don't like it. 342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // So we'll insert a junk entry to avoid generating a null character. 343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineArray.add(""); 344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); 346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); 347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return new LinesToCharsResult(chars1, chars2, lineArray); 348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Split a text into a list of strings. Reduce the texts to a string of 353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * hashes where each Unicode character represents one line. 354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text String to encode. 355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param lineArray List of unique strings. 356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param lineHash Map of strings to indices. 357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Encoded string. 358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private String diff_linesToCharsMunge(String text, List<String> lineArray, 360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<String, Integer> lineHash) { 361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int lineStart = 0; 362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int lineEnd = -1; 363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String line; 364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder chars = new StringBuilder(); 365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Walk the text, pulling out a substring for each line. 366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // text.split('\n') would would temporarily double our memory footprint. 367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Modifying text would create many large strings to garbage collect. 368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (lineEnd < text.length() - 1) { 369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineEnd = text.indexOf('\n', lineStart); 370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (lineEnd == -1) { 371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineEnd = text.length() - 1; 372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski line = text.substring(lineStart, lineEnd + 1); 374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineStart = lineEnd + 1; 375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (lineHash.containsKey(line)) { 377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski chars.append(String.valueOf((char) (int) lineHash.get(line))); 378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineArray.add(line); 380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lineHash.put(line, lineArray.size() - 1); 381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski chars.append(String.valueOf((char) (lineArray.size() - 1))); 382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return chars.toString(); 385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Rehydrate the text in a diff from a string of line hashes to real lines of 390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * text. 391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param lineArray List of unique strings. 393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected void diff_charsToLines(LinkedList<Diff> diffs, 395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<String> lineArray) { 396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text; 397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff diff : diffs) { 398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = new StringBuilder(); 399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int y = 0; y < diff.text.length(); y++) { 400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(lineArray.get(diff.text.charAt(y))); 401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff.text = text.toString(); 403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Explore the intersection points between the two texts. 409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string to be diffed. 410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string to be diffed. 411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Diff objects or null if no diff available. 412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinkedList<Diff> diff_map(String text1, String text2) { 414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski long ms_end = System.currentTimeMillis() + (long) (Diff_Timeout * 1000); 415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Cache the text lengths to prevent multiple calls. 416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int text1_length = text1.length(); 417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int text2_length = text2.length(); 418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int max_d = text1_length + text2_length - 1; 419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean doubleEnd = Diff_DualThreshold * 2 < max_d; 420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<Set<Long>> v_map1 = new ArrayList<Set<Long>>(); 421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<Set<Long>> v_map2 = new ArrayList<Set<Long>>(); 422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<Integer, Integer> v1 = new HashMap<Integer, Integer>(); 423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<Integer, Integer> v2 = new HashMap<Integer, Integer>(); 424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v1.put(1, 0); 425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v2.put(1, 0); 426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int x, y; 427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Long footstep = 0L; // Used to track overlapping paths. 428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<Long, Integer> footsteps = new HashMap<Long, Integer>(); 429ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean done = false; 430ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // If the total number of characters is odd, then the front path will 431ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // collide with the reverse path. 432ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean front = ((text1_length + text2_length) % 2 == 1); 433ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int d = 0; d < max_d; d++) { 434ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Bail out if timeout reached. 435ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (Diff_Timeout > 0 && System.currentTimeMillis() > ms_end) { 436ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return null; 437ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 438ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 439ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Walk the front path one step. 440ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map1.add(new HashSet<Long>()); // Adds at index 'd'. 441ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int k = -d; k <= d; k += 2) { 442ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (k == -d || k != d && v1.get(k - 1) < v1.get(k + 1)) { 443ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x = v1.get(k + 1); 444ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 445ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x = v1.get(k - 1) + 1; 446ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 447ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y = x - k; 448ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (doubleEnd) { 449ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footstep = diff_footprint(x, y); 450ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (front && (footsteps.containsKey(footstep))) { 451ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski done = true; 452ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 453ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!front) { 454ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footsteps.put(footstep, d); 455ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 456ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 457ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!done && x < text1_length && y < text2_length 458ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && text1.charAt(x) == text2.charAt(y)) { 459ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x++; 460ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y++; 461ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (doubleEnd) { 462ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footstep = diff_footprint(x, y); 463ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (front && (footsteps.containsKey(footstep))) { 464ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski done = true; 465ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 466ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!front) { 467ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footsteps.put(footstep, d); 468ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 469ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 470ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 471ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v1.put(k, x); 472ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map1.get(d).add(diff_footprint(x, y)); 473ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (x == text1_length && y == text2_length) { 474ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Reached the end in single-path mode. 475ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diff_path1(v_map1, text1, text2); 476ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (done) { 477ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Front path ran over reverse path. 478ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map2 = v_map2.subList(0, footsteps.get(footstep) + 1); 479ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> a = diff_path1(v_map1, text1.substring(0, x), 480ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2.substring(0, y)); 481ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski a.addAll(diff_path2(v_map2, text1.substring(x), text2.substring(y))); 482ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return a; 483ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 484ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 485ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 486ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (doubleEnd) { 487ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Walk the reverse path one step. 488ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map2.add(new HashSet<Long>()); // Adds at index 'd'. 489ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int k = -d; k <= d; k += 2) { 490ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (k == -d || k != d && v2.get(k - 1) < v2.get(k + 1)) { 491ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x = v2.get(k + 1); 492ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 493ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x = v2.get(k - 1) + 1; 494ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 495ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y = x - k; 496ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footstep = diff_footprint(text1_length - x, text2_length - y); 497ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!front && (footsteps.containsKey(footstep))) { 498ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski done = true; 499ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 500ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (front) { 501ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footsteps.put(footstep, d); 502ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 503ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!done && x < text1_length && y < text2_length 504ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && text1.charAt(text1_length - x - 1) 505ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski == text2.charAt(text2_length - y - 1)) { 506ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x++; 507ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y++; 508ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footstep = diff_footprint(text1_length - x, text2_length - y); 509ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!front && (footsteps.containsKey(footstep))) { 510ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski done = true; 511ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 512ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (front) { 513ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski footsteps.put(footstep, d); 514ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 515ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 516ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v2.put(k, x); 517ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map2.get(d).add(diff_footprint(x, y)); 518ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (done) { 519ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Reverse path ran over front path. 520ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski v_map1 = v_map1.subList(0, footsteps.get(footstep) + 1); 521ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> a 522ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski = diff_path1(v_map1, text1.substring(0, text1_length - x), 523ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2.substring(0, text2_length - y)); 524ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski a.addAll(diff_path2(v_map2, text1.substring(text1_length - x), 525ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2.substring(text2_length - y))); 526ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return a; 527ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 528ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 529ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 530ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 531ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Number of diffs equals number of characters, no commonality at all. 532ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return null; 533ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 534ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 535ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 536ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 537ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Work from the middle back to the start to determine the path. 538ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param v_map List of path sets. 539ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string fragment to be diffed. 540ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string fragment to be diffed. 541ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Diff objects. 542ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 543ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinkedList<Diff> diff_path1(List<Set<Long>> v_map, 544ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1, String text2) { 545ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> path = new LinkedList<Diff>(); 546ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int x = text1.length(); 547ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int y = text2.length(); 548ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Operation last_op = null; 549ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int d = v_map.size() - 2; d >= 0; d--) { 550ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (true) { 551ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (v_map.get(d).contains(diff_footprint(x - 1, y))) { 552ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x--; 553ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.DELETE) { 554ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getFirst().text = text1.charAt(x) + path.getFirst().text; 555ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 556ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addFirst(new Diff(Operation.DELETE, 557ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1.substring(x, x + 1))); 558ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 559ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.DELETE; 560ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 561ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { 562ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y--; 563ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.INSERT) { 564ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getFirst().text = text2.charAt(y) + path.getFirst().text; 565ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 566ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addFirst(new Diff(Operation.INSERT, 567ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2.substring(y, y + 1))); 568ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 569ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.INSERT; 570ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 571ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 572ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x--; 573ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y--; 574ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski assert (text1.charAt(x) == text2.charAt(y)) 575ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski : "No diagonal. Can't happen. (diff_path1)"; 576ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.EQUAL) { 577ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getFirst().text = text1.charAt(x) + path.getFirst().text; 578ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 579ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addFirst(new Diff(Operation.EQUAL, text1.substring(x, x + 1))); 580ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 581ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.EQUAL; 582ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 583ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 584ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 585ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return path; 586ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 587ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 588ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 589ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 590ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Work from the middle back to the end to determine the path. 591ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param v_map List of path sets. 592ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old string fragment to be diffed. 593ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New string fragment to be diffed. 594ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Diff objects. 595ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 596ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected LinkedList<Diff> diff_path2(List<Set<Long>> v_map, 597ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1, String text2) { 598ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> path = new LinkedList<Diff>(); 599ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int x = text1.length(); 600ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int y = text2.length(); 601ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Operation last_op = null; 602ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int d = v_map.size() - 2; d >= 0; d--) { 603ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (true) { 604ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (v_map.get(d).contains(diff_footprint(x - 1, y))) { 605ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x--; 606ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.DELETE) { 607ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getLast().text += text1.charAt(text1.length() - x - 1); 608ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 609ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addLast(new Diff(Operation.DELETE, 610ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1.substring(text1.length() - x - 1, text1.length() - x))); 611ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 612ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.DELETE; 613ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 614ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) { 615ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y--; 616ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.INSERT) { 617ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getLast().text += text2.charAt(text2.length() - y - 1); 618ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 619ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addLast(new Diff(Operation.INSERT, 620ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2.substring(text2.length() - y - 1, text2.length() - y))); 621ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 622ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.INSERT; 623ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 624ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 625ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x--; 626ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski y--; 627ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski assert (text1.charAt(text1.length() - x - 1) 628ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski == text2.charAt(text2.length() - y - 1)) 629ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski : "No diagonal. Can't happen. (diff_path2)"; 630ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (last_op == Operation.EQUAL) { 631ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.getLast().text += text1.charAt(text1.length() - x - 1); 632ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 633ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski path.addLast(new Diff(Operation.EQUAL, 634ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1.substring(text1.length() - x - 1, text1.length() - x))); 635ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 636ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_op = Operation.EQUAL; 637ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 638ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 639ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 640ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return path; 641ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 642ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 643ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 644ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 645ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute a good hash of two integers. 646ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param x First int. 647ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param y Second int. 648ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return A long made up of both ints. 649ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 650ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected long diff_footprint(int x, int y) { 651ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // The maximum size for a long is 9,223,372,036,854,775,807 652ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // The maximum size for an int is 2,147,483,647 653ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Two ints fit nicely in one long. 654ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski long result = x; 655ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski result = result << 32; 656ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski result += y; 657ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return result; 658ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 659ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 660ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 661ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 662ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Determine the common prefix of two strings 663ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 First string. 664ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 Second string. 665ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The number of characters common to the start of each string. 666ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 667ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int diff_commonPrefix(String text1, String text2) { 668ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Performance analysis: http://neil.fraser.name/news/2007/10/09/ 669ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int n = Math.min(text1.length(), text2.length()); 670ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int i = 0; i < n; i++) { 671ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.charAt(i) != text2.charAt(i)) { 672ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return i; 673ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 674ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 675ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return n; 676ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 677ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 678ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 679ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 680ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Determine the common suffix of two strings 681ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 First string. 682ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 Second string. 683ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The number of characters common to the end of each string. 684ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 685ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int diff_commonSuffix(String text1, String text2) { 686ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Performance analysis: http://neil.fraser.name/news/2007/10/09/ 687ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int text1_length = text1.length(); 688ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int text2_length = text2.length(); 689ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int n = Math.min(text1_length, text2_length); 690ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int i = 1; i <= n; i++) { 691ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) { 692ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return i - 1; 693ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 694ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 695ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return n; 696ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 697ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 698ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 699ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 700ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Do the two texts share a substring which is at least half the length of 701ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * the longer text? 702ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 First string. 703ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 Second string. 704ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Five element String array, containing the prefix of text1, the 705ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * suffix of text1, the prefix of text2, the suffix of text2 and the 706ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * common middle. Or null if there was no match. 707ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 708ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected String[] diff_halfMatch(String text1, String text2) { 709ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String longtext = text1.length() > text2.length() ? text1 : text2; 710ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String shorttext = text1.length() > text2.length() ? text2 : text1; 711ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (longtext.length() < 10 || shorttext.length() < 1) { 712ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return null; // Pointless. 713ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 714ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 715ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // First check if the second quarter is the seed for a half-match. 716ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String[] hm1 = diff_halfMatchI(longtext, shorttext, 717ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski (longtext.length() + 3) / 4); 718ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Check again based on the third quarter. 719ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String[] hm2 = diff_halfMatchI(longtext, shorttext, 720ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski (longtext.length() + 1) / 2); 721ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String[] hm; 722ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (hm1 == null && hm2 == null) { 723ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return null; 724ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (hm2 == null) { 725ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski hm = hm1; 726ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (hm1 == null) { 727ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski hm = hm2; 728ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 729ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Both matched. Select the longest. 730ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; 731ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 732ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 733ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // A half-match was found, sort out the return data. 734ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.length() > text2.length()) { 735ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return hm; 736ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski //return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; 737ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 738ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]}; 739ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 740ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 741ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 742ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 743ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 744ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Does a substring of shorttext exist within longtext such that the 745ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * substring is at least half the length of longtext? 746ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param longtext Longer string. 747ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param shorttext Shorter string. 748ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param i Start index of quarter length substring within longtext. 749ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Five element String array, containing the prefix of longtext, the 750ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * suffix of longtext, the prefix of shorttext, the suffix of shorttext 751ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * and the common middle. Or null if there was no match. 752ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 753ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private String[] diff_halfMatchI(String longtext, String shorttext, int i) { 754ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Start with a 1/4 length substring at position i as a seed. 755ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String seed = longtext.substring(i, i + longtext.length() / 4); 756ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int j = -1; 757ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String best_common = ""; 758ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String best_longtext_a = "", best_longtext_b = ""; 759ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String best_shorttext_a = "", best_shorttext_b = ""; 760ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while ((j = shorttext.indexOf(seed, j + 1)) != -1) { 761ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int prefixLength = diff_commonPrefix(longtext.substring(i), 762ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski shorttext.substring(j)); 763ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int suffixLength = diff_commonSuffix(longtext.substring(0, i), 764ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski shorttext.substring(0, j)); 765ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (best_common.length() < suffixLength + prefixLength) { 766ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_common = shorttext.substring(j - suffixLength, j) 767ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + shorttext.substring(j, j + prefixLength); 768ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_longtext_a = longtext.substring(0, i - suffixLength); 769ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_longtext_b = longtext.substring(i + prefixLength); 770ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_shorttext_a = shorttext.substring(0, j - suffixLength); 771ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_shorttext_b = shorttext.substring(j + prefixLength); 772ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 773ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 774ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (best_common.length() >= longtext.length() / 2) { 775ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return new String[]{best_longtext_a, best_longtext_b, 776ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_shorttext_a, best_shorttext_b, best_common}; 777ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 778ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return null; 779ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 780ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 781ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 782ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 783ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 784ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Reduce the number of edits by eliminating semantically trivial equalities. 785ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 786ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 787ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public void diff_cleanupSemantic(LinkedList<Diff> diffs) { 788ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.isEmpty()) { 789ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return; 790ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 791ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean changes = false; 792ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Stack<Diff> equalities = new Stack<Diff>(); // Stack of qualities. 793ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String lastequality = null; // Always equal to equalities.lastElement().text 794ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Diff> pointer = diffs.listIterator(); 795ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Number of characters that changed prior to the equality. 796ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int length_changes1 = 0; 797ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Number of characters that changed after the equality. 798ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int length_changes2 = 0; 799ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff thisDiff = pointer.next(); 800ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != null) { 801ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (thisDiff.operation == Operation.EQUAL) { 802ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // equality found 803ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.push(thisDiff); 804ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski length_changes1 = length_changes2; 805ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski length_changes2 = 0; 806ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastequality = thisDiff.text; 807ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 808ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // an insertion or deletion 809ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski length_changes2 += thisDiff.text.length(); 810ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (lastequality != null && (lastequality.length() <= length_changes1) 811ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && (lastequality.length() <= length_changes2)) { 812ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski //System.out.println("Splitting: '" + lastequality + "'"); 813ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Walk back to offending equality. 814ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != equalities.lastElement()) { 815ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.previous(); 816ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 817ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); 818ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 819ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Replace equality with a delete. 820ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.set(new Diff(Operation.DELETE, lastequality)); 821ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insert a corresponding an insert. 822ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(new Diff(Operation.INSERT, lastequality)); 823ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 824ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.pop(); // Throw away the equality we just deleted. 825ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!equalities.empty()) { 826ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Throw away the previous equality (it needs to be reevaluated). 827ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.pop(); 828ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 829ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (equalities.empty()) { 830ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // There are no previous equalities, walk back to the start. 831ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (pointer.hasPrevious()) { 832ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 833ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 834ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 835ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // There is a safe equality we can fall back to. 836ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = equalities.lastElement(); 837ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != pointer.previous()) { 838ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Intentionally empty loop. 839ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 840ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 841ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 842ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski length_changes1 = 0; // Reset the counters. 843ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski length_changes2 = 0; 844ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastequality = null; 845ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski changes = true; 846ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 847ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 848ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 849ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 850ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 851ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (changes) { 852ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupMerge(diffs); 853ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 854ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupSemanticLossless(diffs); 855ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 856ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 857ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 858ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 859ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Look for single edits surrounded on both sides by equalities 860ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * which can be shifted sideways to align the edit to a word boundary. 861ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 862ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 863ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 864ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public void diff_cleanupSemanticLossless(LinkedList<Diff> diffs) { 865ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String equality1, edit, equality2; 866ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String commonString; 867ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int commonOffset; 868ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int score, bestScore; 869ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String bestEquality1, bestEdit, bestEquality2; 870ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Create a new iterator at the start. 871ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Diff> pointer = diffs.listIterator(); 872ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff prevDiff = pointer.hasNext() ? pointer.next() : null; 873ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff thisDiff = pointer.hasNext() ? pointer.next() : null; 874ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff nextDiff = pointer.hasNext() ? pointer.next() : null; 875ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Intentionally ignore the first and last element (don't need checking). 876ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (nextDiff != null) { 877ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (prevDiff.operation == Operation.EQUAL && 878ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff.operation == Operation.EQUAL) { 879ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // This is a single edit surrounded by equalities. 880ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality1 = prevDiff.text; 881ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski edit = thisDiff.text; 882ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality2 = nextDiff.text; 883ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 884ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // First, shift the edit as far left as possible. 885ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski commonOffset = diff_commonSuffix(equality1, edit); 886ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (commonOffset != 0) { 887ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski commonString = edit.substring(edit.length() - commonOffset); 888ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality1 = equality1.substring(0, equality1.length() - commonOffset); 889ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski edit = commonString + edit.substring(0, edit.length() - commonOffset); 890ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality2 = commonString + equality2; 891ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 892ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 893ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Second, step character by character right, looking for the best fit. 894ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEquality1 = equality1; 895ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEdit = edit; 896ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEquality2 = equality2; 897ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestScore = diff_cleanupSemanticScore(equality1, edit) 898ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + diff_cleanupSemanticScore(edit, equality2); 899ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (edit.length() != 0 && equality2.length() != 0 900ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && edit.charAt(0) == equality2.charAt(0)) { 901ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality1 += edit.charAt(0); 902ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski edit = edit.substring(1) + equality2.charAt(0); 903ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equality2 = equality2.substring(1); 904ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score = diff_cleanupSemanticScore(equality1, edit) 905ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + diff_cleanupSemanticScore(edit, equality2); 906ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // The >= encourages trailing rather than leading whitespace on edits. 907ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (score >= bestScore) { 908ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestScore = score; 909ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEquality1 = equality1; 910ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEdit = edit; 911ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bestEquality2 = equality2; 912ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 913ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 914ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 915ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!prevDiff.text.equals(bestEquality1)) { 916ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // We have an improvement, save it back to the diff. 917ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (bestEquality1.length() != 0) { 918ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevDiff.text = bestEquality1; 919ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 920ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past nextDiff. 921ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past thisDiff. 922ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past prevDiff. 923ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); // Delete prevDiff. 924ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); // Walk past thisDiff. 925ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); // Walk past nextDiff. 926ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 927ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff.text = bestEdit; 928ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (bestEquality2.length() != 0) { 929ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff.text = bestEquality2; 930ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 931ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); // Delete nextDiff. 932ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff = thisDiff; 933ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = prevDiff; 934ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 935ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 936ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 937ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevDiff = thisDiff; 938ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = nextDiff; 939ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff = pointer.hasNext() ? pointer.next() : null; 940ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 941ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 942ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 943ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 944ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 945ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Given two strings, compute a score representing whether the internal 946ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * boundary falls on logical boundaries. 947ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Scores range from 5 (best) to 0 (worst). 948ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param one First string. 949ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param two Second string. 950ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The score. 951ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 952ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private int diff_cleanupSemanticScore(String one, String two) { 953ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (one.length() == 0 || two.length() == 0) { 954ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Edges are the best. 955ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return 5; 956ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 957ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 958ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Each port of this function behaves slightly differently due to 959ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // subtle differences in each language's definition of things like 960ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // 'whitespace'. Since this function's purpose is largely cosmetic, 961ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // the choice has been made to use each language's native features 962ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // rather than force total conformity. 963ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int score = 0; 964ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // One point for non-alphanumeric. 965ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!Character.isLetterOrDigit(one.charAt(one.length() - 1)) 966ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski || !Character.isLetterOrDigit(two.charAt(0))) { 967ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score++; 968ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Two points for whitespace. 969ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (Character.isWhitespace(one.charAt(one.length() - 1)) 970ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski || Character.isWhitespace(two.charAt(0))) { 971ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score++; 972ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Three points for line breaks. 973ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (Character.getType(one.charAt(one.length() - 1)) == Character.CONTROL 974ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski || Character.getType(two.charAt(0)) == Character.CONTROL) { 975ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score++; 976ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Four points for blank lines. 977ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (BLANKLINEEND.matcher(one).find() 978ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski || BLANKLINESTART.matcher(two).find()) { 979ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score++; 980ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 981ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 982ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 983ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 984ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return score; 985ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 986ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 987ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 988ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private Pattern BLANKLINEEND 989ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL); 990ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private Pattern BLANKLINESTART 991ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL); 992ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 993ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 994ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 995ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Reduce the number of edits by eliminating operationally trivial equalities. 996ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 997ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 998ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public void diff_cleanupEfficiency(LinkedList<Diff> diffs) { 999ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.isEmpty()) { 1000ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return; 1001ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1002ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean changes = false; 1003ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Stack<Diff> equalities = new Stack<Diff>(); // Stack of equalities. 1004ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String lastequality = null; // Always equal to equalities.lastElement().text 1005ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Diff> pointer = diffs.listIterator(); 1006ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Is there an insertion operation before the last equality. 1007ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean pre_ins = false; 1008ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Is there a deletion operation before the last equality. 1009ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean pre_del = false; 1010ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Is there an insertion operation after the last equality. 1011ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean post_ins = false; 1012ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Is there a deletion operation after the last equality. 1013ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean post_del = false; 1014ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff thisDiff = pointer.next(); 1015ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff safeDiff = thisDiff; // The last Diff that is known to be unsplitable. 1016ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != null) { 1017ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (thisDiff.operation == Operation.EQUAL) { 1018ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // equality found 1019ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) { 1020ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Candidate found. 1021ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.push(thisDiff); 1022ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pre_ins = post_ins; 1023ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pre_del = post_del; 1024ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastequality = thisDiff.text; 1025ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1026ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Not a candidate, and can never become one. 1027ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.clear(); 1028ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastequality = null; 1029ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski safeDiff = thisDiff; 1030ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1031ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski post_ins = post_del = false; 1032ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1033ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // an insertion or deletion 1034ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (thisDiff.operation == Operation.DELETE) { 1035ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski post_del = true; 1036ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1037ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski post_ins = true; 1038ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1039ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /* 1040ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Five types to be split: 1041ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> 1042ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * <ins>A</ins>X<ins>C</ins><del>D</del> 1043ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * <ins>A</ins><del>B</del>X<ins>C</ins> 1044ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * <ins>A</del>X<ins>C</ins><del>D</del> 1045ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * <ins>A</ins><del>B</del>X<del>C</del> 1046ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1047ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (lastequality != null 1048ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && ((pre_ins && pre_del && post_ins && post_del) 1049ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski || ((lastequality.length() < Diff_EditCost / 2) 1050ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) 1051ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { 1052ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski //System.out.println("Splitting: '" + lastequality + "'"); 1053ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Walk back to offending equality. 1054ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != equalities.lastElement()) { 1055ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.previous(); 1056ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1057ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); 1058ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1059ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Replace equality with a delete. 1060ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.set(new Diff(Operation.DELETE, lastequality)); 1061ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insert a corresponding an insert. 1062ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality)); 1063ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1064ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.pop(); // Throw away the equality we just deleted. 1065ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastequality = null; 1066ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (pre_ins && pre_del) { 1067ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No changes made which could affect previous entry, keep going. 1068ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski post_ins = post_del = true; 1069ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.clear(); 1070ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski safeDiff = thisDiff; 1071ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1072ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!equalities.empty()) { 1073ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Throw away the previous equality (it needs to be reevaluated). 1074ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski equalities.pop(); 1075ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1076ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (equalities.empty()) { 1077ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // There are no previous questionable equalities, 1078ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // walk back to the last known safe diff. 1079ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = safeDiff; 1080ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1081ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // There is an equality we can fall back to. 1082ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = equalities.lastElement(); 1083ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1084ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != pointer.previous()) { 1085ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Intentionally empty loop. 1086ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1087ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski post_ins = post_del = false; 1088ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1089ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1090ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski changes = true; 1091ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1092ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1093ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 1094ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1095ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1096ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (changes) { 1097ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupMerge(diffs); 1098ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1099ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Reorder and merge like edit sections. Merge equalities. 1104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Any edit section can move as long as it doesn't cross an equality. 1105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public void diff_cleanupMerge(LinkedList<Diff> diffs) { 1108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, "")); // Add a dummy entry at the end. 1109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Diff> pointer = diffs.listIterator(); 1110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int count_delete = 0; 1111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int count_insert = 0; 1112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text_delete = ""; 1113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text_insert = ""; 1114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff thisDiff = pointer.next(); 1115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff prevEqual = null; 1116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int commonlength; 1117ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (thisDiff != null) { 1118ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (thisDiff.operation) { 1119ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 1120ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_insert++; 1121ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert += thisDiff.text; 1122ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevEqual = null; 1123ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1124ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 1125ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_delete++; 1126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete += thisDiff.text; 1127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevEqual = null; 1128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 1130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (count_delete != 0 || count_insert != 0) { 1131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Delete the offending records. 1132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Reverse direction. 1133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (count_delete-- > 0) { 1134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 1135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); 1136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (count_insert-- > 0) { 1138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 1139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); 1140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (count_delete != 0 && count_insert != 0) { 1142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Factor out any common prefixies. 1143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski commonlength = diff_commonPrefix(text_insert, text_delete); 1144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (commonlength != 0) { 1145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (pointer.hasPrevious()) { 1146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.previous(); 1147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski assert thisDiff.operation == Operation.EQUAL 1148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski : "Previous diff should have been an equality."; 1149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff.text += text_insert.substring(0, commonlength); 1150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); 1151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(new Diff(Operation.EQUAL, 1153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert.substring(0, commonlength))); 1154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert = text_insert.substring(commonlength); 1156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete = text_delete.substring(commonlength); 1157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Factor out any common suffixies. 1159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski commonlength = diff_commonSuffix(text_insert, text_delete); 1160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (commonlength != 0) { 1161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.next(); 1162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff.text = text_insert.substring(text_insert.length() 1163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - commonlength) + thisDiff.text; 1164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert = text_insert.substring(0, text_insert.length() 1165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - commonlength); 1166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete = text_delete.substring(0, text_delete.length() 1167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - commonlength); 1168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); 1169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insert the merged records. 1172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text_delete.length() != 0) { 1173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(new Diff(Operation.DELETE, text_delete)); 1174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text_insert.length() != 0) { 1176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(new Diff(Operation.INSERT, text_insert)); 1177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Step forward to the equality. 1179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 1180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (prevEqual != null) { 1181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Merge this equality with the previous one. 1182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevEqual.text += thisDiff.text; 1183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); 1184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.previous(); 1185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); // Forward direction 1186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_insert = 0; 1188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski count_delete = 0; 1189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_delete = ""; 1190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text_insert = ""; 1191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevEqual = thisDiff; 1192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 1195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // System.out.println(diff); 1197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.getLast().text.length() == 0) { 1198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.removeLast(); // Remove the dummy entry at the end. 1199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /* 1202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Second pass: look for single edits surrounded on both sides by equalities 1203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * which can be shifted sideways to eliminate an equality. 1204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 1205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean changes = false; 1207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Create a new iterator at the start. 1208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // (As opposed to walking the current one back.) 1209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer = diffs.listIterator(); 1210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff prevDiff = pointer.hasNext() ? pointer.next() : null; 1211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.hasNext() ? pointer.next() : null; 1212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff nextDiff = pointer.hasNext() ? pointer.next() : null; 1213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Intentionally ignore the first and last element (don't need checking). 1214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (nextDiff != null) { 1215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (prevDiff.operation == Operation.EQUAL && 1216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff.operation == Operation.EQUAL) { 1217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // This is a single edit surrounded by equalities. 1218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (thisDiff.text.endsWith(prevDiff.text)) { 1219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Shift the edit over the previous equality. 1220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff.text = prevDiff.text 1221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + thisDiff.text.substring(0, thisDiff.text.length() 1222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - prevDiff.text.length()); 1223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff.text = prevDiff.text + nextDiff.text; 1224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past nextDiff. 1225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past thisDiff. 1226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.previous(); // Walk past prevDiff. 1227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); // Delete prevDiff. 1228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.next(); // Walk past thisDiff. 1229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = pointer.next(); // Walk past nextDiff. 1230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff = pointer.hasNext() ? pointer.next() : null; 1231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski changes = true; 1232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (thisDiff.text.startsWith(nextDiff.text)) { 1233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Shift the edit over the next equality. 1234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevDiff.text += nextDiff.text; 1235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff.text = thisDiff.text.substring(nextDiff.text.length()) 1236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + nextDiff.text; 1237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); // Delete nextDiff. 1238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff = pointer.hasNext() ? pointer.next() : null; 1239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski changes = true; 1240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prevDiff = thisDiff; 1243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski thisDiff = nextDiff; 1244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nextDiff = pointer.hasNext() ? pointer.next() : null; 1245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // If shifts were made, the diff needs reordering and another shift sweep. 1247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (changes) { 1248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupMerge(diffs); 1249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * loc is a location in text1, compute and return the equivalent location in 1255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * text2. 1256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * e.g. "The cat" vs "The big cat", 1->1, 5->8 1257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param loc Location within text1. 1259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Location within text2. 1260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int diff_xIndex(LinkedList<Diff> diffs, int loc) { 1262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int chars1 = 0; 1263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int chars2 = 0; 1264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int last_chars1 = 0; 1265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int last_chars2 = 0; 1266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff lastDiff = null; 1267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.INSERT) { 1269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Equality or deletion. 1270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski chars1 += aDiff.text.length(); 1271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.DELETE) { 1273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Equality or insertion. 1274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski chars2 += aDiff.text.length(); 1275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (chars1 > loc) { 1277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Overshot the location. 1278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastDiff = aDiff; 1279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_chars1 = chars1; 1282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_chars2 = chars2; 1283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (lastDiff != null && lastDiff.operation == Operation.DELETE) { 1285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // The location was deleted. 1286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return last_chars2; 1287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add the remaining character length. 1289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return last_chars2 + (loc - last_chars1); 1290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Convert a Diff list into a pretty HTML report. 1295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return HTML representation. 1297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String diff_prettyHtml(LinkedList<Diff> diffs) { 1299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder html = new StringBuilder(); 1300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int i = 0; 1301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text = aDiff.text.replace("&", "&").replace("<", "<") 1303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace(">", ">").replace("\n", "¶<BR>"); 1304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (aDiff.operation) { 1305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 1306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=").append(i) 1307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .append("\">").append(text).append("</INS>"); 1308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 1310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=").append(i) 1311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .append("\">").append(text).append("</DEL>"); 1312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 1314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski html.append("<SPAN TITLE=\"i=").append(i).append("\">").append(text) 1315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .append("</SPAN>"); 1316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.DELETE) { 1319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski i += aDiff.text.length(); 1320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return html.toString(); 1323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute and return the source text (all equalities and deletions). 1328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Source text. 1330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String diff_text1(LinkedList<Diff> diffs) { 1332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text = new StringBuilder(); 1333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.INSERT) { 1335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(aDiff.text); 1336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return text.toString(); 1339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute and return the destination text (all equalities and insertions). 1344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Destination text. 1346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String diff_text2(LinkedList<Diff> diffs) { 1348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text = new StringBuilder(); 1349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.DELETE) { 1351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(aDiff.text); 1352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return text.toString(); 1355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute the Levenshtein distance; the number of inserted, deleted or 1360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * substituted characters. 1361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs LinkedList of Diff objects. 1362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Number of changes. 1363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int diff_levenshtein(LinkedList<Diff> diffs) { 1365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int levenshtein = 0; 1366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int insertions = 0; 1367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int deletions = 0; 1368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (aDiff.operation) { 1370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 1371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski insertions += aDiff.text.length(); 1372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 1374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski deletions += aDiff.text.length(); 1375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 1377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // A deletion and an insertion is one substitution. 1378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski levenshtein += Math.max(insertions, deletions); 1379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski insertions = 0; 1380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski deletions = 0; 1381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski levenshtein += Math.max(insertions, deletions); 1385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return levenshtein; 1386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Crush the diff into an encoded string which describes the operations 1391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * required to transform text1 into text2. 1392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 1393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Operations are tab-separated. Inserted text is escaped using %xx notation. 1394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs Array of diff tuples. 1395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Delta text. 1396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String diff_toDelta(LinkedList<Diff> diffs) { 1398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text = new StringBuilder(); 1399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (aDiff.operation) { 1401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 1402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 1403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8") 1404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace('+', ' ')).append("\t"); 1405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (UnsupportedEncodingException e) { 1406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Not likely on modern system. 1407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new Error("This system does not support UTF-8.", e); 1408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 1411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append("-").append(aDiff.text.length()).append("\t"); 1412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 1414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append("=").append(aDiff.text.length()).append("\t"); 1415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String delta = text.toString(); 1419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (delta.length() != 0) { 1420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Strip off trailing tab character. 1421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski delta = delta.substring(0, delta.length() - 1); 1422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski delta = unescapeForEncodeUriCompatability(delta); 1423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return delta; 1425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1429ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Given the original text1, and an encoded string which describes the 1430ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * operations required to transform text1 into text2, compute the full diff. 1431ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Source string for the diff. 1432ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param delta Delta text. 1433ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Array of diff tuples or null if invalid. 1434ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @throws IllegalArgumentException If invalid input. 1435ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1436ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Diff> diff_fromDelta(String text1, String delta) 1437ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throws IllegalArgumentException { 1438ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs = new LinkedList<Diff>(); 1439ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int pointer = 0; // Cursor in text1 1440ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String[] tokens = delta.split("\t"); 1441ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (String token : tokens) { 1442ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (token.length() == 0) { 1443ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Blank tokens are ok (from a trailing \t). 1444ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski continue; 1445ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1446ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Each token begins with a one character parameter which specifies the 1447ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // operation of this token (delete, insert, equality). 1448ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String param = token.substring(1); 1449ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (token.charAt(0)) { 1450ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case '+': 1451ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // decode would change all "+" to " " 1452ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski param = param.replace("+", "%2B"); 1453ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 1454ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski param = URLDecoder.decode(param, "UTF-8"); 1455ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (UnsupportedEncodingException e) { 1456ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Not likely on modern system. 1457ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new Error("This system does not support UTF-8.", e); 1458ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (IllegalArgumentException e) { 1459ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Malformed URI sequence. 1460ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 1461ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Illegal escape in diff_fromDelta: " + param, e); 1462ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1463ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.INSERT, param)); 1464ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1465ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case '-': 1466ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Fall through. 1467ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case '=': 1468ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int n; 1469ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 1470ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski n = Integer.parseInt(param); 1471ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (NumberFormatException e) { 1472ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 1473ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Invalid number in diff_fromDelta: " + param, e); 1474ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1475ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (n < 0) { 1476ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 1477ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Negative number in diff_fromDelta: " + param); 1478ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1479ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text; 1480ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 1481ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = text1.substring(pointer, pointer += n); 1482ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (StringIndexOutOfBoundsException e) { 1483ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Delta length (" + pointer 1484ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + ") larger than source text length (" + text1.length() 1485ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + ").", e); 1486ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1487ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (token.charAt(0) == '=') { 1488ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.EQUAL, text)); 1489ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1490ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.add(new Diff(Operation.DELETE, text)); 1491ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1492ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1493ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski default: 1494ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Anything else is an error. 1495ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 1496ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Invalid diff operation in diff_fromDelta: " + token.charAt(0)); 1497ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1498ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1499ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (pointer != text1.length()) { 1500ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Delta length (" + pointer 1501ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + ") smaller than source text length (" + text1.length() + ")."); 1502ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1503ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return diffs; 1504ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1505ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1506ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1507ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // MATCH FUNCTIONS 1508ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1509ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1510ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1511ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Locate the best instance of 'pattern' in 'text' near 'loc'. 1512ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Returns -1 if no match found. 1513ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text The text to search. 1514ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param pattern The pattern to search for. 1515ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param loc The location to search around. 1516ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Best match index or -1. 1517ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1518ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int match_main(String text, String pattern, int loc) { 1519ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Check for null inputs. 1520ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text == null || pattern == null) { 1521ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Null inputs. (match_main)"); 1522ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1523ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1524ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski loc = Math.max(0, Math.min(loc, text.length())); 1525ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text.equals(pattern)) { 1526ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Shortcut (potentially not guaranteed by the algorithm) 1527ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return 0; 1528ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (text.length() == 0) { 1529ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Nothing to match. 1530ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return -1; 1531ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (loc + pattern.length() <= text.length() 1532ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && text.substring(loc, loc + pattern.length()).equals(pattern)) { 1533ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Perfect match at the perfect spot! (Includes case of null pattern) 1534ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return loc; 1535ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1536ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Do a fuzzy compare. 1537ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return match_bitap(text, pattern, loc); 1538ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1539ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1540ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1541ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1542ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1543ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Locate the best instance of 'pattern' in 'text' near 'loc' using the 1544ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Bitap algorithm. Returns -1 if no match found. 1545ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text The text to search. 1546ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param pattern The pattern to search for. 1547ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param loc The location to search around. 1548ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Best match index or -1. 1549ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1550ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected int match_bitap(String text, String pattern, int loc) { 1551ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits) 1552ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski : "Pattern too long for this application."; 1553ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1554ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Initialise the alphabet. 1555ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<Character, Integer> s = match_alphabet(pattern); 1556ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1557ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Highest score beyond which we give up. 1558ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski double score_threshold = Match_Threshold; 1559ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Is there a nearby exact match? (speedup) 1560ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int best_loc = text.indexOf(pattern, loc); 1561ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (best_loc != -1) { 1562ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), 1563ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score_threshold); 1564ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // What about in the other direction? (speedup) 1565ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_loc = text.lastIndexOf(pattern, loc + pattern.length()); 1566ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (best_loc != -1) { 1567ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), 1568ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score_threshold); 1569ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1570ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1571ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1572ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Initialise the bit arrays. 1573ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int matchmask = 1 << (pattern.length() - 1); 1574ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_loc = -1; 1575ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1576ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int bin_min, bin_mid; 1577ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int bin_max = pattern.length() + text.length(); 1578ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Empty initialization added to appease Java compiler. 1579ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int[] last_rd = new int[0]; 1580ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int d = 0; d < pattern.length(); d++) { 1581ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Scan for the best match; each iteration allows for one more error. 1582ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Run a binary search to determine how far from 'loc' we can stray at 1583ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // this error level. 1584ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_min = 0; 1585ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_mid = bin_max; 1586ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (bin_min < bin_mid) { 1587ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (match_bitapScore(d, loc + bin_mid, loc, pattern) 1588ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski <= score_threshold) { 1589ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_min = bin_mid; 1590ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1591ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_max = bin_mid; 1592ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1593ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_mid = (bin_max - bin_min) / 2 + bin_min; 1594ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1595ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Use the result from this iteration as the maximum for the next. 1596ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bin_max = bin_mid; 1597ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int start = Math.max(1, loc - bin_mid + 1); 1598ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int finish = Math.min(loc + bin_mid, text.length()) + pattern.length(); 1599ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1600ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int[] rd = new int[finish + 2]; 1601ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski rd[finish + 1] = (1 << d) - 1; 1602ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int j = finish; j >= start; j--) { 1603ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int charMatch; 1604ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) { 1605ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Out of range. 1606ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski charMatch = 0; 1607ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1608ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski charMatch = s.get(text.charAt(j - 1)); 1609ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1610ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (d == 0) { 1611ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // First pass: exact match. 1612ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; 1613ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1614ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Subsequent passes: fuzzy match. 1615ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski rd[j] = ((rd[j + 1] << 1) | 1) & charMatch 1616ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; 1617ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1618ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if ((rd[j] & matchmask) != 0) { 1619ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski double score = match_bitapScore(d, j - 1, loc, pattern); 1620ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // This match will almost certainly be better than any existing 1621ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // match. But check anyway. 1622ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (score <= score_threshold) { 1623ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Told you so. 1624ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski score_threshold = score; 1625ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski best_loc = j - 1; 1626ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (best_loc > loc) { 1627ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // When passing loc, don't exceed our current distance from loc. 1628ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start = Math.max(1, 2 * loc - best_loc); 1629ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1630ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Already passed loc, downhill from here on in. 1631ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1632ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1633ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1634ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1635ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1636ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { 1637ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No hope for a (better) match at greater error levels. 1638ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1639ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1640ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski last_rd = rd; 1641ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1642ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return best_loc; 1643ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1644ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1645ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1646ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1647ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute and return the score for a match with e errors and x location. 1648ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param e Number of errors in match. 1649ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param x Location of match. 1650ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param loc Expected location of match. 1651ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param pattern Pattern being sought. 1652ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Overall score for match (0.0 = good, 1.0 = bad). 1653ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1654ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private double match_bitapScore(int e, int x, int loc, String pattern) { 1655ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski float accuracy = (float) e / pattern.length(); 1656ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int proximity = Math.abs(loc - x); 1657ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (Match_Distance == 0) { 1658ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Dodge divide by zero error. 1659ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return proximity == 0 ? accuracy : 1.0; 1660ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1661ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return accuracy + (proximity / (float) Match_Distance); 1662ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1663ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1664ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1665ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1666ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Initialise the alphabet for the Bitap algorithm. 1667ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param pattern The text to encode. 1668ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Hash of character locations. 1669ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1670ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected Map<Character, Integer> match_alphabet(String pattern) { 1671ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Map<Character, Integer> s = new HashMap<Character, Integer>(); 1672ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski char[] char_pattern = pattern.toCharArray(); 1673ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (char c : char_pattern) { 1674ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski s.put(c, 0); 1675ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1676ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int i = 0; 1677ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (char c : char_pattern) { 1678ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski s.put(c, s.get(c) | (1 << (pattern.length() - i - 1))); 1679ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski i++; 1680ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1681ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return s; 1682ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1683ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1684ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1685ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // PATCH FUNCTIONS 1686ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1687ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1688ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1689ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Increase the context until it is unique, 1690ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * but don't let the pattern expand beyond Match_MaxBits. 1691ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patch The patch to grow. 1692ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text Source text. 1693ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1694ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski protected void patch_addContext(Patch patch, String text) { 1695ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text.length() == 0) { 1696ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return; 1697ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1698ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String pattern = text.substring(patch.start2, patch.start2 + patch.length1); 1699ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int padding = 0; 1700ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1701ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Look for the first and last matches of pattern in text. If two different 1702ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // matches are found, increase the pattern length. 1703ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (text.indexOf(pattern) != text.lastIndexOf(pattern) 1704ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { 1705ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski padding += Patch_Margin; 1706ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pattern = text.substring(Math.max(0, patch.start2 - padding), 1707ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Math.min(text.length(), patch.start2 + patch.length1 + padding)); 1708ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1709ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add one chunk for good luck. 1710ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski padding += Patch_Margin; 1711ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1712ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add the prefix. 1713ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String prefix = text.substring(Math.max(0, patch.start2 - padding), 1714ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2); 1715ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (prefix.length() != 0) { 1716ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix)); 1717ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1718ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add the suffix. 1719ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String suffix = text.substring(patch.start2 + patch.length1, 1720ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Math.min(text.length(), patch.start2 + patch.length1 + padding)); 1721ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (suffix.length() != 0) { 1722ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.addLast(new Diff(Operation.EQUAL, suffix)); 1723ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1724ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1725ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Roll back the start points. 1726ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 -= prefix.length(); 1727ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 -= prefix.length(); 1728ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Extend the lengths. 1729ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += prefix.length() + suffix.length(); 1730ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += prefix.length() + suffix.length(); 1731ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1732ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1733ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1734ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1735ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute a list of patches to turn text1 into text2. 1736ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * A set of diffs will be computed. 1737ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old text. 1738ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 New text. 1739ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Patch objects. 1740ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1741ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Patch> patch_make(String text1, String text2) { 1742ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1 == null || text2 == null) { 1743ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Null inputs. (patch_make)"); 1744ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1745ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No diffs provided, compute our own. 1746ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs = diff_main(text1, text2, true); 1747ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.size() > 2) { 1748ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupSemantic(diffs); 1749ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupEfficiency(diffs); 1750ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1751ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patch_make(text1, diffs); 1752ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1753ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1754ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1755ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1756ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute a list of patches to turn text1 into text2. 1757ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * text1 will be derived from the provided diffs. 1758ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs Array of diff tuples for text1 to text2. 1759ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Patch objects. 1760ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1761ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Patch> patch_make(LinkedList<Diff> diffs) { 1762ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs == null) { 1763ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Null inputs. (patch_make)"); 1764ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1765ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No origin string provided, compute our own. 1766ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1 = diff_text1(diffs); 1767ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patch_make(text1, diffs); 1768ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1769ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1770ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1771ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1772ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute a list of patches to turn text1 into text2. 1773ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * text2 is ignored, diffs are the delta between text1 and text2. 1774ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old text 1775ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text2 Ignored. 1776ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs Array of diff tuples for text1 to text2. 1777ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Patch objects. 1778ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @deprecated Prefer patch_make(String text1, LinkedList<Diff> diffs). 1779ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1780ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Patch> patch_make(String text1, String text2, 1781ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs) { 1782ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patch_make(text1, diffs); 1783ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1784ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1785ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1786ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1787ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Compute a list of patches to turn text1 into text2. 1788ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * text2 is not provided, diffs are the delta between text1 and text2. 1789ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text1 Old text. 1790ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param diffs Array of diff tuples for text1 to text2. 1791ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return LinkedList of Patch objects. 1792ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1793ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Patch> patch_make(String text1, LinkedList<Diff> diffs) { 1794ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1 == null || diffs == null) { 1795ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException("Null inputs. (patch_make)"); 1796ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1797ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1798ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Patch> patches = new LinkedList<Patch>(); 1799ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.isEmpty()) { 1800ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patches; // Get rid of the null case. 1801ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1802ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch patch = new Patch(); 1803ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int char_count1 = 0; // Number of characters into the text1 string. 1804ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int char_count2 = 0; // Number of characters into the text2 string. 1805ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Start with text1 (prepatch_text) and apply the diffs until we arrive at 1806ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // text2 (postpatch_text). We recreate the patches one by one to determine 1807ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // context info. 1808ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String prepatch_text = text1; 1809ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String postpatch_text = text1; 1810ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : diffs) { 1811ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) { 1812ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // A new patch starts here. 1813ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 = char_count1; 1814ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 = char_count2; 1815ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1816ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1817ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (aDiff.operation) { 1818ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 1819ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(aDiff); 1820ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += aDiff.text.length(); 1821ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski postpatch_text = postpatch_text.substring(0, char_count2) 1822ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + aDiff.text + postpatch_text.substring(char_count2); 1823ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1824ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 1825ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += aDiff.text.length(); 1826ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(aDiff); 1827ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski postpatch_text = postpatch_text.substring(0, char_count2) 1828ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + postpatch_text.substring(char_count2 + aDiff.text.length()); 1829ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1830ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 1831ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.text.length() <= 2 * Patch_Margin 1832ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) { 1833ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Small equality inside a patch. 1834ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(aDiff); 1835ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += aDiff.text.length(); 1836ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += aDiff.text.length(); 1837ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1838ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1839ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.text.length() >= 2 * Patch_Margin) { 1840ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Time for a new patch. 1841ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!patch.diffs.isEmpty()) { 1842ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch_addContext(patch, prepatch_text); 1843ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patches.add(patch); 1844ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch = new Patch(); 1845ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Unlike Unidiff, our patch lists have a rolling context. 1846ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff 1847ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Update prepatch text & pos to reflect the application of the 1848ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // just completed patch. 1849ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski prepatch_text = postpatch_text; 1850ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski char_count1 = char_count2; 1851ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1852ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1853ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 1854ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1855ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1856ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Update the current character count. 1857ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.INSERT) { 1858ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski char_count1 += aDiff.text.length(); 1859ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1860ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.DELETE) { 1861ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski char_count2 += aDiff.text.length(); 1862ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1863ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1864ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Pick up the leftover patch if not empty. 1865ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!patch.diffs.isEmpty()) { 1866ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch_addContext(patch, prepatch_text); 1867ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patches.add(patch); 1868ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1869ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1870ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patches; 1871ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1872ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1873ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1874ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1875ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Given an array of patches, return another array that is identical. 1876ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patches Array of patch objects. 1877ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Array of patch objects. 1878ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1879ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Patch> patch_deepCopy(LinkedList<Patch> patches) { 1880ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Patch> patchesCopy = new LinkedList<Patch>(); 1881ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Patch aPatch : patches) { 1882ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch patchCopy = new Patch(); 1883ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : aPatch.diffs) { 1884ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff diffCopy = new Diff(aDiff.operation, aDiff.text); 1885ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchCopy.diffs.add(diffCopy); 1886ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1887ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchCopy.start1 = aPatch.start1; 1888ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchCopy.start2 = aPatch.start2; 1889ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchCopy.length1 = aPatch.length1; 1890ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchCopy.length2 = aPatch.length2; 1891ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patchesCopy.add(patchCopy); 1892ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1893ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patchesCopy; 1894ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1895ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1896ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1897ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 1898ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Merge a set of patches onto the text. Return a patched text, as well 1899ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * as an array of true/false values indicating which patches were applied. 1900ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patches Array of patch objects 1901ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text Old text. 1902ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Two element Object array, containing the new text and an array of 1903ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * boolean values. 1904ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 1905ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public Object[] patch_apply(LinkedList<Patch> patches, String text) { 1906ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (patches.isEmpty()) { 1907ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return new Object[]{text, new boolean[0]}; 1908ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1909ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1910ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Deep copy the patches so that no changes are made to originals. 1911ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patches = patch_deepCopy(patches); 1912ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1913ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String nullPadding = patch_addPadding(patches); 1914ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = nullPadding + text + nullPadding; 1915ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch_splitMax(patches); 1916ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 1917ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int x = 0; 1918ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // delta keeps track of the offset between the expected and actual location 1919ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // of the previous patch. If there are patches expected at positions 10 and 1920ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // 20, but the first patch was found at 12, delta is 2 and the second patch 1921ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // has an effective expected position of 22. 1922ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int delta = 0; 1923ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean[] results = new boolean[patches.size()]; 1924ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Patch aPatch : patches) { 1925ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int expected_loc = aPatch.start2 + delta; 1926ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text1 = diff_text1(aPatch.diffs); 1927ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int start_loc; 1928ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int end_loc = -1; 1929ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.length() > this.Match_MaxBits) { 1930ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // patch_splitMax will only provide an oversized pattern in the case of 1931ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // a monster delete. 1932ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start_loc = match_main(text, 1933ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1.substring(0, this.Match_MaxBits), expected_loc); 1934ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (start_loc != -1) { 1935ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski end_loc = match_main(text, 1936ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text1.substring(text1.length() - this.Match_MaxBits), 1937ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski expected_loc + text1.length() - this.Match_MaxBits); 1938ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (end_loc == -1 || start_loc >= end_loc) { 1939ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Can't find valid trailing context. Drop this patch. 1940ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start_loc = -1; 1941ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1942ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1943ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1944ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start_loc = match_main(text, text1, expected_loc); 1945ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1946ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (start_loc == -1) { 1947ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // No match found. :( 1948ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski results[x] = false; 1949ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Subtract the delta for this failed patch from subsequent patches. 1950ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski delta -= aPatch.length2 - aPatch.length1; 1951ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1952ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Found a match. :) 1953ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski results[x] = true; 1954ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski delta = start_loc - expected_loc; 1955ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String text2; 1956ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (end_loc == -1) { 1957ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2 = text.substring(start_loc, 1958ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Math.min(start_loc + text1.length(), text.length())); 1959ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1960ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text2 = text.substring(start_loc, 1961ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Math.min(end_loc + this.Match_MaxBits, text.length())); 1962ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1963ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.equals(text2)) { 1964ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Perfect match, just shove the replacement text in. 1965ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = text.substring(0, start_loc) + diff_text2(aPatch.diffs) 1966ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + text.substring(start_loc + text1.length()); 1967ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1968ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Imperfect match. Run a diff to get a framework of equivalent 1969ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // indices. 1970ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs = diff_main(text1, text2, false); 1971ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (text1.length() > this.Match_MaxBits 1972ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && diff_levenshtein(diffs) / (float) text1.length() 1973ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski > this.Patch_DeleteThreshold) { 1974ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // The end points match, but the content is unacceptably bad. 1975ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski results[x] = false; 1976ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 1977ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_cleanupSemanticLossless(diffs); 1978ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int index1 = 0; 1979ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : aPatch.diffs) { 1980ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.EQUAL) { 1981ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int index2 = diff_xIndex(diffs, index1); 1982ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation == Operation.INSERT) { 1983ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insertion 1984ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = text.substring(0, start_loc + index2) + aDiff.text 1985ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + text.substring(start_loc + index2); 1986ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (aDiff.operation == Operation.DELETE) { 1987ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Deletion 1988ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = text.substring(0, start_loc + index2) 1989ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + text.substring(start_loc + diff_xIndex(diffs, 1990ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski index1 + aDiff.text.length())); 1991ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1992ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1993ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (aDiff.operation != Operation.DELETE) { 1994ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski index1 += aDiff.text.length(); 1995ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1996ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1997ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1998ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 1999ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2000ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski x++; 2001ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2002ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Strip the padding off. 2003ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text = text.substring(nullPadding.length(), text.length() 2004ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - nullPadding.length()); 2005ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return new Object[]{text, results}; 2006ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2007ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2008ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2009ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2010ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Add some padding on text start and end so that edges can match something. 2011ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Intended to be called only from within patch_apply. 2012ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patches Array of patch objects. 2013ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The padding string added to each side. 2014ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2015ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String patch_addPadding(LinkedList<Patch> patches) { 2016ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int paddingLength = this.Patch_Margin; 2017ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String nullPadding = ""; 2018ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (int x = 1; x <= paddingLength; x++) { 2019ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski nullPadding += String.valueOf((char) x); 2020ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2021ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2022ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Bump all the patches forward. 2023ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Patch aPatch : patches) { 2024ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski aPatch.start1 += paddingLength; 2025ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski aPatch.start2 += paddingLength; 2026ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2027ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2028ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add some padding on start of first diff. 2029ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch patch = patches.getFirst(); 2030ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<Diff> diffs = patch.diffs; 2031ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) { 2032ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add nullPadding equality. 2033ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.addFirst(new Diff(Operation.EQUAL, nullPadding)); 2034ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 -= paddingLength; // Should be 0. 2035ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 -= paddingLength; // Should be 0. 2036ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += paddingLength; 2037ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += paddingLength; 2038ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (paddingLength > diffs.getFirst().text.length()) { 2039ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Grow first equality. 2040ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff firstDiff = diffs.getFirst(); 2041ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int extraLength = paddingLength - firstDiff.text.length(); 2042ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski firstDiff.text = nullPadding.substring(firstDiff.text.length()) 2043ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski + firstDiff.text; 2044ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 -= extraLength; 2045ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 -= extraLength; 2046ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += extraLength; 2047ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += extraLength; 2048ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2049ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2050ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add some padding on end of last diff. 2051ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch = patches.getLast(); 2052ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs = patch.diffs; 2053ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) { 2054ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Add nullPadding equality. 2055ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diffs.addLast(new Diff(Operation.EQUAL, nullPadding)); 2056ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += paddingLength; 2057ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += paddingLength; 2058ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (paddingLength > diffs.getLast().text.length()) { 2059ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Grow last equality. 2060ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Diff lastDiff = diffs.getLast(); 2061ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int extraLength = paddingLength - lastDiff.text.length(); 2062ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski lastDiff.text += nullPadding.substring(0, extraLength); 2063ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += extraLength; 2064ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += extraLength; 2065ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2066ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2067ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return nullPadding; 2068ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2069ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2070ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2071ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2072ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Look through the patches and break up any which are longer than the 2073ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * maximum limit of the match algorithm. 2074ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patches LinkedList of Patch objects. 2075ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2076ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public void patch_splitMax(LinkedList<Patch> patches) { 2077ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int patch_size; 2078ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String precontext, postcontext; 2079ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch patch; 2080ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski int start1, start2; 2081ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski boolean empty; 2082ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Operation diff_type; 2083ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String diff_text; 2084ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski ListIterator<Patch> pointer = patches.listIterator(); 2085ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch bigpatch = pointer.hasNext() ? pointer.next() : null; 2086ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (bigpatch != null) { 2087ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (bigpatch.length1 <= Match_MaxBits) { 2088ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bigpatch = pointer.hasNext() ? pointer.next() : null; 2089ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski continue; 2090ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2091ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Remove the big old patch. 2092ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.remove(); 2093ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch_size = Match_MaxBits; 2094ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start1 = bigpatch.start1; 2095ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start2 = bigpatch.start2; 2096ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski precontext = ""; 2097ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!bigpatch.diffs.isEmpty()) { 2098ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Create one of several smaller patches. 2099ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch = new Patch(); 2100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski empty = true; 2101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 = start1 - precontext.length(); 2102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 = start2 - precontext.length(); 2103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (precontext.length() != 0) { 2104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 = patch.length2 = precontext.length(); 2105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(Operation.EQUAL, precontext)); 2106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!bigpatch.diffs.isEmpty() 2108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && patch.length1 < patch_size - Patch_Margin) { 2109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_type = bigpatch.diffs.getFirst().operation; 2110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_text = bigpatch.diffs.getFirst().text; 2111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diff_type == Operation.INSERT) { 2112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insertions are harmless. 2113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += diff_text.length(); 2114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start2 += diff_text.length(); 2115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.addLast(bigpatch.diffs.removeFirst()); 2116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski empty = false; 2117ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1 2118ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && patch.diffs.getFirst().operation == Operation.EQUAL 2119ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && diff_text.length() > 2 * patch_size) { 2120ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // This is a large deletion. Let it pass in one chunk. 2121ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += diff_text.length(); 2122ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start1 += diff_text.length(); 2123ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski empty = false; 2124ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(diff_type, diff_text)); 2125ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bigpatch.diffs.removeFirst(); 2126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Deletion or equality. Only take as much as we can stomach. 2128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski diff_text = diff_text.substring(0, Math.min(diff_text.length(), 2129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch_size - patch.length1 - Patch_Margin)); 2130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += diff_text.length(); 2131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start1 += diff_text.length(); 2132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diff_type == Operation.EQUAL) { 2133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += diff_text.length(); 2134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski start2 += diff_text.length(); 2135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski empty = false; 2137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(diff_type, diff_text)); 2139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diff_text.equals(bigpatch.diffs.getFirst().text)) { 2140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bigpatch.diffs.removeFirst(); 2141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text 2143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .substring(diff_text.length()); 2144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Compute the head context for the next patch. 2148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski precontext = diff_text2(patch.diffs); 2149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski precontext = precontext.substring(Math.max(0, precontext.length() 2150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski - Patch_Margin)); 2151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Append the end context for this patch. 2152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { 2153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin); 2154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski postcontext = diff_text1(bigpatch.diffs); 2156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (postcontext.length() != 0) { 2158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 += postcontext.length(); 2159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 += postcontext.length(); 2160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!patch.diffs.isEmpty() 2161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && patch.diffs.getLast().operation == Operation.EQUAL) { 2162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.getLast().text += postcontext; 2163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(Operation.EQUAL, postcontext)); 2165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!empty) { 2168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski pointer.add(patch); 2169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski bigpatch = pointer.hasNext() ? pointer.next() : null; 2172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Take a list of patches and return a textual representation. 2178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param patches List of Patch objects. 2179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return Text representation of patches. 2180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String patch_toText(List<Patch> patches) { 2182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text = new StringBuilder(); 2183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Patch aPatch : patches) { 2184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(aPatch); 2185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return text.toString(); 2187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Parse a textual representation of patches and return a List of Patch 2192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * objects. 2193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param textline Text representation of patches. 2194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return List of Patch objects. 2195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @throws IllegalArgumentException If invalid input. 2196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public List<Patch> patch_fromText(String textline) 2198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throws IllegalArgumentException { 2199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<Patch> patches = new LinkedList<Patch>(); 2200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (textline.length() == 0) { 2201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patches; 2202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski List<String> textList = Arrays.asList(textline.split("\n")); 2204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski LinkedList<String> text = new LinkedList<String>(textList); 2205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Patch patch; 2206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Pattern patchHeader 2207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); 2208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski Matcher m; 2209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski char sign; 2210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String line; 2211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!text.isEmpty()) { 2212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski m = patchHeader.matcher(text.getFirst()); 2213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (!m.matches()) { 2214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 2215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Invalid patch string: " + text.getFirst()); 2216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch = new Patch(); 2218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patches.add(patch); 2219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1 = Integer.parseInt(m.group(1)); 2220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (m.group(2).length() == 0) { 2221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1--; 2222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 = 1; 2223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (m.group(2).equals("0")) { 2224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 = 0; 2225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start1--; 2227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length1 = Integer.parseInt(m.group(2)); 2228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2 = Integer.parseInt(m.group(3)); 2231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (m.group(4).length() == 0) { 2232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2--; 2233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 = 1; 2234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (m.group(4).equals("0")) { 2235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 = 0; 2236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.start2--; 2238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.length2 = Integer.parseInt(m.group(4)); 2239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.removeFirst(); 2241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski while (!text.isEmpty()) { 2243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 2244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski sign = text.getFirst().charAt(0); 2245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (IndexOutOfBoundsException e) { 2246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Blank line? Whatever. 2247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.removeFirst(); 2248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski continue; 2249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski line = text.getFirst().substring(1); 2251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski line = line.replace("+", "%2B"); // decode would change all "+" to " " 2252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 2253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski line = URLDecoder.decode(line, "UTF-8"); 2254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (UnsupportedEncodingException e) { 2255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Not likely on modern system. 2256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new Error("This system does not support UTF-8.", e); 2257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (IllegalArgumentException e) { 2258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Malformed URI sequence. 2259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 2260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Illegal escape in patch_fromText: " + line, e); 2261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (sign == '-') { 2263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Deletion. 2264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(Operation.DELETE, line)); 2265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (sign == '+') { 2266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Insertion. 2267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(Operation.INSERT, line)); 2268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (sign == ' ') { 2269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Minor equality. 2270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski patch.diffs.add(new Diff(Operation.EQUAL, line)); 2271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (sign == '@') { 2272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Start of next patch. 2273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 2274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // WTF? 2276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new IllegalArgumentException( 2277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski "Invalid patch mode '" + sign + "' in: " + line); 2278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.removeFirst(); 2280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return patches; 2283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Class representing one diff operation. 2288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public static class Diff { 2290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * One of: INSERT, DELETE or EQUAL. 2292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public Operation operation; 2294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * The text associated with this diff operation. 2296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String text; 2298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Constructor. Initializes the diff with the provided values. 2301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param operation One of INSERT, DELETE or EQUAL. 2302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param text The text being applied. 2303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public Diff(Operation operation, String text) { 2305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Construct a diff with the specified operation and text. 2306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.operation = operation; 2307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.text = text; 2308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Display a human-readable version of this Diff. 2313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return text version. 2314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String toString() { 2316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String prettyText = this.text.replace('\n', '\u00b6'); 2317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return "Diff(" + this.operation + ",\"" + prettyText + "\")"; 2318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Is this Diff equivalent to another Diff? 2323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param d Another Diff to compare against. 2324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return true or false. 2325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public boolean equals(Object d) { 2327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 2328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return (((Diff) d).operation == this.operation) 2329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski && (((Diff) d).text.equals(this.text)); 2330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (ClassCastException e) { 2331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return false; 2332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Class representing one patch operation. 2339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public static class Patch { 2341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public LinkedList<Diff> diffs; 2342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int start1; 2343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int start2; 2344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int length1; 2345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public int length2; 2346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Constructor. Initializes with an empty list of diffs. 2350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public Patch() { 2352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski this.diffs = new LinkedList<Diff>(); 2353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Emmulate GNU diff's format. 2358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Header: @@ -382,8 +481,9 @@ 2359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Indicies are printed as 1-based, not 0-based. 2360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The GNU diff string. 2361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski public String toString() { 2363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski String coords1, coords2; 2364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (this.length1 == 0) { 2365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords1 = this.start1 + ",0"; 2366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (this.length1 == 1) { 2367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords1 = Integer.toString(this.start1 + 1); 2368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords1 = (this.start1 + 1) + "," + this.length1; 2370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski if (this.length2 == 0) { 2372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords2 = this.start2 + ",0"; 2373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else if (this.length2 == 1) { 2374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords2 = Integer.toString(this.start2 + 1); 2375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } else { 2376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski coords2 = (this.start2 + 1) + "," + this.length2; 2377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski StringBuilder text = new StringBuilder(); 2379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append("@@ -").append(coords1).append(" +").append(coords2) 2380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .append(" @@\n"); 2381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Escape the body of the patch with %xx notation. 2382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski for (Diff aDiff : this.diffs) { 2383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski switch (aDiff.operation) { 2384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case INSERT: 2385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append('+'); 2386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 2387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case DELETE: 2388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append('-'); 2389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 2390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski case EQUAL: 2391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(' '); 2392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski break; 2393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski try { 2395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' ')) 2396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .append("\n"); 2397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } catch (UnsupportedEncodingException e) { 2398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski // Not likely on modern system. 2399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski throw new Error("This system does not support UTF-8.", e); 2400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return unescapeForEncodeUriCompatability(text.toString()); 2403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski 2407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski /** 2408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Unescape selected chars for compatability with JavaScript's encodeURI. 2409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * In speed critical applications this could be dropped since the 2410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * receiving application will certainly decode these fine. 2411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Note that this function is case-sensitive. Thus "%3f" would not be 2412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * unescaped. But this is ok because it is only called with the output of 2413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * URLEncoder.encode which returns uppercase hex. 2414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 2415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Example: "%3F" -> "?", "%24" -> "$", etc. 2416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * 2417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @param str The string to escape. 2418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @return The escaped string. 2419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */ 2420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski private static String unescapeForEncodeUriCompatability(String str) { 2421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski return str.replace("%21", "!").replace("%7E", "~") 2422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace("%27", "'").replace("%28", "(").replace("%29", ")") 2423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?") 2424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace("%3A", ":").replace("%40", "@").replace("%26", "&") 2425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace("%3D", "=").replace("%2B", "+").replace("%24", "$") 2426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski .replace("%2C", ",").replace("%23", "#"); 2427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski } 2428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski} 2429