1ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/*
2ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Diff Match and Patch
3ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *
4ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Copyright 2006 Google Inc.
5ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * http://code.google.com/p/google-diff-match-patch/
6ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *
7ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Licensed under the Apache License, Version 2.0 (the "License");
8ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * you may not use this file except in compliance with the License.
9ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * You may obtain a copy of the License at
10ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *
11ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *   http://www.apache.org/licenses/LICENSE-2.0
12ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *
13ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Unless required by applicable law or agreed to in writing, software
14ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * distributed under the License is distributed on an "AS IS" BASIS,
15ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * See the License for the specific language governing permissions and
17ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * limitations under the License.
18ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */
19ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
20ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskipackage name.fraser.neil.plaintext;
21ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
22ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.io.UnsupportedEncodingException;
23ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.net.URLEncoder;
24ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.net.URLDecoder;
25ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.ArrayList;
26ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Arrays;
27ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.HashMap;
28ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.HashSet;
29ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.LinkedList;
30ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.List;
31ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.ListIterator;
32ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Map;
33ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Set;
34ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.Stack;
35ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.regex.Matcher;
36ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskiimport java.util.regex.Pattern;
37ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
38ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
39ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/*
40ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Functions for diff, match and patch.
41ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Computes the difference between two texts to create a patch.
42ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Applies the patch onto another text, allowing for errors.
43ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski *
44ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * @author fraser@google.com (Neil Fraser)
45ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */
46ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
47ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski/**
48ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Class containing the diff, match and patch methods.
49ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski * Also contains the behaviour settings.
50ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski */
51ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowskipublic class diff_match_patch {
52ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
53ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  // Defaults.
54ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  // Set these on your diff_match_patch instance to override the defaults.
55ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
56ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
57ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Number of seconds to map a diff before giving up (0 for infinity).
58ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
59ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public float Diff_Timeout = 1.0f;
60ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
61ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Cost of an empty edit operation in terms of edit characters.
62ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
63ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public short Diff_EditCost = 4;
64ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
65ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * The size beyond which the double-ended diff activates.
66ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Double-ending is twice as fast, but less accurate.
67ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
68ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public short Diff_DualThreshold = 32;
69ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
70ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * At what point is no match declared (0.0 = perfection, 1.0 = very loose).
71ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
72ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public float Match_Threshold = 0.5f;
73ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
74ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * How far to search for a match (0 = exact location, 1000+ = broad match).
75ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * A match this many characters away from the expected location will add
76ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * 1.0 to the score (0.0 is a perfect match).
77ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
78ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int Match_Distance = 1000;
79ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
80ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * When deleting a large block of text (over ~64 characters), how close does
81ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * the contents have to match the expected contents. (0.0 = perfection,
82ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * 1.0 = very loose).  Note that Match_Threshold controls how closely the
83ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * end points of a delete need to match.
84ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
85ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public float Patch_DeleteThreshold = 0.5f;
86ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
87ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Chunk size for context length.
88ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
89ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public short Patch_Margin = 4;
90ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
91ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
92ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * The number of bits in an int.
93ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
94ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private int Match_MaxBits = 32;
95ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
96ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
97ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Internal class for returning results from diff_linesToChars().
98ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Other less paranoid languages just use a three-element array.
99ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected static class LinesToCharsResult {
101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    protected String chars1;
102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    protected String chars2;
103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    protected List<String> lineArray;
104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    protected LinesToCharsResult(String chars1, String chars2,
106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        List<String> lineArray) {
107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.chars1 = chars1;
108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.chars2 = chars2;
109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.lineArray = lineArray;
110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  //  DIFF FUNCTIONS
115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
/**
 * The kind of edit a single Diff describes.
 * A diff is represented as a linked list of Diff objects, for example:
 * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
 *  Diff(Operation.EQUAL, " world.")}
 * which reads as: delete "Hello", add "Goodbye" and keep " world."
 */
public enum Operation {
  /** The text is to be deleted. */
  DELETE,
  /** The text is to be added. */
  INSERT,
  /** The text is to be kept unchanged. */
  EQUAL
}
126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Find the differences between two texts.
130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Run a faster slightly less optimal diff
131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * This method allows the 'checklines' of diff_main() to be optional.
132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Most of the time checklines is wanted, so default to true.
133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string to be diffed.
134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string to be diffed.
135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Linked List of Diff objects.
136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Diff> diff_main(String text1, String text2) {
138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return diff_main(text1, text2, true);
139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Find the differences between two texts.  Simplifies the problem by
143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * stripping any common prefix or suffix off the texts before diffing.
144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string to be diffed.
145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string to be diffed.
146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param checklines Speedup flag.  If false, then don't run a
147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     line-level diff first to identify the changed areas.
148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     If true, then run a faster slightly less optimal diff
149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Linked List of Diff objects.
150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Diff> diff_main(String text1, String text2,
152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                    boolean checklines) {
153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Check for null inputs.
154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1 == null || text2 == null) {
155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Null inputs. (diff_main)");
156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Check for equality (speedup).
159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> diffs;
160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1.equals(text2)) {
161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs = new LinkedList<Diff>();
162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.EQUAL, text1));
163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return diffs;
164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Trim off common prefix (speedup).
167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int commonlength = diff_commonPrefix(text1, text2);
168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String commonprefix = text1.substring(0, commonlength);
169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text1 = text1.substring(commonlength);
170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text2 = text2.substring(commonlength);
171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Trim off common suffix (speedup).
173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    commonlength = diff_commonSuffix(text1, text2);
174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String commonsuffix = text1.substring(text1.length() - commonlength);
175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text1 = text1.substring(0, text1.length() - commonlength);
176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text2 = text2.substring(0, text2.length() - commonlength);
177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Compute the diff on the middle block.
179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diffs = diff_compute(text1, text2, checklines);
180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Restore the prefix and suffix.
182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (commonprefix.length() != 0) {
183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.addFirst(new Diff(Operation.EQUAL, commonprefix));
184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (commonsuffix.length() != 0) {
186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.addLast(new Diff(Operation.EQUAL, commonsuffix));
187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diff_cleanupMerge(diffs);
190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return diffs;
191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Find the differences between two texts.  Assumes that the texts do not
196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * have any common prefix or suffix.
197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string to be diffed.
198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string to be diffed.
199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param checklines Speedup flag.  If false, then don't run a
200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     line-level diff first to identify the changed areas.
201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     If true, then run a faster slightly less optimal diff
202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Linked List of Diff objects.
203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected LinkedList<Diff> diff_compute(String text1, String text2,
205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                          boolean checklines) {
206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> diffs = new LinkedList<Diff>();
207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1.length() == 0) {
209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Just add some text (speedup).
210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.INSERT, text2));
211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return diffs;
212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text2.length() == 0) {
215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Just delete some text (speedup).
216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.DELETE, text1));
217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return diffs;
218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String longtext = text1.length() > text2.length() ? text1 : text2;
221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String shorttext = text1.length() > text2.length() ? text2 : text1;
222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int i = longtext.indexOf(shorttext);
223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (i != -1) {
224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Shorter text is inside the longer text (speedup).
225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      Operation op = (text1.length() > text2.length()) ?
226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                     Operation.DELETE : Operation.INSERT;
227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(op, longtext.substring(0, i)));
228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.EQUAL, shorttext));
229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(op, longtext.substring(i + shorttext.length())));
230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return diffs;
231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    longtext = shorttext = null;  // Garbage collect.
233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Check to see if the problem can be split in two.
235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String[] hm = diff_halfMatch(text1, text2);
236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (hm != null) {
237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // A half-match was found, sort out the return data.
238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text1_a = hm[0];
239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text1_b = hm[1];
240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text2_a = hm[2];
241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text2_b = hm[3];
242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String mid_common = hm[4];
243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Send both pairs off for separate processing.
244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a, checklines);
245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b, checklines);
246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Merge the results.
247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs = diffs_a;
248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.EQUAL, mid_common));
249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.addAll(diffs_b);
250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return diffs;
251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Perform a real diff.
254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (checklines && (text1.length() < 100 || text2.length() < 100)) {
255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      checklines = false;  // Too trivial for the overhead.
256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<String> linearray = null;
258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (checklines) {
259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Scan the text on a line-by-line basis first.
260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      LinesToCharsResult b = diff_linesToChars(text1, text2);
261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text1 = b.chars1;
262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text2 = b.chars2;
263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      linearray = b.lineArray;
264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diffs = diff_map(text1, text2);
267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs == null) {
268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // No acceptable result.
269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs = new LinkedList<Diff>();
270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.DELETE, text1));
271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.INSERT, text2));
272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (checklines) {
275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Convert the diff back to original text.
276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_charsToLines(diffs, linearray);
277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Eliminate freak matches (e.g. blank lines)
278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_cleanupSemantic(diffs);
279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Rediff any replacement blocks, this time character-by-character.
281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Add a dummy entry at the end.
282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.add(new Diff(Operation.EQUAL, ""));
283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int count_delete = 0;
284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int count_insert = 0;
285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text_delete = "";
286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text_insert = "";
287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      ListIterator<Diff> pointer = diffs.listIterator();
288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      Diff thisDiff = pointer.next();
289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (thisDiff != null) {
290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        switch (thisDiff.operation) {
291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case INSERT:
292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          count_insert++;
293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text_insert += thisDiff.text;
294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case DELETE:
296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          count_delete++;
297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text_delete += thisDiff.text;
298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case EQUAL:
300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Upon reaching an equality, check for prior redundancies.
301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (count_delete >= 1 && count_insert >= 1) {
302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Delete the offending records and add the merged ones.
303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous();
304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            for (int j = 0; j < count_delete + count_insert; j++) {
305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              pointer.previous();
306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              pointer.remove();
307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            for (Diff newDiff : diff_main(text_delete, text_insert, false)) {
309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              pointer.add(newDiff);
310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          count_insert = 0;
313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          count_delete = 0;
314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text_delete = "";
315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text_insert = "";
316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        thisDiff = pointer.hasNext() ? pointer.next() : null;
319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.removeLast();  // Remove the dummy entry at the end.
321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return diffs;
323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Split two texts into a list of strings.  Reduce the texts to a string of
328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * hashes where each Unicode character represents one line.
329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 First string.
330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 Second string.
331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return An object containing the encoded text1, the encoded text2 and
332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     the List of unique strings.  The zeroth element of the List of
333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     unique strings is intentionally blank.
334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected LinesToCharsResult diff_linesToChars(String text1, String text2) {
336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<String> lineArray = new ArrayList<String>();
337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<String, Integer> lineHash = new HashMap<String, Integer>();
338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // e.g. linearray[4] == "Hello\n"
339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // e.g. linehash.get("Hello\n") == 4
340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // "\x00" is a valid character, but various debuggers don't like it.
342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // So we'll insert a junk entry to avoid generating a null character.
343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    lineArray.add("");
344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return new LinesToCharsResult(chars1, chars2, lineArray);
348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Split a text into a list of strings.  Reduce the texts to a string of
353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * hashes where each Unicode character represents one line.
354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text String to encode.
355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param lineArray List of unique strings.
356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param lineHash Map of strings to indices.
357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Encoded string.
358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private String diff_linesToCharsMunge(String text, List<String> lineArray,
360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                        Map<String, Integer> lineHash) {
361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int lineStart = 0;
362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int lineEnd = -1;
363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String line;
364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder chars = new StringBuilder();
365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Walk the text, pulling out a substring for each line.
366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // text.split('\n') would would temporarily double our memory footprint.
367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Modifying text would create many large strings to garbage collect.
368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (lineEnd < text.length() - 1) {
369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      lineEnd = text.indexOf('\n', lineStart);
370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (lineEnd == -1) {
371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        lineEnd = text.length() - 1;
372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      line = text.substring(lineStart, lineEnd + 1);
374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      lineStart = lineEnd + 1;
375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (lineHash.containsKey(line)) {
377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        chars.append(String.valueOf((char) (int) lineHash.get(line)));
378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        lineArray.add(line);
380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        lineHash.put(line, lineArray.size() - 1);
381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        chars.append(String.valueOf((char) (lineArray.size() - 1)));
382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return chars.toString();
385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Rehydrate the text in a diff from a string of line hashes to real lines of
390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * text.
391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param lineArray List of unique strings.
393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected void diff_charsToLines(LinkedList<Diff> diffs,
395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                  List<String> lineArray) {
396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder text;
397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff diff : diffs) {
398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text = new StringBuilder();
399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      for (int y = 0; y < diff.text.length(); y++) {
400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.append(lineArray.get(diff.text.charAt(y)));
401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff.text = text.toString();
403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Explore the intersection points between the two texts.
409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string to be diffed.
410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string to be diffed.
411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Diff objects or null if no diff available.
412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected LinkedList<Diff> diff_map(String text1, String text2) {
414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    long ms_end = System.currentTimeMillis() + (long) (Diff_Timeout * 1000);
415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Cache the text lengths to prevent multiple calls.
416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int text1_length = text1.length();
417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int text2_length = text2.length();
418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int max_d = text1_length + text2_length - 1;
419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean doubleEnd = Diff_DualThreshold * 2 < max_d;
420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<Set<Long>> v_map1 = new ArrayList<Set<Long>>();
421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<Set<Long>> v_map2 = new ArrayList<Set<Long>>();
422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<Integer, Integer> v1 = new HashMap<Integer, Integer>();
423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<Integer, Integer> v2 = new HashMap<Integer, Integer>();
424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    v1.put(1, 0);
425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    v2.put(1, 0);
426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int x, y;
427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Long footstep = 0L;  // Used to track overlapping paths.
428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<Long, Integer> footsteps = new HashMap<Long, Integer>();
429ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean done = false;
430ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // If the total number of characters is odd, then the front path will
431ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // collide with the reverse path.
432ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean front = ((text1_length + text2_length) % 2 == 1);
433ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int d = 0; d < max_d; d++) {
434ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Bail out if timeout reached.
435ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (Diff_Timeout > 0 && System.currentTimeMillis() > ms_end) {
436ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        return null;
437ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
438ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
439ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Walk the front path one step.
440ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      v_map1.add(new HashSet<Long>());  // Adds at index 'd'.
441ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      for (int k = -d; k <= d; k += 2) {
442ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (k == -d || k != d && v1.get(k - 1) < v1.get(k + 1)) {
443ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x = v1.get(k + 1);
444ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
445ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x = v1.get(k - 1) + 1;
446ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
447ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        y = x - k;
448ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (doubleEnd) {
449ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          footstep = diff_footprint(x, y);
450ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (front && (footsteps.containsKey(footstep))) {
451ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            done = true;
452ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
453ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (!front) {
454ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            footsteps.put(footstep, d);
455ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
456ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
457ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        while (!done && x < text1_length && y < text2_length
458ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski               && text1.charAt(x) == text2.charAt(y)) {
459ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x++;
460ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y++;
461ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (doubleEnd) {
462ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            footstep = diff_footprint(x, y);
463ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (front && (footsteps.containsKey(footstep))) {
464ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              done = true;
465ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
466ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (!front) {
467ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              footsteps.put(footstep, d);
468ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
469ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
470ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
471ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        v1.put(k, x);
472ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        v_map1.get(d).add(diff_footprint(x, y));
473ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (x == text1_length && y == text2_length) {
474ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Reached the end in single-path mode.
475ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          return diff_path1(v_map1, text1, text2);
476ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (done) {
477ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Front path ran over reverse path.
478ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          v_map2 = v_map2.subList(0, footsteps.get(footstep) + 1);
479ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          LinkedList<Diff> a = diff_path1(v_map1, text1.substring(0, x),
480ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                          text2.substring(0, y));
481ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          a.addAll(diff_path2(v_map2, text1.substring(x), text2.substring(y)));
482ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          return a;
483ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
484ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
485ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
486ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (doubleEnd) {
487ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Walk the reverse path one step.
488ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        v_map2.add(new HashSet<Long>());  // Adds at index 'd'.
489ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        for (int k = -d; k <= d; k += 2) {
490ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (k == -d || k != d && v2.get(k - 1) < v2.get(k + 1)) {
491ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            x = v2.get(k + 1);
492ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
493ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            x = v2.get(k - 1) + 1;
494ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
495ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y = x - k;
496ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          footstep = diff_footprint(text1_length - x, text2_length - y);
497ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (!front && (footsteps.containsKey(footstep))) {
498ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            done = true;
499ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
500ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (front) {
501ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            footsteps.put(footstep, d);
502ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
503ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          while (!done && x < text1_length && y < text2_length
504ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                 && text1.charAt(text1_length - x - 1)
505ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                 == text2.charAt(text2_length - y - 1)) {
506ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            x++;
507ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            y++;
508ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            footstep = diff_footprint(text1_length - x, text2_length - y);
509ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (!front && (footsteps.containsKey(footstep))) {
510ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              done = true;
511ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
512ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (front) {
513ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              footsteps.put(footstep, d);
514ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
515ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
516ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          v2.put(k, x);
517ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          v_map2.get(d).add(diff_footprint(x, y));
518ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (done) {
519ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Reverse path ran over front path.
520ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            v_map1 = v_map1.subList(0, footsteps.get(footstep) + 1);
521ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            LinkedList<Diff> a
522ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                = diff_path1(v_map1, text1.substring(0, text1_length - x),
523ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                             text2.substring(0, text2_length - y));
524ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            a.addAll(diff_path2(v_map2, text1.substring(text1_length - x),
525ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                text2.substring(text2_length - y)));
526ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            return a;
527ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
528ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
529ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
530ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
531ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Number of diffs equals number of characters, no commonality at all.
532ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return null;
533ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
534ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
535ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
536ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
537ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Work from the middle back to the start to determine the path.
538ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param v_map List of path sets.
539ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string fragment to be diffed.
540ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string fragment to be diffed.
541ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Diff objects.
542ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
543ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected LinkedList<Diff> diff_path1(List<Set<Long>> v_map,
544ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                        String text1, String text2) {
545ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> path = new LinkedList<Diff>();
546ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int x = text1.length();
547ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int y = text2.length();
548ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Operation last_op = null;
549ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int d = v_map.size() - 2; d >= 0; d--) {
550ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (true) {
551ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (v_map.get(d).contains(diff_footprint(x - 1, y))) {
552ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x--;
553ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.DELETE) {
554ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getFirst().text = text1.charAt(x) + path.getFirst().text;
555ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
556ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addFirst(new Diff(Operation.DELETE,
557ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                   text1.substring(x, x + 1)));
558ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
559ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.DELETE;
560ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
561ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) {
562ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y--;
563ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.INSERT) {
564ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getFirst().text = text2.charAt(y) + path.getFirst().text;
565ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
566ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addFirst(new Diff(Operation.INSERT,
567ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                   text2.substring(y, y + 1)));
568ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
569ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.INSERT;
570ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
571ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
572ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x--;
573ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y--;
574ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          assert (text1.charAt(x) == text2.charAt(y))
575ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                 : "No diagonal.  Can't happen. (diff_path1)";
576ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.EQUAL) {
577ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getFirst().text = text1.charAt(x) + path.getFirst().text;
578ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
579ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addFirst(new Diff(Operation.EQUAL, text1.substring(x, x + 1)));
580ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
581ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.EQUAL;
582ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
583ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
584ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
585ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return path;
586ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
587ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
588ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
589ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
590ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Work from the middle back to the end to determine the path.
591ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param v_map List of path sets.
592ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old string fragment to be diffed.
593ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New string fragment to be diffed.
594ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Diff objects.
595ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
596ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected LinkedList<Diff> diff_path2(List<Set<Long>> v_map,
597ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                        String text1, String text2) {
598ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> path = new LinkedList<Diff>();
599ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int x = text1.length();
600ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int y = text2.length();
601ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Operation last_op = null;
602ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int d = v_map.size() - 2; d >= 0; d--) {
603ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (true) {
604ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (v_map.get(d).contains(diff_footprint(x - 1, y))) {
605ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x--;
606ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.DELETE) {
607ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getLast().text += text1.charAt(text1.length() - x - 1);
608ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
609ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addLast(new Diff(Operation.DELETE,
610ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                text1.substring(text1.length() - x - 1, text1.length() - x)));
611ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
612ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.DELETE;
613ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
614ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (v_map.get(d).contains(diff_footprint(x, y - 1))) {
615ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y--;
616ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.INSERT) {
617ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getLast().text += text2.charAt(text2.length() - y - 1);
618ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
619ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addLast(new Diff(Operation.INSERT,
620ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                text2.substring(text2.length() - y - 1, text2.length() - y)));
621ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
622ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.INSERT;
623ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
624ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
625ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          x--;
626ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          y--;
627ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          assert (text1.charAt(text1.length() - x - 1)
628ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  == text2.charAt(text2.length() - y - 1))
629ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                 : "No diagonal.  Can't happen. (diff_path2)";
630ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (last_op == Operation.EQUAL) {
631ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.getLast().text += text1.charAt(text1.length() - x - 1);
632ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
633ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            path.addLast(new Diff(Operation.EQUAL,
634ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                text1.substring(text1.length() - x - 1, text1.length() - x)));
635ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
636ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          last_op = Operation.EQUAL;
637ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
638ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
639ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
640ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return path;
641ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
642ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
643ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
644ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
645ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute a good hash of two integers.
646ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param x First int.
647ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param y Second int.
648ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return A long made up of both ints.
649ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
650ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected long diff_footprint(int x, int y) {
651ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // The maximum size for a long is 9,223,372,036,854,775,807
652ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // The maximum size for an int is 2,147,483,647
653ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Two ints fit nicely in one long.
654ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    long result = x;
655ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    result = result << 32;
656ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    result += y;
657ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return result;
658ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
659ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
660ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
661ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
662ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Determine the common prefix of two strings
663ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 First string.
664ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 Second string.
665ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return The number of characters common to the start of each string.
666ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
667ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int diff_commonPrefix(String text1, String text2) {
668ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Performance analysis: http://neil.fraser.name/news/2007/10/09/
669ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int n = Math.min(text1.length(), text2.length());
670ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int i = 0; i < n; i++) {
671ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (text1.charAt(i) != text2.charAt(i)) {
672ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        return i;
673ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
674ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
675ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return n;
676ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
677ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
678ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
679ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
680ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Determine the common suffix of two strings
681ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 First string.
682ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 Second string.
683ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return The number of characters common to the end of each string.
684ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
685ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int diff_commonSuffix(String text1, String text2) {
686ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Performance analysis: http://neil.fraser.name/news/2007/10/09/
687ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int text1_length = text1.length();
688ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int text2_length = text2.length();
689ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int n = Math.min(text1_length, text2_length);
690ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int i = 1; i <= n; i++) {
691ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
692ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        return i - 1;
693ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
694ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
695ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return n;
696ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
697ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
698ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
699ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
700ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Do the two texts share a substring which is at least half the length of
701ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * the longer text?
702ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 First string.
703ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 Second string.
704ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Five element String array, containing the prefix of text1, the
705ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     suffix of text1, the prefix of text2, the suffix of text2 and the
706ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     common middle.  Or null if there was no match.
707ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
708ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected String[] diff_halfMatch(String text1, String text2) {
709ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String longtext = text1.length() > text2.length() ? text1 : text2;
710ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String shorttext = text1.length() > text2.length() ? text2 : text1;
711ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (longtext.length() < 10 || shorttext.length() < 1) {
712ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return null;  // Pointless.
713ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
714ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
715ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // First check if the second quarter is the seed for a half-match.
716ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String[] hm1 = diff_halfMatchI(longtext, shorttext,
717ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                   (longtext.length() + 3) / 4);
718ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Check again based on the third quarter.
719ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String[] hm2 = diff_halfMatchI(longtext, shorttext,
720ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                   (longtext.length() + 1) / 2);
721ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String[] hm;
722ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (hm1 == null && hm2 == null) {
723ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return null;
724ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (hm2 == null) {
725ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      hm = hm1;
726ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (hm1 == null) {
727ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      hm = hm2;
728ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else {
729ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Both matched.  Select the longest.
730ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;
731ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
732ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
733ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // A half-match was found, sort out the return data.
734ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1.length() > text2.length()) {
735ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return hm;
736ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      //return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]};
737ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else {
738ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]};
739ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
740ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
741ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
742ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
743ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
744ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Does a substring of shorttext exist within longtext such that the
745ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * substring is at least half the length of longtext?
746ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param longtext Longer string.
747ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param shorttext Shorter string.
748ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param i Start index of quarter length substring within longtext.
749ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Five element String array, containing the prefix of longtext, the
750ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
751ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *     and the common middle.  Or null if there was no match.
752ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
753ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private String[] diff_halfMatchI(String longtext, String shorttext, int i) {
754ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Start with a 1/4 length substring at position i as a seed.
755ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String seed = longtext.substring(i, i + longtext.length() / 4);
756ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int j = -1;
757ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String best_common = "";
758ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String best_longtext_a = "", best_longtext_b = "";
759ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String best_shorttext_a = "", best_shorttext_b = "";
760ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while ((j = shorttext.indexOf(seed, j + 1)) != -1) {
761ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int prefixLength = diff_commonPrefix(longtext.substring(i),
762ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                           shorttext.substring(j));
763ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int suffixLength = diff_commonSuffix(longtext.substring(0, i),
764ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                           shorttext.substring(0, j));
765ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (best_common.length() < suffixLength + prefixLength) {
766ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        best_common = shorttext.substring(j - suffixLength, j)
767ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            + shorttext.substring(j, j + prefixLength);
768ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        best_longtext_a = longtext.substring(0, i - suffixLength);
769ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        best_longtext_b = longtext.substring(i + prefixLength);
770ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        best_shorttext_a = shorttext.substring(0, j - suffixLength);
771ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        best_shorttext_b = shorttext.substring(j + prefixLength);
772ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
773ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
774ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (best_common.length() >= longtext.length() / 2) {
775ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return new String[]{best_longtext_a, best_longtext_b,
776ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                          best_shorttext_a, best_shorttext_b, best_common};
777ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else {
778ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return null;
779ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
780ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
781ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
782ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
783ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
784ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Reduce the number of edits by eliminating semantically trivial equalities.
785ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
786ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
787ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public void diff_cleanupSemantic(LinkedList<Diff> diffs) {
788ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.isEmpty()) {
789ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return;
790ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
791ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean changes = false;
792ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Stack<Diff> equalities = new Stack<Diff>();  // Stack of qualities.
793ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String lastequality = null; // Always equal to equalities.lastElement().text
794ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    ListIterator<Diff> pointer = diffs.listIterator();
795ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Number of characters that changed prior to the equality.
796ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int length_changes1 = 0;
797ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Number of characters that changed after the equality.
798ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int length_changes2 = 0;
799ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff thisDiff = pointer.next();
800ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (thisDiff != null) {
801ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (thisDiff.operation == Operation.EQUAL) {
802ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // equality found
803ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        equalities.push(thisDiff);
804ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        length_changes1 = length_changes2;
805ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        length_changes2 = 0;
806ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        lastequality = thisDiff.text;
807ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
808ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // an insertion or deletion
809ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        length_changes2 += thisDiff.text.length();
810ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (lastequality != null && (lastequality.length() <= length_changes1)
811ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            && (lastequality.length() <= length_changes2)) {
812ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          //System.out.println("Splitting: '" + lastequality + "'");
813ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Walk back to offending equality.
814ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          while (thisDiff != equalities.lastElement()) {
815ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            thisDiff = pointer.previous();
816ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
817ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.next();
818ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
819ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Replace equality with a delete.
820ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.set(new Diff(Operation.DELETE, lastequality));
821ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Insert a corresponding an insert.
822ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.add(new Diff(Operation.INSERT, lastequality));
823ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
824ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          equalities.pop();  // Throw away the equality we just deleted.
825ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (!equalities.empty()) {
826ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Throw away the previous equality (it needs to be reevaluated).
827ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            equalities.pop();
828ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
829ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (equalities.empty()) {
830ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // There are no previous equalities, walk back to the start.
831ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            while (pointer.hasPrevious()) {
832ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              pointer.previous();
833ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
834ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
835ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // There is a safe equality we can fall back to.
836ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            thisDiff = equalities.lastElement();
837ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            while (thisDiff != pointer.previous()) {
838ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              // Intentionally empty loop.
839ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
840ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
841ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
842ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          length_changes1 = 0;  // Reset the counters.
843ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          length_changes2 = 0;
844ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          lastequality = null;
845ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          changes = true;
846ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
847ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
848ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      thisDiff = pointer.hasNext() ? pointer.next() : null;
849ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
850ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
851ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (changes) {
852ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_cleanupMerge(diffs);
853ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
854ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diff_cleanupSemanticLossless(diffs);
855ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
856ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
857ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
858ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
859ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Look for single edits surrounded on both sides by equalities
860ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * which can be shifted sideways to align the edit to a word boundary.
861ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
862ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
863ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
864ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public void diff_cleanupSemanticLossless(LinkedList<Diff> diffs) {
865ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String equality1, edit, equality2;
866ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String commonString;
867ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int commonOffset;
868ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int score, bestScore;
869ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String bestEquality1, bestEdit, bestEquality2;
870ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Create a new iterator at the start.
871ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    ListIterator<Diff> pointer = diffs.listIterator();
872ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
873ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff thisDiff = pointer.hasNext() ? pointer.next() : null;
874ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
875ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Intentionally ignore the first and last element (don't need checking).
876ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (nextDiff != null) {
877ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (prevDiff.operation == Operation.EQUAL &&
878ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          nextDiff.operation == Operation.EQUAL) {
879ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // This is a single edit surrounded by equalities.
880ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        equality1 = prevDiff.text;
881ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        edit = thisDiff.text;
882ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        equality2 = nextDiff.text;
883ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
884ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // First, shift the edit as far left as possible.
885ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        commonOffset = diff_commonSuffix(equality1, edit);
886ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (commonOffset != 0) {
887ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          commonString = edit.substring(edit.length() - commonOffset);
888ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          equality1 = equality1.substring(0, equality1.length() - commonOffset);
889ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          edit = commonString + edit.substring(0, edit.length() - commonOffset);
890ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          equality2 = commonString + equality2;
891ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
892ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
893ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Second, step character by character right, looking for the best fit.
894ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bestEquality1 = equality1;
895ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bestEdit = edit;
896ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bestEquality2 = equality2;
897ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bestScore = diff_cleanupSemanticScore(equality1, edit)
898ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            + diff_cleanupSemanticScore(edit, equality2);
899ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        while (edit.length() != 0 && equality2.length() != 0
900ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            && edit.charAt(0) == equality2.charAt(0)) {
901ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          equality1 += edit.charAt(0);
902ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          edit = edit.substring(1) + equality2.charAt(0);
903ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          equality2 = equality2.substring(1);
904ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          score = diff_cleanupSemanticScore(equality1, edit)
905ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + diff_cleanupSemanticScore(edit, equality2);
906ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // The >= encourages trailing rather than leading whitespace on edits.
907ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (score >= bestScore) {
908ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            bestScore = score;
909ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            bestEquality1 = equality1;
910ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            bestEdit = edit;
911ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            bestEquality2 = equality2;
912ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
913ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
914ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
915ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (!prevDiff.text.equals(bestEquality1)) {
916ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // We have an improvement, save it back to the diff.
917ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (bestEquality1.length() != 0) {
918ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            prevDiff.text = bestEquality1;
919ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
920ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous(); // Walk past nextDiff.
921ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous(); // Walk past thisDiff.
922ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous(); // Walk past prevDiff.
923ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.remove(); // Delete prevDiff.
924ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.next(); // Walk past thisDiff.
925ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.next(); // Walk past nextDiff.
926ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
927ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff.text = bestEdit;
928ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (bestEquality2.length() != 0) {
929ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            nextDiff.text = bestEquality2;
930ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
931ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.remove(); // Delete nextDiff.
932ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            nextDiff = thisDiff;
933ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            thisDiff = prevDiff;
934ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
935ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
936ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
937ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      prevDiff = thisDiff;
938ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      thisDiff = nextDiff;
939ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      nextDiff = pointer.hasNext() ? pointer.next() : null;
940ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
941ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
942ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
943ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
944ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
945ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Given two strings, compute a score representing whether the internal
946ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * boundary falls on logical boundaries.
947ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Scores range from 5 (best) to 0 (worst).
948ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param one First string.
949ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param two Second string.
950ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return The score.
951ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
952ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private int diff_cleanupSemanticScore(String one, String two) {
953ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (one.length() == 0 || two.length() == 0) {
954ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Edges are the best.
955ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return 5;
956ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
957ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
958ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Each port of this function behaves slightly differently due to
959ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // subtle differences in each language's definition of things like
960ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // 'whitespace'.  Since this function's purpose is largely cosmetic,
961ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // the choice has been made to use each language's native features
962ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // rather than force total conformity.
963ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int score = 0;
964ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // One point for non-alphanumeric.
965ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (!Character.isLetterOrDigit(one.charAt(one.length() - 1))
966ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        || !Character.isLetterOrDigit(two.charAt(0))) {
967ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      score++;
968ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Two points for whitespace.
969ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (Character.isWhitespace(one.charAt(one.length() - 1))
970ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          || Character.isWhitespace(two.charAt(0))) {
971ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        score++;
972ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Three points for line breaks.
973ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (Character.getType(one.charAt(one.length() - 1)) == Character.CONTROL
974ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            || Character.getType(two.charAt(0)) == Character.CONTROL) {
975ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          score++;
976ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Four points for blank lines.
977ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (BLANKLINEEND.matcher(one).find()
978ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              || BLANKLINESTART.matcher(two).find()) {
979ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            score++;
980ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
981ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
982ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
983ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
984ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return score;
985ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
986ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
987ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
988ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private Pattern BLANKLINEEND
989ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL);
990ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private Pattern BLANKLINESTART
991ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL);
992ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
993ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
  /**
   * Reduce the number of edits by eliminating operationally trivial equalities.
   *
   * <p>The list is walked once with a ListIterator. Every equality that is
   * shorter than {@code Diff_EditCost} and has at least one edit in front of
   * it is remembered as a candidate. As soon as the edits on both sides of the
   * most recent candidate satisfy the split condition (see the comment inside
   * the loop), that equality is replaced in place by a DELETE followed by an
   * INSERT carrying the same text, and the walk backs up so that earlier
   * diffs are re-examined in the light of the change.
   *
   * <p>The list is modified in place. An empty list is left untouched. If at
   * least one equality was split, {@code diff_cleanupMerge} is run on the list
   * before returning.
   *
   * @param diffs LinkedList of Diff objects.
   */
  public void diff_cleanupEfficiency(LinkedList<Diff> diffs) {
    if (diffs.isEmpty()) {
      // Nothing to clean up; also protects the unconditional next() below.
      return;
    }
    // Set once any equality has been split; triggers the final merge pass.
    boolean changes = false;
    Stack<Diff> equalities = new Stack<Diff>();  // Stack of candidate equalities.
    // Text of the most recent candidate, or null when there is none.
    String lastequality = null;
    ListIterator<Diff> pointer = diffs.listIterator();
    // Is there an insertion operation before the last equality.
    boolean pre_ins = false;
    // Is there a deletion operation before the last equality.
    boolean pre_del = false;
    // Is there an insertion operation after the last equality.
    boolean post_ins = false;
    // Is there a deletion operation after the last equality.
    boolean post_del = false;
    Diff thisDiff = pointer.next();
    Diff safeDiff = thisDiff;  // The last Diff that is known to be unsplittable.
    while (thisDiff != null) {
      if (thisDiff.operation == Operation.EQUAL) {
        // Equality found.
        if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) {
          // Candidate found: short, and preceded by at least one edit.
          // The edits seen so far become the "before" side of this candidate.
          equalities.push(thisDiff);
          pre_ins = post_ins;
          pre_del = post_del;
          lastequality = thisDiff.text;
        } else {
          // Not a candidate, and can never become one.
          equalities.clear();
          lastequality = null;
          safeDiff = thisDiff;
        }
        // Start collecting the edits that follow this equality.
        post_ins = post_del = false;
      } else {
        // An insertion or deletion.
        if (thisDiff.operation == Operation.DELETE) {
          post_del = true;
        } else {
          post_ins = true;
        }
        /*
         * Five types to be split:
         * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
         * <ins>A</ins>X<ins>C</ins><del>D</del>
         * <ins>A</ins><del>B</del>X<ins>C</ins>
         * <del>A</del>X<ins>C</ins><del>D</del>
         * <ins>A</ins><del>B</del>X<del>C</del>
         *
         * That is: either all four surrounding edit kinds are present, or the
         * equality is shorter than half of Diff_EditCost and exactly three of
         * the four are present.
         */
        if (lastequality != null
            && ((pre_ins && pre_del && post_ins && post_del)
                || ((lastequality.length() < Diff_EditCost / 2)
                    && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0)
                        + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) {
          //System.out.println("Splitting: '" + lastequality + "'");
          // Walk back to offending equality.
          while (thisDiff != equalities.lastElement()) {
            thisDiff = pointer.previous();
          }
          // previous() left the cursor in front of the equality; step over it
          // again so that set() below replaces exactly that element.
          pointer.next();

          // Replace equality with a delete.
          pointer.set(new Diff(Operation.DELETE, lastequality));
          // Insert a corresponding insert right after it.
          pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality));

          equalities.pop();  // Throw away the equality we just deleted.
          lastequality = null;
          if (pre_ins && pre_del) {
            // No changes made which could affect previous entry, keep going.
            // The new delete/insert pair counts as edits of both kinds in
            // front of whatever equality comes next.
            post_ins = post_del = true;
            equalities.clear();
            safeDiff = thisDiff;
          } else {
            if (!equalities.empty()) {
              // Throw away the previous equality (it needs to be reevaluated).
              equalities.pop();
            }
            if (equalities.empty()) {
              // There are no previous questionable equalities,
              // walk back to the last known safe diff.
              thisDiff = safeDiff;
            } else {
              // There is an equality we can fall back to.
              thisDiff = equalities.lastElement();
            }
            // Rewind until previous() returns the chosen diff; the next() at
            // the bottom of the outer loop then hands that same diff back.
            while (thisDiff != pointer.previous()) {
              // Intentionally empty loop.
            }
            post_ins = post_del = false;
          }

          changes = true;
        }
      }
      // Advance; null marks the end of the list and terminates the loop.
      thisDiff = pointer.hasNext() ? pointer.next() : null;
    }

    if (changes) {
      // Splitting produced adjacent edits of the same kind; merge them.
      diff_cleanupMerge(diffs);
    }
  }
1100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Reorder and merge like edit sections.  Merge equalities.
1104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Any edit section can move as long as it doesn't cross an equality.
1105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public void diff_cleanupMerge(LinkedList<Diff> diffs) {
1108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diffs.add(new Diff(Operation.EQUAL, ""));  // Add a dummy entry at the end.
1109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    ListIterator<Diff> pointer = diffs.listIterator();
1110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int count_delete = 0;
1111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int count_insert = 0;
1112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String text_delete = "";
1113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String text_insert = "";
1114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff thisDiff = pointer.next();
1115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff prevEqual = null;
1116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int commonlength;
1117ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (thisDiff != null) {
1118ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (thisDiff.operation) {
1119ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case INSERT:
1120ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        count_insert++;
1121ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text_insert += thisDiff.text;
1122ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        prevEqual = null;
1123ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1124ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case DELETE:
1125ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        count_delete++;
1126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text_delete += thisDiff.text;
1127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        prevEqual = null;
1128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case EQUAL:
1130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (count_delete != 0 || count_insert != 0) {
1131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Delete the offending records.
1132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.previous();  // Reverse direction.
1133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          while (count_delete-- > 0) {
1134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous();
1135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.remove();
1136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          while (count_insert-- > 0) {
1138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.previous();
1139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.remove();
1140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (count_delete != 0 && count_insert != 0) {
1142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Factor out any common prefixies.
1143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            commonlength = diff_commonPrefix(text_insert, text_delete);
1144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (commonlength != 0) {
1145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              if (pointer.hasPrevious()) {
1146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                thisDiff = pointer.previous();
1147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                assert thisDiff.operation == Operation.EQUAL
1148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                       : "Previous diff should have been an equality.";
1149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                thisDiff.text += text_insert.substring(0, commonlength);
1150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                pointer.next();
1151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              } else {
1152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                pointer.add(new Diff(Operation.EQUAL,
1153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                    text_insert.substring(0, commonlength)));
1154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              }
1155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              text_insert = text_insert.substring(commonlength);
1156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              text_delete = text_delete.substring(commonlength);
1157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
1158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Factor out any common suffixies.
1159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            commonlength = diff_commonSuffix(text_insert, text_delete);
1160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (commonlength != 0) {
1161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              thisDiff = pointer.next();
1162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              thisDiff.text = text_insert.substring(text_insert.length()
1163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  - commonlength) + thisDiff.text;
1164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              text_insert = text_insert.substring(0, text_insert.length()
1165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  - commonlength);
1166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              text_delete = text_delete.substring(0, text_delete.length()
1167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  - commonlength);
1168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              pointer.previous();
1169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
1170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Insert the merged records.
1172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (text_delete.length() != 0) {
1173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.add(new Diff(Operation.DELETE, text_delete));
1174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (text_insert.length() != 0) {
1176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            pointer.add(new Diff(Operation.INSERT, text_insert));
1177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Step forward to the equality.
1179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff = pointer.hasNext() ? pointer.next() : null;
1180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (prevEqual != null) {
1181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Merge this equality with the previous one.
1182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          prevEqual.text += thisDiff.text;
1183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.remove();
1184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff = pointer.previous();
1185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.next();  // Forward direction
1186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        count_insert = 0;
1188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        count_delete = 0;
1189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text_delete = "";
1190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text_insert = "";
1191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        prevEqual = thisDiff;
1192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      thisDiff = pointer.hasNext() ? pointer.next() : null;
1195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // System.out.println(diff);
1197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.getLast().text.length() == 0) {
1198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.removeLast();  // Remove the dummy entry at the end.
1199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /*
1202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Second pass: look for single edits surrounded on both sides by equalities
1203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * which can be shifted sideways to eliminate an equality.
1204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
1206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean changes = false;
1207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Create a new iterator at the start.
1208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // (As opposed to walking the current one back.)
1209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    pointer = diffs.listIterator();
1210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
1211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    thisDiff = pointer.hasNext() ? pointer.next() : null;
1212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
1213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Intentionally ignore the first and last element (don't need checking).
1214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (nextDiff != null) {
1215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (prevDiff.operation == Operation.EQUAL &&
1216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          nextDiff.operation == Operation.EQUAL) {
1217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // This is a single edit surrounded by equalities.
1218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (thisDiff.text.endsWith(prevDiff.text)) {
1219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Shift the edit over the previous equality.
1220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff.text = prevDiff.text
1221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + thisDiff.text.substring(0, thisDiff.text.length()
1222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                           - prevDiff.text.length());
1223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          nextDiff.text = prevDiff.text + nextDiff.text;
1224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.previous(); // Walk past nextDiff.
1225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.previous(); // Walk past thisDiff.
1226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.previous(); // Walk past prevDiff.
1227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.remove(); // Delete prevDiff.
1228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.next(); // Walk past thisDiff.
1229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff = pointer.next(); // Walk past nextDiff.
1230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          nextDiff = pointer.hasNext() ? pointer.next() : null;
1231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          changes = true;
1232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (thisDiff.text.startsWith(nextDiff.text)) {
1233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Shift the edit over the next equality.
1234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          prevDiff.text += nextDiff.text;
1235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          thisDiff.text = thisDiff.text.substring(nextDiff.text.length())
1236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + nextDiff.text;
1237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.remove(); // Delete nextDiff.
1238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          nextDiff = pointer.hasNext() ? pointer.next() : null;
1239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          changes = true;
1240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      prevDiff = thisDiff;
1243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      thisDiff = nextDiff;
1244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      nextDiff = pointer.hasNext() ? pointer.next() : null;
1245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // If shifts were made, the diff needs reordering and another shift sweep.
1247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (changes) {
1248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_cleanupMerge(diffs);
1249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * loc is a location in text1, compute and return the equivalent location in
1255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * text2.
1256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * e.g. "The cat" vs "The big cat", 1->1, 5->8
1257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param loc Location within text1.
1259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Location within text2.
1260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int diff_xIndex(LinkedList<Diff> diffs, int loc) {
1262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int chars1 = 0;
1263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int chars2 = 0;
1264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int last_chars1 = 0;
1265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int last_chars2 = 0;
1266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Diff lastDiff = null;
1267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.INSERT) {
1269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Equality or deletion.
1270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        chars1 += aDiff.text.length();
1271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.DELETE) {
1273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Equality or insertion.
1274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        chars2 += aDiff.text.length();
1275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (chars1 > loc) {
1277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Overshot the location.
1278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        lastDiff = aDiff;
1279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      last_chars1 = chars1;
1282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      last_chars2 = chars2;
1283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (lastDiff != null && lastDiff.operation == Operation.DELETE) {
1285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // The location was deleted.
1286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return last_chars2;
1287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add the remaining character length.
1289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return last_chars2 + (loc - last_chars1);
1290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Convert a Diff list into a pretty HTML report.
1295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return HTML representation.
1297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String diff_prettyHtml(LinkedList<Diff> diffs) {
1299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder html = new StringBuilder();
1300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int i = 0;
1301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text = aDiff.text.replace("&", "&amp;").replace("<", "&lt;")
1303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          .replace(">", "&gt;").replace("\n", "&para;<BR>");
1304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (aDiff.operation) {
1305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case INSERT:
1306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=").append(i)
1307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            .append("\">").append(text).append("</INS>");
1308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case DELETE:
1310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=").append(i)
1311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            .append("\">").append(text).append("</DEL>");
1312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case EQUAL:
1314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        html.append("<SPAN TITLE=\"i=").append(i).append("\">").append(text)
1315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            .append("</SPAN>");
1316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.DELETE) {
1319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        i += aDiff.text.length();
1320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return html.toString();
1323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute and return the source text (all equalities and deletions).
1328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Source text.
1330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String diff_text1(LinkedList<Diff> diffs) {
1332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder text = new StringBuilder();
1333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.INSERT) {
1335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.append(aDiff.text);
1336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return text.toString();
1339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute and return the destination text (all equalities and insertions).
1344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Destination text.
1346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String diff_text2(LinkedList<Diff> diffs) {
1348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder text = new StringBuilder();
1349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.DELETE) {
1351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.append(aDiff.text);
1352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return text.toString();
1355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute the Levenshtein distance; the number of inserted, deleted or
1360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * substituted characters.
1361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs LinkedList of Diff objects.
1362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Number of changes.
1363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int diff_levenshtein(LinkedList<Diff> diffs) {
1365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int levenshtein = 0;
1366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int insertions = 0;
1367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int deletions = 0;
1368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (aDiff.operation) {
1370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case INSERT:
1371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        insertions += aDiff.text.length();
1372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case DELETE:
1374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        deletions += aDiff.text.length();
1375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case EQUAL:
1377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // A deletion and an insertion is one substitution.
1378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        levenshtein += Math.max(insertions, deletions);
1379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        insertions = 0;
1380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        deletions = 0;
1381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    levenshtein += Math.max(insertions, deletions);
1385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return levenshtein;
1386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Crush the diff into an encoded string which describes the operations
1391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * required to transform text1 into text2.
1392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Operations are tab-separated.  Inserted text is escaped using %xx notation.
1394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs Array of diff tuples.
1395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Delta text.
1396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String diff_toDelta(LinkedList<Diff> diffs) {
1398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder text = new StringBuilder();
1399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (aDiff.operation) {
1401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case INSERT:
1402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
1403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8")
1404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                                            .replace('+', ' ')).append("\t");
1405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (UnsupportedEncodingException e) {
1406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Not likely on modern system.
1407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new Error("This system does not support UTF-8.", e);
1408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case DELETE:
1411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.append("-").append(aDiff.text.length()).append("\t");
1412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case EQUAL:
1414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.append("=").append(aDiff.text.length()).append("\t");
1415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String delta = text.toString();
1419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (delta.length() != 0) {
1420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Strip off trailing tab character.
1421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      delta = delta.substring(0, delta.length() - 1);
1422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      delta = unescapeForEncodeUriCompatability(delta);
1423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return delta;
1425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1429ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Given the original text1, and an encoded string which describes the
1430ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * operations required to transform text1 into text2, compute the full diff.
1431ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Source string for the diff.
1432ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param delta Delta text.
1433ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Array of diff tuples or null if invalid.
1434ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @throws IllegalArgumentException If invalid input.
1435ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1436ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1437ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throws IllegalArgumentException {
1438ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> diffs = new LinkedList<Diff>();
1439ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int pointer = 0;  // Cursor in text1
1440ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String[] tokens = delta.split("\t");
1441ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (String token : tokens) {
1442ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (token.length() == 0) {
1443ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Blank tokens are ok (from a trailing \t).
1444ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        continue;
1445ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1446ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Each token begins with a one character parameter which specifies the
1447ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // operation of this token (delete, insert, equality).
1448ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String param = token.substring(1);
1449ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (token.charAt(0)) {
1450ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case '+':
1451ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // decode would change all "+" to " "
1452ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        param = param.replace("+", "%2B");
1453ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
1454ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          param = URLDecoder.decode(param, "UTF-8");
1455ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (UnsupportedEncodingException e) {
1456ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Not likely on modern system.
1457ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new Error("This system does not support UTF-8.", e);
1458ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (IllegalArgumentException e) {
1459ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Malformed URI sequence.
1460ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException(
1461ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              "Illegal escape in diff_fromDelta: " + param, e);
1462ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1463ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        diffs.add(new Diff(Operation.INSERT, param));
1464ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1465ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case '-':
1466ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Fall through.
1467ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case '=':
1468ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        int n;
1469ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
1470ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          n = Integer.parseInt(param);
1471ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (NumberFormatException e) {
1472ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException(
1473ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              "Invalid number in diff_fromDelta: " + param, e);
1474ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1475ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (n < 0) {
1476ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException(
1477ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              "Negative number in diff_fromDelta: " + param);
1478ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1479ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        String text;
1480ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
1481ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text = text1.substring(pointer, pointer += n);
1482ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (StringIndexOutOfBoundsException e) {
1483ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException("Delta length (" + pointer
1484ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + ") larger than source text length (" + text1.length()
1485ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + ").", e);
1486ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1487ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (token.charAt(0) == '=') {
1488ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          diffs.add(new Diff(Operation.EQUAL, text));
1489ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1490ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          diffs.add(new Diff(Operation.DELETE, text));
1491ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1492ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1493ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      default:
1494ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Anything else is an error.
1495ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        throw new IllegalArgumentException(
1496ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            "Invalid diff operation in diff_fromDelta: " + token.charAt(0));
1497ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1498ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1499ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (pointer != text1.length()) {
1500ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Delta length (" + pointer
1501ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          + ") smaller than source text length (" + text1.length() + ").");
1502ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1503ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return diffs;
1504ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1505ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1506ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1507ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  //  MATCH FUNCTIONS
1508ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1509ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1510ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1511ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Locate the best instance of 'pattern' in 'text' near 'loc'.
1512ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Returns -1 if no match found.
1513ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text The text to search.
1514ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param pattern The pattern to search for.
1515ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param loc The location to search around.
1516ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Best match index or -1.
1517ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1518ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public int match_main(String text, String pattern, int loc) {
1519ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Check for null inputs.
1520ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text == null || pattern == null) {
1521ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Null inputs. (match_main)");
1522ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1523ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1524ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    loc = Math.max(0, Math.min(loc, text.length()));
1525ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text.equals(pattern)) {
1526ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Shortcut (potentially not guaranteed by the algorithm)
1527ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return 0;
1528ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (text.length() == 0) {
1529ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Nothing to match.
1530ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return -1;
1531ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (loc + pattern.length() <= text.length()
1532ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        && text.substring(loc, loc + pattern.length()).equals(pattern)) {
1533ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Perfect match at the perfect spot!  (Includes case of null pattern)
1534ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return loc;
1535ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else {
1536ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Do a fuzzy compare.
1537ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return match_bitap(text, pattern, loc);
1538ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1539ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1540ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1541ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1542ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1543ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Locate the best instance of 'pattern' in 'text' near 'loc' using the
1544ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Bitap algorithm.  Returns -1 if no match found.
1545ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text The text to search.
1546ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param pattern The pattern to search for.
1547ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param loc The location to search around.
1548ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Best match index or -1.
1549ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1550ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected int match_bitap(String text, String pattern, int loc) {
1551ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)
1552ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        : "Pattern too long for this application.";
1553ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1554ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Initialise the alphabet.
1555ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<Character, Integer> s = match_alphabet(pattern);
1556ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1557ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Highest score beyond which we give up.
1558ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    double score_threshold = Match_Threshold;
1559ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Is there a nearby exact match? (speedup)
1560ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int best_loc = text.indexOf(pattern, loc);
1561ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (best_loc != -1) {
1562ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1563ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          score_threshold);
1564ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // What about in the other direction? (speedup)
1565ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      best_loc = text.lastIndexOf(pattern, loc + pattern.length());
1566ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (best_loc != -1) {
1567ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1568ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            score_threshold);
1569ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1570ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1571ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1572ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Initialise the bit arrays.
1573ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int matchmask = 1 << (pattern.length() - 1);
1574ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    best_loc = -1;
1575ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1576ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int bin_min, bin_mid;
1577ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int bin_max = pattern.length() + text.length();
1578ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Empty initialization added to appease Java compiler.
1579ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int[] last_rd = new int[0];
1580ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int d = 0; d < pattern.length(); d++) {
1581ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Scan for the best match; each iteration allows for one more error.
1582ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Run a binary search to determine how far from 'loc' we can stray at
1583ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // this error level.
1584ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      bin_min = 0;
1585ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      bin_mid = bin_max;
1586ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (bin_min < bin_mid) {
1587ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (match_bitapScore(d, loc + bin_mid, loc, pattern)
1588ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            <= score_threshold) {
1589ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          bin_min = bin_mid;
1590ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1591ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          bin_max = bin_mid;
1592ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1593ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bin_mid = (bin_max - bin_min) / 2 + bin_min;
1594ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1595ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Use the result from this iteration as the maximum for the next.
1596ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      bin_max = bin_mid;
1597ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int start = Math.max(1, loc - bin_mid + 1);
1598ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int finish = Math.min(loc + bin_mid, text.length()) + pattern.length();
1599ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1600ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int[] rd = new int[finish + 2];
1601ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      rd[finish + 1] = (1 << d) - 1;
1602ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      for (int j = finish; j >= start; j--) {
1603ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        int charMatch;
1604ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) {
1605ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Out of range.
1606ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          charMatch = 0;
1607ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1608ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          charMatch = s.get(text.charAt(j - 1));
1609ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1610ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (d == 0) {
1611ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // First pass: exact match.
1612ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
1613ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1614ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Subsequent passes: fuzzy match.
1615ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1616ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1];
1617ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1618ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if ((rd[j] & matchmask) != 0) {
1619ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          double score = match_bitapScore(d, j - 1, loc, pattern);
1620ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // This match will almost certainly be better than any existing
1621ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // match.  But check anyway.
1622ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (score <= score_threshold) {
1623ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Told you so.
1624ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            score_threshold = score;
1625ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            best_loc = j - 1;
1626ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (best_loc > loc) {
1627ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              // When passing loc, don't exceed our current distance from loc.
1628ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              start = Math.max(1, 2 * loc - best_loc);
1629ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            } else {
1630ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              // Already passed loc, downhill from here on in.
1631ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              break;
1632ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
1633ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1634ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1635ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1636ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) {
1637ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // No hope for a (better) match at greater error levels.
1638ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1639ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1640ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      last_rd = rd;
1641ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1642ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return best_loc;
1643ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1644ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1645ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1646ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1647ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute and return the score for a match with e errors and x location.
1648ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param e Number of errors in match.
1649ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param x Location of match.
1650ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param loc Expected location of match.
1651ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param pattern Pattern being sought.
1652ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Overall score for match (0.0 = good, 1.0 = bad).
1653ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1654ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private double match_bitapScore(int e, int x, int loc, String pattern) {
1655ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    float accuracy = (float) e / pattern.length();
1656ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int proximity = Math.abs(loc - x);
1657ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (Match_Distance == 0) {
1658ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Dodge divide by zero error.
1659ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return proximity == 0 ? accuracy : 1.0;
1660ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1661ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return accuracy + (proximity / (float) Match_Distance);
1662ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1663ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1664ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1665ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1666ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Initialise the alphabet for the Bitap algorithm.
1667ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param pattern The text to encode.
1668ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Hash of character locations.
1669ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1670ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected Map<Character, Integer> match_alphabet(String pattern) {
1671ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Map<Character, Integer> s = new HashMap<Character, Integer>();
1672ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    char[] char_pattern = pattern.toCharArray();
1673ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (char c : char_pattern) {
1674ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      s.put(c, 0);
1675ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1676ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int i = 0;
1677ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (char c : char_pattern) {
1678ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      s.put(c, s.get(c) | (1 << (pattern.length() - i - 1)));
1679ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      i++;
1680ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1681ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return s;
1682ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1683ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1684ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1685ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  //  PATCH FUNCTIONS
1686ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1687ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1688ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1689ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Increase the context until it is unique,
1690ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * but don't let the pattern expand beyond Match_MaxBits.
1691ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patch The patch to grow.
1692ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text Source text.
1693ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1694ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  protected void patch_addContext(Patch patch, String text) {
1695ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text.length() == 0) {
1696ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return;
1697ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1698ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String pattern = text.substring(patch.start2, patch.start2 + patch.length1);
1699ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int padding = 0;
1700ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1701ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Look for the first and last matches of pattern in text.  If two different
1702ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // matches are found, increase the pattern length.
1703ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (text.indexOf(pattern) != text.lastIndexOf(pattern)
1704ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) {
1705ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      padding += Patch_Margin;
1706ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      pattern = text.substring(Math.max(0, patch.start2 - padding),
1707ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          Math.min(text.length(), patch.start2 + patch.length1 + padding));
1708ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1709ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add one chunk for good luck.
1710ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    padding += Patch_Margin;
1711ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1712ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add the prefix.
1713ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String prefix = text.substring(Math.max(0, patch.start2 - padding),
1714ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start2);
1715ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (prefix.length() != 0) {
1716ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix));
1717ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1718ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add the suffix.
1719ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String suffix = text.substring(patch.start2 + patch.length1,
1720ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        Math.min(text.length(), patch.start2 + patch.length1 + padding));
1721ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (suffix.length() != 0) {
1722ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.diffs.addLast(new Diff(Operation.EQUAL, suffix));
1723ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1724ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1725ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Roll back the start points.
1726ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch.start1 -= prefix.length();
1727ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch.start2 -= prefix.length();
1728ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Extend the lengths.
1729ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch.length1 += prefix.length() + suffix.length();
1730ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch.length2 += prefix.length() + suffix.length();
1731ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1732ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1733ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1734ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1735ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute a list of patches to turn text1 into text2.
1736ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * A set of diffs will be computed.
1737ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old text.
1738ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 New text.
1739ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Patch objects.
1740ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1741ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Patch> patch_make(String text1, String text2) {
1742ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1 == null || text2 == null) {
1743ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Null inputs. (patch_make)");
1744ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1745ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // No diffs provided, compute our own.
1746ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> diffs = diff_main(text1, text2, true);
1747ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.size() > 2) {
1748ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_cleanupSemantic(diffs);
1749ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diff_cleanupEfficiency(diffs);
1750ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1751ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patch_make(text1, diffs);
1752ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1753ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1754ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1755ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1756ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute a list of patches to turn text1 into text2.
1757ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * text1 will be derived from the provided diffs.
1758ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs Array of diff tuples for text1 to text2.
1759ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Patch objects.
1760ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1761ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Patch> patch_make(LinkedList<Diff> diffs) {
1762ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs == null) {
1763ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Null inputs. (patch_make)");
1764ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1765ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // No origin string provided, compute our own.
1766ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String text1 = diff_text1(diffs);
1767ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patch_make(text1, diffs);
1768ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1769ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1770ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1771ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1772ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute a list of patches to turn text1 into text2.
1773ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * text2 is ignored, diffs are the delta between text1 and text2.
1774ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old text
1775ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text2 Ignored.
1776ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs Array of diff tuples for text1 to text2.
1777ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Patch objects.
1778ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @deprecated Prefer patch_make(String text1, LinkedList<Diff> diffs).
1779ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1780ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Patch> patch_make(String text1, String text2,
1781ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      LinkedList<Diff> diffs) {
1782ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patch_make(text1, diffs);
1783ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1784ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1785ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1786ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1787ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Compute a list of patches to turn text1 into text2.
1788ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * text2 is not provided, diffs are the delta between text1 and text2.
1789ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text1 Old text.
1790ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param diffs Array of diff tuples for text1 to text2.
1791ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return LinkedList of Patch objects.
1792ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1793ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Patch> patch_make(String text1, LinkedList<Diff> diffs) {
1794ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (text1 == null || diffs == null) {
1795ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throw new IllegalArgumentException("Null inputs. (patch_make)");
1796ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1797ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1798ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Patch> patches = new LinkedList<Patch>();
1799ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.isEmpty()) {
1800ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return patches;  // Get rid of the null case.
1801ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1802ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Patch patch = new Patch();
1803ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int char_count1 = 0;  // Number of characters into the text1 string.
1804ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int char_count2 = 0;  // Number of characters into the text2 string.
1805ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Start with text1 (prepatch_text) and apply the diffs until we arrive at
1806ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // text2 (postpatch_text). We recreate the patches one by one to determine
1807ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // context info.
1808ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String prepatch_text = text1;
1809ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String postpatch_text = text1;
1810ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Diff aDiff : diffs) {
1811ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) {
1812ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // A new patch starts here.
1813ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start1 = char_count1;
1814ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start2 = char_count2;
1815ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1816ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1817ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      switch (aDiff.operation) {
1818ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case INSERT:
1819ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.diffs.add(aDiff);
1820ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length2 += aDiff.text.length();
1821ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        postpatch_text = postpatch_text.substring(0, char_count2)
1822ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            + aDiff.text + postpatch_text.substring(char_count2);
1823ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1824ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case DELETE:
1825ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length1 += aDiff.text.length();
1826ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.diffs.add(aDiff);
1827ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        postpatch_text = postpatch_text.substring(0, char_count2)
1828ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            + postpatch_text.substring(char_count2 + aDiff.text.length());
1829ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1830ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      case EQUAL:
1831ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (aDiff.text.length() <= 2 * Patch_Margin
1832ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) {
1833ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Small equality inside a patch.
1834ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.diffs.add(aDiff);
1835ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.length1 += aDiff.text.length();
1836ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.length2 += aDiff.text.length();
1837ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1838ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1839ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (aDiff.text.length() >= 2 * Patch_Margin) {
1840ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Time for a new patch.
1841ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (!patch.diffs.isEmpty()) {
1842ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch_addContext(patch, prepatch_text);
1843ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patches.add(patch);
1844ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch = new Patch();
1845ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Unlike Unidiff, our patch lists have a rolling context.
1846ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1847ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Update prepatch text & pos to reflect the application of the
1848ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // just completed patch.
1849ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            prepatch_text = postpatch_text;
1850ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            char_count1 = char_count2;
1851ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1852ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1853ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        break;
1854ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1855ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1856ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Update the current character count.
1857ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.INSERT) {
1858ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        char_count1 += aDiff.text.length();
1859ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1860ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (aDiff.operation != Operation.DELETE) {
1861ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        char_count2 += aDiff.text.length();
1862ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1863ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1864ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Pick up the leftover patch if not empty.
1865ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (!patch.diffs.isEmpty()) {
1866ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch_addContext(patch, prepatch_text);
1867ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patches.add(patch);
1868ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1869ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1870ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patches;
1871ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1872ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1873ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1874ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1875ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Given an array of patches, return another array that is identical.
1876ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patches Array of patch objects.
1877ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Array of patch objects.
1878ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1879ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public LinkedList<Patch> patch_deepCopy(LinkedList<Patch> patches) {
1880ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Patch> patchesCopy = new LinkedList<Patch>();
1881ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Patch aPatch : patches) {
1882ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      Patch patchCopy = new Patch();
1883ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      for (Diff aDiff : aPatch.diffs) {
1884ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        Diff diffCopy = new Diff(aDiff.operation, aDiff.text);
1885ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patchCopy.diffs.add(diffCopy);
1886ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1887ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patchCopy.start1 = aPatch.start1;
1888ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patchCopy.start2 = aPatch.start2;
1889ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patchCopy.length1 = aPatch.length1;
1890ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patchCopy.length2 = aPatch.length2;
1891ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patchesCopy.add(patchCopy);
1892ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1893ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patchesCopy;
1894ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
1895ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1896ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1897ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
1898ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Merge a set of patches onto the text.  Return a patched text, as well
1899ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * as an array of true/false values indicating which patches were applied.
1900ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patches Array of patch objects
1901ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param text Old text.
1902ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Two element Object array, containing the new text and an array of
1903ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *      boolean values.
1904ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
1905ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public Object[] patch_apply(LinkedList<Patch> patches, String text) {
1906ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (patches.isEmpty()) {
1907ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return new Object[]{text, new boolean[0]};
1908ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
1909ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1910ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Deep copy the patches so that no changes are made to originals.
1911ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patches = patch_deepCopy(patches);
1912ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1913ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String nullPadding = patch_addPadding(patches);
1914ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text = nullPadding + text + nullPadding;
1915ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch_splitMax(patches);
1916ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
1917ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int x = 0;
1918ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // delta keeps track of the offset between the expected and actual location
1919ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // of the previous patch.  If there are patches expected at positions 10 and
1920ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // 20, but the first patch was found at 12, delta is 2 and the second patch
1921ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // has an effective expected position of 22.
1922ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int delta = 0;
1923ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean[] results = new boolean[patches.size()];
1924ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Patch aPatch : patches) {
1925ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int expected_loc = aPatch.start2 + delta;
1926ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String text1 = diff_text1(aPatch.diffs);
1927ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int start_loc;
1928ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int end_loc = -1;
1929ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (text1.length() > this.Match_MaxBits) {
1930ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // patch_splitMax will only provide an oversized pattern in the case of
1931ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // a monster delete.
1932ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        start_loc = match_main(text,
1933ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            text1.substring(0, this.Match_MaxBits), expected_loc);
1934ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (start_loc != -1) {
1935ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          end_loc = match_main(text,
1936ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              text1.substring(text1.length() - this.Match_MaxBits),
1937ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              expected_loc + text1.length() - this.Match_MaxBits);
1938ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (end_loc == -1 || start_loc >= end_loc) {
1939ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Can't find valid trailing context.  Drop this patch.
1940ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            start_loc = -1;
1941ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1942ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1943ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
1944ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        start_loc = match_main(text, text1, expected_loc);
1945ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
1946ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (start_loc == -1) {
1947ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // No match found.  :(
1948ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        results[x] = false;
1949ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Subtract the delta for this failed patch from subsequent patches.
1950ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        delta -= aPatch.length2 - aPatch.length1;
1951ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
1952ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Found a match.  :)
1953ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        results[x] = true;
1954ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        delta = start_loc - expected_loc;
1955ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        String text2;
1956ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (end_loc == -1) {
1957ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text2 = text.substring(start_loc,
1958ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              Math.min(start_loc + text1.length(), text.length()));
1959ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1960ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text2 = text.substring(start_loc,
1961ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              Math.min(end_loc + this.Match_MaxBits, text.length()));
1962ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1963ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (text1.equals(text2)) {
1964ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Perfect match, just shove the replacement text in.
1965ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text = text.substring(0, start_loc) + diff_text2(aPatch.diffs)
1966ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              + text.substring(start_loc + text1.length());
1967ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
1968ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Imperfect match.  Run a diff to get a framework of equivalent
1969ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // indices.
1970ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          LinkedList<Diff> diffs = diff_main(text1, text2, false);
1971ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (text1.length() > this.Match_MaxBits
1972ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              && diff_levenshtein(diffs) / (float) text1.length()
1973ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              > this.Patch_DeleteThreshold) {
1974ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // The end points match, but the content is unacceptably bad.
1975ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            results[x] = false;
1976ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
1977ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            diff_cleanupSemanticLossless(diffs);
1978ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            int index1 = 0;
1979ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            for (Diff aDiff : aPatch.diffs) {
1980ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              if (aDiff.operation != Operation.EQUAL) {
1981ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                int index2 = diff_xIndex(diffs, index1);
1982ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                if (aDiff.operation == Operation.INSERT) {
1983ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  // Insertion
1984ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  text = text.substring(0, start_loc + index2) + aDiff.text
1985ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                      + text.substring(start_loc + index2);
1986ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                } else if (aDiff.operation == Operation.DELETE) {
1987ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  // Deletion
1988ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  text = text.substring(0, start_loc + index2)
1989ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                      + text.substring(start_loc + diff_xIndex(diffs,
1990ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                      index1 + aDiff.text.length()));
1991ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                }
1992ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              }
1993ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              if (aDiff.operation != Operation.DELETE) {
1994ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                index1 += aDiff.text.length();
1995ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              }
1996ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
1997ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
1998ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
1999ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2000ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      x++;
2001ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2002ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Strip the padding off.
2003ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    text = text.substring(nullPadding.length(), text.length()
2004ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        - nullPadding.length());
2005ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return new Object[]{text, results};
2006ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2007ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2008ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2009ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2010ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Add some padding on text start and end so that edges can match something.
2011ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Intended to be called only from within patch_apply.
2012ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patches Array of patch objects.
2013ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return The padding string added to each side.
2014ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2015ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String patch_addPadding(LinkedList<Patch> patches) {
2016ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int paddingLength = this.Patch_Margin;
2017ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String nullPadding = "";
2018ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (int x = 1; x <= paddingLength; x++) {
2019ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      nullPadding += String.valueOf((char) x);
2020ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2021ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2022ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Bump all the patches forward.
2023ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Patch aPatch : patches) {
2024ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      aPatch.start1 += paddingLength;
2025ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      aPatch.start2 += paddingLength;
2026ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2027ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2028ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add some padding on start of first diff.
2029ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Patch patch = patches.getFirst();
2030ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<Diff> diffs = patch.diffs;
2031ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) {
2032ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Add nullPadding equality.
2033ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.addFirst(new Diff(Operation.EQUAL, nullPadding));
2034ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start1 -= paddingLength;  // Should be 0.
2035ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start2 -= paddingLength;  // Should be 0.
2036ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length1 += paddingLength;
2037ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length2 += paddingLength;
2038ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (paddingLength > diffs.getFirst().text.length()) {
2039ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Grow first equality.
2040ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      Diff firstDiff = diffs.getFirst();
2041ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int extraLength = paddingLength - firstDiff.text.length();
2042ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      firstDiff.text = nullPadding.substring(firstDiff.text.length())
2043ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          + firstDiff.text;
2044ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start1 -= extraLength;
2045ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start2 -= extraLength;
2046ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length1 += extraLength;
2047ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length2 += extraLength;
2048ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2049ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2050ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    // Add some padding on end of last diff.
2051ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    patch = patches.getLast();
2052ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    diffs = patch.diffs;
2053ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) {
2054ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Add nullPadding equality.
2055ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      diffs.addLast(new Diff(Operation.EQUAL, nullPadding));
2056ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length1 += paddingLength;
2057ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length2 += paddingLength;
2058ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    } else if (paddingLength > diffs.getLast().text.length()) {
2059ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Grow last equality.
2060ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      Diff lastDiff = diffs.getLast();
2061ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      int extraLength = paddingLength - lastDiff.text.length();
2062ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      lastDiff.text += nullPadding.substring(0, extraLength);
2063ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length1 += extraLength;
2064ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.length2 += extraLength;
2065ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2066ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2067ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return nullPadding;
2068ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2069ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2070ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2071ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2072ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Look through the patches and break up any which are longer than the
2073ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * maximum limit of the match algorithm.
2074ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patches LinkedList of Patch objects.
2075ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2076ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public void patch_splitMax(LinkedList<Patch> patches) {
2077ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int patch_size;
2078ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String precontext, postcontext;
2079ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Patch patch;
2080ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    int start1, start2;
2081ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    boolean empty;
2082ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Operation diff_type;
2083ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String diff_text;
2084ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    ListIterator<Patch> pointer = patches.listIterator();
2085ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Patch bigpatch = pointer.hasNext() ? pointer.next() : null;
2086ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (bigpatch != null) {
2087ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (bigpatch.length1 <= Match_MaxBits) {
2088ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        bigpatch = pointer.hasNext() ? pointer.next() : null;
2089ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        continue;
2090ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2091ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Remove the big old patch.
2092ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      pointer.remove();
2093ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch_size = Match_MaxBits;
2094ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      start1 = bigpatch.start1;
2095ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      start2 = bigpatch.start2;
2096ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      precontext = "";
2097ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (!bigpatch.diffs.isEmpty()) {
2098ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Create one of several smaller patches.
2099ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch = new Patch();
2100ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        empty = true;
2101ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start1 = start1 - precontext.length();
2102ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start2 = start2 - precontext.length();
2103ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (precontext.length() != 0) {
2104ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.length1 = patch.length2 = precontext.length();
2105ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.diffs.add(new Diff(Operation.EQUAL, precontext));
2106ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2107ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        while (!bigpatch.diffs.isEmpty()
2108ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            && patch.length1 < patch_size - Patch_Margin) {
2109ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          diff_type = bigpatch.diffs.getFirst().operation;
2110ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          diff_text = bigpatch.diffs.getFirst().text;
2111ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (diff_type == Operation.INSERT) {
2112ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Insertions are harmless.
2113ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.length2 += diff_text.length();
2114ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            start2 += diff_text.length();
2115ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.diffs.addLast(bigpatch.diffs.removeFirst());
2116ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            empty = false;
2117ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1
2118ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              && patch.diffs.getFirst().operation == Operation.EQUAL
2119ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              && diff_text.length() > 2 * patch_size) {
2120ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // This is a large deletion.  Let it pass in one chunk.
2121ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.length1 += diff_text.length();
2122ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            start1 += diff_text.length();
2123ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            empty = false;
2124ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.diffs.add(new Diff(diff_type, diff_text));
2125ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            bigpatch.diffs.removeFirst();
2126ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
2127ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            // Deletion or equality.  Only take as much as we can stomach.
2128ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            diff_text = diff_text.substring(0, Math.min(diff_text.length(),
2129ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                patch_size - patch.length1 - Patch_Margin));
2130ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.length1 += diff_text.length();
2131ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            start1 += diff_text.length();
2132ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (diff_type == Operation.EQUAL) {
2133ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              patch.length2 += diff_text.length();
2134ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              start2 += diff_text.length();
2135ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            } else {
2136ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              empty = false;
2137ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
2138ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.diffs.add(new Diff(diff_type, diff_text));
2139ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            if (diff_text.equals(bigpatch.diffs.getFirst().text)) {
2140ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              bigpatch.diffs.removeFirst();
2141ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            } else {
2142ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text
2143ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski                  .substring(diff_text.length());
2144ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            }
2145ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
2146ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2147ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Compute the head context for the next patch.
2148ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        precontext = diff_text2(patch.diffs);
2149ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        precontext = precontext.substring(Math.max(0, precontext.length()
2150ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            - Patch_Margin));
2151ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        // Append the end context for this patch.
2152ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (diff_text1(bigpatch.diffs).length() > Patch_Margin) {
2153ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin);
2154ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
2155ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          postcontext = diff_text1(bigpatch.diffs);
2156ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2157ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (postcontext.length() != 0) {
2158ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.length1 += postcontext.length();
2159ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.length2 += postcontext.length();
2160ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          if (!patch.diffs.isEmpty()
2161ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              && patch.diffs.getLast().operation == Operation.EQUAL) {
2162ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.diffs.getLast().text += postcontext;
2163ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          } else {
2164ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            patch.diffs.add(new Diff(Operation.EQUAL, postcontext));
2165ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          }
2166ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2167ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (!empty) {
2168ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          pointer.add(patch);
2169ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2170ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2171ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      bigpatch = pointer.hasNext() ? pointer.next() : null;
2172ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2173ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2174ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2175ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2176ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2177ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Take a list of patches and return a textual representation.
2178ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param patches List of Patch objects.
2179ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return Text representation of patches.
2180ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2181ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public String patch_toText(List<Patch> patches) {
2182ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    StringBuilder text = new StringBuilder();
2183ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    for (Patch aPatch : patches) {
2184ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text.append(aPatch);
2185ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2186ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return text.toString();
2187ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2188ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2189ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2190ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2191ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Parse a textual representation of patches and return a List of Patch
2192ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * objects.
2193ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param textline Text representation of patches.
2194ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return List of Patch objects.
2195ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @throws IllegalArgumentException If invalid input.
2196ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2197ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public List<Patch> patch_fromText(String textline)
2198ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      throws IllegalArgumentException {
2199ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<Patch> patches = new LinkedList<Patch>();
2200ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    if (textline.length() == 0) {
2201ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return patches;
2202ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2203ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    List<String> textList = Arrays.asList(textline.split("\n"));
2204ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    LinkedList<String> text = new LinkedList<String>(textList);
2205ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Patch patch;
2206ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Pattern patchHeader
2207ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
2208ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    Matcher m;
2209ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    char sign;
2210ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    String line;
2211ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    while (!text.isEmpty()) {
2212ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      m = patchHeader.matcher(text.getFirst());
2213ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (!m.matches()) {
2214ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        throw new IllegalArgumentException(
2215ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski            "Invalid patch string: " + text.getFirst());
2216ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2217ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch = new Patch();
2218ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patches.add(patch);
2219ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start1 = Integer.parseInt(m.group(1));
2220ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (m.group(2).length() == 0) {
2221ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start1--;
2222ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length1 = 1;
2223ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else if (m.group(2).equals("0")) {
2224ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length1 = 0;
2225ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
2226ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start1--;
2227ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length1 = Integer.parseInt(m.group(2));
2228ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2229ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2230ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      patch.start2 = Integer.parseInt(m.group(3));
2231ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (m.group(4).length() == 0) {
2232ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start2--;
2233ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length2 = 1;
2234ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else if (m.group(4).equals("0")) {
2235ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length2 = 0;
2236ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
2237ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.start2--;
2238ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        patch.length2 = Integer.parseInt(m.group(4));
2239ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2240ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text.removeFirst();
2241ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2242ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      while (!text.isEmpty()) {
2243ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
2244ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          sign = text.getFirst().charAt(0);
2245ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (IndexOutOfBoundsException e) {
2246ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Blank line?  Whatever.
2247ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.removeFirst();
2248ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          continue;
2249ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2250ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        line = text.getFirst().substring(1);
2251ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        line = line.replace("+", "%2B");  // decode would change all "+" to " "
2252ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
2253ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          line = URLDecoder.decode(line, "UTF-8");
2254ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (UnsupportedEncodingException e) {
2255ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Not likely on modern system.
2256ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new Error("This system does not support UTF-8.", e);
2257ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (IllegalArgumentException e) {
2258ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Malformed URI sequence.
2259ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException(
2260ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              "Illegal escape in patch_fromText: " + line, e);
2261ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2262ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        if (sign == '-') {
2263ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Deletion.
2264ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.diffs.add(new Diff(Operation.DELETE, line));
2265ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (sign == '+') {
2266ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Insertion.
2267ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.diffs.add(new Diff(Operation.INSERT, line));
2268ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (sign == ' ') {
2269ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Minor equality.
2270ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          patch.diffs.add(new Diff(Operation.EQUAL, line));
2271ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else if (sign == '@') {
2272ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Start of next patch.
2273ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
2274ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } else {
2275ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // WTF?
2276ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new IllegalArgumentException(
2277ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              "Invalid patch mode '" + sign + "' in: " + line);
2278ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2279ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        text.removeFirst();
2280ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2281ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2282ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return patches;
2283ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2284ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2285ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2286ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2287ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Class representing one diff operation.
2288ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2289ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public static class Diff {
2290ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2291ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * One of: INSERT, DELETE or EQUAL.
2292ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2293ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public Operation operation;
2294ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2295ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * The text associated with this diff operation.
2296ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2297ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public String text;
2298ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2299ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2300ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Constructor.  Initializes the diff with the provided values.
2301ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @param operation One of INSERT, DELETE or EQUAL.
2302ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @param text The text being applied.
2303ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2304ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public Diff(Operation operation, String text) {
2305ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Construct a diff with the specified operation and text.
2306ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.operation = operation;
2307ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.text = text;
2308ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2309ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2310ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2311ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2312ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Display a human-readable version of this Diff.
2313ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @return text version.
2314ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2315ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public String toString() {
2316ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String prettyText = this.text.replace('\n', '\u00b6');
2317ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return "Diff(" + this.operation + ",\"" + prettyText + "\")";
2318ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2319ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2320ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2321ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2322ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Is this Diff equivalent to another Diff?
2323ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @param d Another Diff to compare against.
2324ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @return true or false.
2325ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2326ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public boolean equals(Object d) {
2327ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      try {
2328ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        return (((Diff) d).operation == this.operation)
2329ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski               && (((Diff) d).text.equals(this.text));
2330ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } catch (ClassCastException e) {
2331ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        return false;
2332ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2333ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2334ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2335ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2336ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2337ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2338ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Class representing one patch operation.
2339ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2340ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  public static class Patch {
2341ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public LinkedList<Diff> diffs;
2342ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public int start1;
2343ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public int start2;
2344ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public int length1;
2345ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public int length2;
2346ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2347ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2348ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2349ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Constructor.  Initializes with an empty list of diffs.
2350ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2351ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public Patch() {
2352ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      this.diffs = new LinkedList<Diff>();
2353ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2354ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2355ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2356ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    /**
2357ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Emmulate GNU diff's format.
2358ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Header: @@ -382,8 +481,9 @@
2359ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * Indicies are printed as 1-based, not 0-based.
2360ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     * @return The GNU diff string.
2361ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski     */
2362ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    public String toString() {
2363ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      String coords1, coords2;
2364ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (this.length1 == 0) {
2365ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords1 = this.start1 + ",0";
2366ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else if (this.length1 == 1) {
2367ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords1 = Integer.toString(this.start1 + 1);
2368ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
2369ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords1 = (this.start1 + 1) + "," + this.length1;
2370ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2371ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      if (this.length2 == 0) {
2372ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords2 = this.start2 + ",0";
2373ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else if (this.length2 == 1) {
2374ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords2 = Integer.toString(this.start2 + 1);
2375ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      } else {
2376ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        coords2 = (this.start2 + 1) + "," + this.length2;
2377ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2378ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      StringBuilder text = new StringBuilder();
2379ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      text.append("@@ -").append(coords1).append(" +").append(coords2)
2380ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          .append(" @@\n");
2381ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      // Escape the body of the patch with %xx notation.
2382ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      for (Diff aDiff : this.diffs) {
2383ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        switch (aDiff.operation) {
2384ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case INSERT:
2385ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.append('+');
2386ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
2387ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case DELETE:
2388ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.append('-');
2389ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
2390ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        case EQUAL:
2391ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.append(' ');
2392ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          break;
2393ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2394ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        try {
2395ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' '))
2396ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski              .append("\n");
2397ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        } catch (UnsupportedEncodingException e) {
2398ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          // Not likely on modern system.
2399ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski          throw new Error("This system does not support UTF-8.", e);
2400ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        }
2401ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      }
2402ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski      return unescapeForEncodeUriCompatability(text.toString());
2403ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    }
2404ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2405ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2406ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski
2407ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  /**
2408ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Unescape selected chars for compatability with JavaScript's encodeURI.
2409ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * In speed critical applications this could be dropped since the
2410ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * receiving application will certainly decode these fine.
2411ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Note that this function is case-sensitive.  Thus "%3f" would not be
2412ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * unescaped.  But this is ok because it is only called with the output of
2413ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * URLEncoder.encode which returns uppercase hex.
2414ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *
2415ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * Example: "%3F" -> "?", "%24" -> "$", etc.
2416ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   *
2417ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @param str The string to escape.
2418ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   * @return The escaped string.
2419ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski   */
2420ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  private static String unescapeForEncodeUriCompatability(String str) {
2421ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski    return str.replace("%21", "!").replace("%7E", "~")
2422ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        .replace("%27", "'").replace("%28", "(").replace("%29", ")")
2423ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?")
2424ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        .replace("%3A", ":").replace("%40", "@").replace("%26", "&")
2425ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        .replace("%3D", "=").replace("%2B", "+").replace("%24", "$")
2426ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski        .replace("%2C", ",").replace("%23", "#");
2427ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski  }
2428ed79165d195c99e5d8e283bb5bbf84c3363ae254Maksymilian Osowski}
2429