1package jdiff;
2
3import java.io.*;
4import java.util.*;
5
6/** A class to compare vectors of objects.  The result of comparison
7    is a list of <code>change</code> objects which form an
8    edit script.  The objects compared are traditionally lines
9    of text from two files.  Comparison options such as "ignore
10    whitespace" are implemented by modifying the <code>equals</code>
11    and <code>hashcode</code> methods for the objects compared.
12<p>
13   The basic algorithm is described in: </br>
14   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
15   Algorithmica Vol. 1 No. 2, 1986, p 251.
16<p>
17   This class outputs different results from GNU diff 1.15 on some
18   inputs.  Our results are actually better (smaller change list, smaller
19   total size of changes), but it would be nice to know why.  Perhaps
20   there is a memory overwrite bug in GNU diff 1.15.
21
22  @author Stuart D. Gathman, translated from GNU diff 1.15
23    Copyright (C) 2000  Business Management Systems, Inc.
24<p>
25    This program is free software; you can redistribute it and/or modify
26    it under the terms of the GNU General Public License as published by
27    the Free Software Foundation; either version 1, or (at your option)
28    any later version.
29<p>
30    This program is distributed in the hope that it will be useful,
31    but WITHOUT ANY WARRANTY; without even the implied warranty of
32    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33    GNU General Public License for more details.
34<p>
35    You should have received a copy of the <a href=COPYING.txt>
36    GNU General Public License</a>
37    along with this program; if not, write to the Free Software
38    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
39
40 */
41
42public class DiffMyers
43{
44
45  /** Prepare to find differences between two arrays.  Each element of
46      the arrays is translated to an "equivalence number" based on
47      the result of <code>equals</code>.  The original Object arrays
48      are no longer needed for computing the differences.  They will
49      be needed again later to print the results of the comparison as
50      an edit script, if desired.
51   */
52  public DiffMyers(Object[] a,Object[] b)
53  {
54    Hashtable h = new Hashtable(a.length + b.length);
55    filevec[0] = new file_data(a,h);
56    filevec[1] = new file_data(b,h);
57  }
58
59  /** 1 more than the maximum equivalence value used for this or its
60     sibling file. */
61  private int equiv_max = 1;
62
63  /** When set to true, the comparison uses a heuristic to speed it up.
64    With this heuristic, for files with a constant small density
65    of changes, the algorithm is linear in the file size.  */
66  public boolean heuristic = false;
67
68  /** When set to true, the algorithm returns a guarranteed minimal
69      set of changes.  This makes things slower, sometimes much slower. */
70  public boolean no_discards = false;
71
72  private int[] xvec, yvec;        /* Vectors being compared. */
73  private int[] fdiag;                /* Vector, indexed by diagonal, containing
74                                   the X coordinate of the point furthest
75                                   along the given diagonal in the forward
76                                   search of the edit matrix. */
77  private int[] bdiag;                /* Vector, indexed by diagonal, containing
78                                   the X coordinate of the point furthest
79                                   along the given diagonal in the backward
80                                   search of the edit matrix. */
81  private int fdiagoff, bdiagoff;
82  private final file_data[] filevec = new file_data[2];
83  private int cost;
84
85  /** Find the midpoint of the shortest edit script for a specified
86     portion of the two files.
87
88     We scan from the beginnings of the files, and simultaneously from the ends,
89     doing a breadth-first search through the space of edit-sequence.
90     When the two searches meet, we have found the midpoint of the shortest
91     edit sequence.
92
93     The value returned is the number of the diagonal on which the midpoint lies.
94     The diagonal number equals the number of inserted lines minus the number
95     of deleted lines (counting only lines before the midpoint).
96     The edit cost is stored into COST; this is the total number of
97     lines inserted or deleted (counting only lines before the midpoint).
98
99     This function assumes that the first lines of the specified portions
100     of the two files do not match, and likewise that the last lines do not
101     match.  The caller must trim matching lines from the beginning and end
102     of the portions it is going to specify.
103
104     Note that if we return the "wrong" diagonal value, or if
105     the value of bdiag at that diagonal is "wrong",
106     the worst this can do is cause suboptimal diff output.
107     It cannot cause incorrect diff output.  */
108
109  private int diag (int xoff, int xlim, int yoff, int ylim)
110  {
111    final int[] fd = fdiag;        // Give the compiler a chance.
112    final int[] bd = bdiag;        // Additional help for the compiler.
113    final int[] xv = xvec;                // Still more help for the compiler.
114    final int[] yv = yvec;                // And more and more . . .
115    final int dmin = xoff - ylim;        // Minimum valid diagonal.
116    final int dmax = xlim - yoff;        // Maximum valid diagonal.
117    final int fmid = xoff - yoff;        // Center diagonal of top-down search.
118    final int bmid = xlim - ylim;        // Center diagonal of bottom-up search.
119    int fmin = fmid, fmax = fmid;        // Limits of top-down search.
120    int bmin = bmid, bmax = bmid;        // Limits of bottom-up search.
121    /* True if southeast corner is on an odd
122                                     diagonal with respect to the northwest. */
123    final boolean odd = (fmid - bmid & 1) != 0;
124
125    fd[fdiagoff + fmid] = xoff;
126    bd[bdiagoff + bmid] = xlim;
127
128    for (int c = 1;; ++c)
129      {
130        int d;                        /* Active diagonal. */
131        boolean big_snake = false;
132
133        /* Extend the top-down search by an edit step in each diagonal. */
134        if (fmin > dmin)
135          fd[fdiagoff + --fmin - 1] = -1;
136        else
137          ++fmin;
138        if (fmax < dmax)
139          fd[fdiagoff + ++fmax + 1] = -1;
140        else
141          --fmax;
142        for (d = fmax; d >= fmin; d -= 2)
143          {
144            int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
145
146            if (tlo >= thi)
147              x = tlo + 1;
148            else
149              x = thi;
150            oldx = x;
151            y = x - d;
152            while (x < xlim && y < ylim && xv[x] == yv[y]) {
153              ++x; ++y;
154            }
155            if (x - oldx > 20)
156              big_snake = true;
157            fd[fdiagoff + d] = x;
158            if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
159              {
160                cost = 2 * c - 1;
161                return d;
162              }
163          }
164
165        /* Similar extend the bottom-up search. */
166        if (bmin > dmin)
167          bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
168        else
169          ++bmin;
170        if (bmax < dmax)
171          bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
172        else
173          --bmax;
174        for (d = bmax; d >= bmin; d -= 2)
175          {
176            int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
177
178            if (tlo < thi)
179              x = tlo;
180            else
181              x = thi - 1;
182            oldx = x;
183            y = x - d;
184            while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
185              --x; --y;
186            }
187            if (oldx - x > 20)
188              big_snake = true;
189            bd[bdiagoff + d] = x;
190            if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
191              {
192                cost = 2 * c;
193                return d;
194              }
195          }
196
197        /* Heuristic: check occasionally for a diagonal that has made
198           lots of progress compared with the edit distance.
199           If we have any such, find the one that has made the most
200           progress and return it as if it had succeeded.
201
202           With this heuristic, for files with a constant small density
203           of changes, the algorithm is linear in the file size.  */
204
205        if (c > 200 && big_snake && heuristic)
206          {
207            int best = 0;
208            int bestpos = -1;
209
210            for (d = fmax; d >= fmin; d -= 2)
211              {
212                int dd = d - fmid;
213                if ((fd[fdiagoff + d] - xoff)*2 - dd > 12 * (c + (dd > 0 ? dd : -dd)))
214                  {
215                    if (fd[fdiagoff + d] * 2 - dd > best
216                        && fd[fdiagoff + d] - xoff > 20
217                        && fd[fdiagoff + d] - d - yoff > 20)
218                      {
219                        int k;
220                        int x = fd[fdiagoff + d];
221
222                        /* We have a good enough best diagonal;
223                           now insist that it end with a significant snake.  */
224                        for (k = 1; k <= 20; k++)
225                          if (xvec[x - k] != yvec[x - d - k])
226                            break;
227
228                        if (k == 21)
229                          {
230                            best = fd[fdiagoff + d] * 2 - dd;
231                            bestpos = d;
232                          }
233                      }
234                  }
235              }
236            if (best > 0)
237              {
238                cost = 2 * c - 1;
239                return bestpos;
240              }
241
242            best = 0;
243            for (d = bmax; d >= bmin; d -= 2)
244              {
245                int dd = d - bmid;
246                if ((xlim - bd[bdiagoff + d])*2 + dd > 12 * (c + (dd > 0 ? dd : -dd)))
247                  {
248                    if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
249                        && xlim - bd[bdiagoff + d] > 20
250                        && ylim - (bd[bdiagoff + d] - d) > 20)
251                      {
252                        /* We have a good enough best diagonal;
253                           now insist that it end with a significant snake.  */
254                        int k;
255                        int x = bd[bdiagoff + d];
256
257                        for (k = 0; k < 20; k++)
258                          if (xvec[x + k] != yvec[x - d + k])
259                            break;
260                        if (k == 20)
261                          {
262                            best = (xlim - bd[bdiagoff + d]) * 2 + dd;
263                            bestpos = d;
264                          }
265                      }
266                  }
267              }
268            if (best > 0)
269              {
270                cost = 2 * c - 1;
271                return bestpos;
272              }
273          }
274      }
275  }
276
277  /** Compare in detail contiguous subsequences of the two files
278     which are known, as a whole, to match each other.
279
280     The results are recorded in the vectors filevec[N].changed_flag, by
281     storing a 1 in the element for each line that is an insertion or deletion.
282
283     The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
284
285     Note that XLIM, YLIM are exclusive bounds.
286     All line numbers are origin-0 and discarded lines are not counted.  */
287
288  private void compareseq (int xoff, int xlim, int yoff, int ylim) {
289    /* Slide down the bottom initial diagonal. */
290    while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
291      ++xoff; ++yoff;
292    }
293    /* Slide up the top initial diagonal. */
294    while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
295      --xlim; --ylim;
296    }
297
298    /* Handle simple cases. */
299    if (xoff == xlim)
300      while (yoff < ylim)
301        filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
302    else if (yoff == ylim)
303      while (xoff < xlim)
304        filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
305    else
306      {
307        /* Find a point of correspondence in the middle of the files.  */
308
309        int d = diag (xoff, xlim, yoff, ylim);
310        int c = cost;
311        int f = fdiag[fdiagoff + d];
312        int b = bdiag[bdiagoff + d];
313
314        if (c == 1)
315          {
316            /* This should be impossible, because it implies that
317               one of the two subsequences is empty,
318               and that case was handled above without calling `diag'.
319               Let's verify that this is true.  */
320            throw new IllegalArgumentException("Empty subsequence");
321          }
322        else
323          {
324            /* Use that point to split this problem into two subproblems.  */
325            compareseq (xoff, b, yoff, b - d);
326            /* This used to use f instead of b,
327               but that is incorrect!
328               It is not necessarily the case that diagonal d
329               has a snake from b to f.  */
330            compareseq (b, xlim, b - d, ylim);
331          }
332      }
333  }
334
335  /** Discard lines from one file that have no matches in the other file.
336   */
337
338  private void discard_confusing_lines() {
339    filevec[0].discard_confusing_lines(filevec[1]);
340    filevec[1].discard_confusing_lines(filevec[0]);
341  }
342
343  private boolean inhibit = false;
344
345  /** Adjust inserts/deletes of blank lines to join changes
346     as much as possible.
347   */
348
349  private void shift_boundaries() {
350    if (inhibit)
351      return;
352    filevec[0].shift_boundaries(filevec[1]);
353    filevec[1].shift_boundaries(filevec[0]);
354  }
355
356  /** Scan the tables of which lines are inserted and deleted,
357     producing an edit script in reverse order.  */
358
359  private change build_reverse_script() {
360    change script = null;
361    final boolean[] changed0 = filevec[0].changed_flag;
362    final boolean[] changed1 = filevec[1].changed_flag;
363    final int len0 = filevec[0].buffered_lines;
364    final int len1 = filevec[1].buffered_lines;
365
366    /* Note that changedN[len0] does exist, and contains 0.  */
367
368    int i0 = 0, i1 = 0;
369
370    while (i0 < len0 || i1 < len1)
371      {
372        if (changed0[1+i0] || changed1[1+i1])
373          {
374            int line0 = i0, line1 = i1;
375
376            /* Find # lines changed here in each file.  */
377            while (changed0[1+i0]) ++i0;
378            while (changed1[1+i1]) ++i1;
379
380            /* Record this change.  */
381            script = new change(line0, line1, i0 - line0, i1 - line1, script);
382          }
383
384        /* We have reached lines in the two files that match each other.  */
385        i0++; i1++;
386      }
387
388    return script;
389  }
390
391  /** Scan the tables of which lines are inserted and deleted,
392     producing an edit script in forward order.  */
393
394  private change build_script() {
395    change script = null;
396    final boolean[] changed0 = filevec[0].changed_flag;
397    final boolean[] changed1 = filevec[1].changed_flag;
398    final int len0 = filevec[0].buffered_lines;
399    final int len1 = filevec[1].buffered_lines;
400    int i0 = len0, i1 = len1;
401
402    /* Note that changedN[-1] does exist, and contains 0.  */
403
404    while (i0 >= 0 || i1 >= 0)
405      {
406        if (changed0[i0] || changed1[i1])
407          {
408            int line0 = i0, line1 = i1;
409
410            /* Find # lines changed here in each file.  */
411            while (changed0[i0]) --i0;
412            while (changed1[i1]) --i1;
413
414            /* Record this change.  */
415            script = new change(i0, i1, line0 - i0, line1 - i1, script);
416          }
417
418        /* We have reached lines in the two files that match each other.  */
419        i0--; i1--;
420      }
421
422    return script;
423  }
424
425  /* Report the differences of two files.  DEPTH is the current directory
426     depth. */
427  public change diff_2(final boolean reverse) {
428
429    /* Some lines are obviously insertions or deletions
430       because they don't match anything.  Detect them now,
431       and avoid even thinking about them in the main comparison algorithm.  */
432
433    discard_confusing_lines ();
434
435    /* Now do the main comparison algorithm, considering just the
436       undiscarded lines.  */
437
438    xvec = filevec[0].undiscarded;
439    yvec = filevec[1].undiscarded;
440
441    int diags =
442      filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
443    fdiag = new int[diags];
444    fdiagoff = filevec[1].nondiscarded_lines + 1;
445    bdiag = new int[diags];
446    bdiagoff = filevec[1].nondiscarded_lines + 1;
447
448    compareseq (0, filevec[0].nondiscarded_lines,
449                0, filevec[1].nondiscarded_lines);
450    fdiag = null;
451    bdiag = null;
452
453    /* Modify the results slightly to make them prettier
454       in cases where that can validly be done.  */
455
456    shift_boundaries ();
457
458    /* Get the results of comparison in the form of a chain
459       of `struct change's -- an edit script.  */
460
461    if (reverse)
462      return build_reverse_script();
463    else
464      return build_script();
465  }
466
467  /** The result of comparison is an "edit script": a chain of change objects.
468     Each change represents one place where some lines are deleted
469     and some are inserted.
470
471     LINE0 and LINE1 are the first affected lines in the two files (origin 0).
472     DELETED is the number of lines deleted here from file 0.
473     INSERTED is the number of lines inserted here in file 1.
474
475     If DELETED is 0 then LINE0 is the number of the line before
476     which the insertion was done; vice versa for INSERTED and LINE1.  */
477
478  public static class change {
479    /** Previous or next edit command. */
480    public change link;
481    /** # lines of file 1 changed here.  */
482    public int inserted;
483    /** # lines of file 0 changed here.  */
484    public int deleted;
485    /** Line number of 1st deleted line.  */
486    public final int line0;
487    /** Line number of 1st inserted line.  */
488    public final int line1;
489
490    /** Cons an additional entry onto the front of an edit script OLD.
491       LINE0 and LINE1 are the first affected lines in the two files (origin 0).
492       DELETED is the number of lines deleted here from file 0.
493       INSERTED is the number of lines inserted here in file 1.
494
495       If DELETED is 0 then LINE0 is the number of the line before
496       which the insertion was done; vice versa for INSERTED and LINE1.  */
497    change(int line0, int line1, int deleted, int inserted, change old) {
498      this.line0 = line0;
499      this.line1 = line1;
500      this.inserted = inserted;
501      this.deleted = deleted;
502      this.link = old;
503      //System.err.println(line0+","+line1+","+inserted+","+deleted);
504    }
505  }
506
507  /** Data on one input file being compared.
508   */
509
510  class file_data {
511
512    /** Allocate changed array for the results of comparison.  */
513    void clear() {
514      /* Allocate a flag for each line of each file, saying whether that line
515         is an insertion or deletion.
516         Allocate an extra element, always zero, at each end of each vector.
517       */
518      changed_flag = new boolean[buffered_lines + 2];
519    }
520
521    /** Return equiv_count[I] as the number of lines in this file
522       that fall in equivalence class I.
523         @return the array of equivalence class counts.
524     */
525    int[] equivCount() {
526      int[] equiv_count = new int[equiv_max];
527      for (int i = 0; i < buffered_lines; ++i)
528        ++equiv_count[equivs[i]];
529      return equiv_count;
530    }
531
532    /** Discard lines that have no matches in another file.
533
534       A line which is discarded will not be considered by the actual
535       comparison algorithm; it will be as if that line were not in the file.
536       The file's `realindexes' table maps virtual line numbers
537       (which don't count the discarded lines) into real line numbers;
538       this is how the actual comparison algorithm produces results
539       that are comprehensible when the discarded lines are counted.
540<p>
541       When we discard a line, we also mark it as a deletion or insertion
542       so that it will be printed in the output.
543      @param f the other file
544     */
545    void discard_confusing_lines(file_data f) {
546      clear();
547    /* Set up table of which lines are going to be discarded. */
548      final byte[] discarded = discardable(f.equivCount());
549
550    /* Don't really discard the provisional lines except when they occur
551       in a run of discardables, with nonprovisionals at the beginning
552       and end.  */
553      filterDiscards(discarded);
554
555    /* Actually discard the lines. */
556      discard(discarded);
557    }
558
559    /** Mark to be discarded each line that matches no line of another file.
560       If a line matches many lines, mark it as provisionally discardable.
561       @see equivCount()
562       @param counts The count of each equivalence number for the other file.
563       @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
564               for each line
565     */
566
567    private byte[] discardable(final int[] counts) {
568      final int end = buffered_lines;
569      final byte[] discards = new byte[end];
570      final int[] equivs = this.equivs;
571      int many = 5;
572      int tem = end / 64;
573
574      /* Multiply MANY by approximate square root of number of lines.
575         That is the threshold for provisionally discardable lines.  */
576      while ((tem = tem >> 2) > 0)
577        many *= 2;
578
579      for (int i = 0; i < end; i++)
580        {
581          int nmatch;
582          if (equivs[i] == 0)
583            continue;
584          nmatch = counts[equivs[i]];
585          if (nmatch == 0)
586            discards[i] = 1;
587          else if (nmatch > many)
588            discards[i] = 2;
589        }
590      return discards;
591    }
592
593    /** Don't really discard the provisional lines except when they occur
594       in a run of discardables, with nonprovisionals at the beginning
595       and end.  */
596
597    private void filterDiscards(final byte[] discards) {
598        final int end = buffered_lines;
599
600        for (int i = 0; i < end; i++)
601          {
602            /* Cancel provisional discards not in middle of run of discards.  */
603            if (discards[i] == 2)
604              discards[i] = 0;
605            else if (discards[i] != 0)
606              {
607                /* We have found a nonprovisional discard.  */
608                int j;
609                int length;
610                int provisional = 0;
611
612                /* Find end of this run of discardable lines.
613                   Count how many are provisionally discardable.  */
614                for (j = i; j < end; j++)
615                  {
616                    if (discards[j] == 0)
617                      break;
618                    if (discards[j] == 2)
619                      ++provisional;
620                  }
621
622                /* Cancel provisional discards at end, and shrink the run.  */
623                while (j > i && discards[j - 1] == 2) {
624                  discards[--j] = 0; --provisional;
625                }
626
627                /* Now we have the length of a run of discardable lines
628                   whose first and last are not provisional.  */
629                length = j - i;
630
631                /* If 1/4 of the lines in the run are provisional,
632                   cancel discarding of all provisional lines in the run.  */
633                if (provisional * 4 > length)
634                  {
635                    while (j > i)
636                      if (discards[--j] == 2)
637                        discards[j] = 0;
638                  }
639                else
640                  {
641                    int consec;
642                    int minimum = 1;
643                    int tem = length / 4;
644
645                    /* MINIMUM is approximate square root of LENGTH/4.
646                       A subrun of two or more provisionals can stand
647                       when LENGTH is at least 16.
648                       A subrun of 4 or more can stand when LENGTH >= 64.  */
649                    while ((tem = tem >> 2) > 0)
650                      minimum *= 2;
651                    minimum++;
652
653                    /* Cancel any subrun of MINIMUM or more provisionals
654                       within the larger run.  */
655                    for (j = 0, consec = 0; j < length; j++)
656                      if (discards[i + j] != 2)
657                        consec = 0;
658                      else if (minimum == ++consec)
659                        /* Back up to start of subrun, to cancel it all.  */
660                        j -= consec;
661                      else if (minimum < consec)
662                        discards[i + j] = 0;
663
664                    /* Scan from beginning of run
665                       until we find 3 or more nonprovisionals in a row
666                       or until the first nonprovisional at least 8 lines in.
667                       Until that point, cancel any provisionals.  */
668                    for (j = 0, consec = 0; j < length; j++)
669                      {
670                        if (j >= 8 && discards[i + j] == 1)
671                          break;
672                        if (discards[i + j] == 2) {
673                          consec = 0; discards[i + j] = 0;
674                        }
675                        else if (discards[i + j] == 0)
676                          consec = 0;
677                        else
678                          consec++;
679                        if (consec == 3)
680                          break;
681                      }
682
683                    /* I advances to the last line of the run.  */
684                    i += length - 1;
685
686                    /* Same thing, from end.  */
687                    for (j = 0, consec = 0; j < length; j++)
688                      {
689                        if (j >= 8 && discards[i - j] == 1)
690                          break;
691                        if (discards[i - j] == 2) {
692                          consec = 0; discards[i - j] = 0;
693                        }
694                        else if (discards[i - j] == 0)
695                          consec = 0;
696                        else
697                          consec++;
698                        if (consec == 3)
699                          break;
700                      }
701                  }
702              }
703          }
704      }
705
706    /** Actually discard the lines.
707      @param discards flags lines to be discarded
708     */
709    private void discard(final byte[] discards) {
710      final int end = buffered_lines;
711      int j = 0;
712      for (int i = 0; i < end; ++i)
713        if (no_discards || discards[i] == 0)
714          {
715            undiscarded[j] = equivs[i];
716            realindexes[j++] = i;
717          }
718        else
719          changed_flag[1+i] = true;
720      nondiscarded_lines = j;
721    }
722
723    file_data(Object[] data,Hashtable h) {
724      buffered_lines = data.length;
725
726      equivs = new int[buffered_lines];
727      undiscarded = new int[buffered_lines];
728      realindexes = new int[buffered_lines];
729
730      for (int i = 0; i < data.length; ++i) {
731        Integer ir = (Integer)h.get(data[i]);
732        if (ir == null)
733          h.put(data[i],new Integer(equivs[i] = equiv_max++));
734        else
735          equivs[i] = ir.intValue();
736      }
737    }
738
739    /** Adjust inserts/deletes of blank lines to join changes
740       as much as possible.
741
742       We do something when a run of changed lines include a blank
743       line at one end and have an excluded blank line at the other.
744       We are free to choose which blank line is included.
745       `compareseq' always chooses the one at the beginning,
746       but usually it is cleaner to consider the following blank line
747       to be the "change".  The only exception is if the preceding blank line
748       would join this change to other changes.
749      @param f the file being compared against
750    */
751
752    void shift_boundaries(file_data f) {
753      final boolean[] changed = changed_flag;
754      final boolean[] other_changed = f.changed_flag;
755      int i = 0;
756      int j = 0;
757      int i_end = buffered_lines;
758      int preceding = -1;
759      int other_preceding = -1;
760
761      for (;;)
762        {
763          int start, end, other_start;
764
765          /* Scan forwards to find beginning of another run of changes.
766             Also keep track of the corresponding point in the other file.  */
767
768          while (i < i_end && !changed[1+i])
769            {
770              while (other_changed[1+j++])
771                /* Non-corresponding lines in the other file
772                   will count as the preceding batch of changes.  */
773                other_preceding = j;
774              i++;
775            }
776
777          if (i == i_end)
778            break;
779
780          start = i;
781          other_start = j;
782
783          for (;;)
784            {
785              /* Now find the end of this run of changes.  */
786
787              while (i < i_end && changed[1+i]) i++;
788              end = i;
789
790              /* If the first changed line matches the following unchanged one,
791                 and this run does not follow right after a previous run,
792                 and there are no lines deleted from the other file here,
793                 then classify the first changed line as unchanged
794                 and the following line as changed in its place.  */
795
796              /* You might ask, how could this run follow right after another?
797                 Only because the previous run was shifted here.  */
798
799              if (end != i_end
800                  && equivs[start] == equivs[end]
801                  && !other_changed[1+j]
802                  && end != i_end
803                  && !((preceding >= 0 && start == preceding)
804                       || (other_preceding >= 0
805                           && other_start == other_preceding)))
806                {
807                  changed[1+end++] = true;
808                  changed[1+start++] = false;
809                  ++i;
810                  /* Since one line-that-matches is now before this run
811                     instead of after, we must advance in the other file
812                     to keep in synch.  */
813                  ++j;
814                }
815              else
816                break;
817            }
818
819          preceding = i;
820          other_preceding = j;
821        }
822    }
823
824    /** Number of elements (lines) in this file. */
825    final int buffered_lines;
826
827    /** Vector, indexed by line number, containing an equivalence code for
828       each line.  It is this vector that is actually compared with that
829       of another file to generate differences. */
830    private final int[]            equivs;
831
832    /** Vector, like the previous one except that
833       the elements for discarded lines have been squeezed out.  */
834    final int[]           undiscarded;
835
836    /** Vector mapping virtual line numbers (not counting discarded lines)
837       to real ones (counting those lines).  Both are origin-0.  */
838    final int[]           realindexes;
839
840    /** Total number of nondiscarded lines. */
841    int                    nondiscarded_lines;
842
843    /** Array, indexed by real origin-1 line number,
844       containing true for a line that is an insertion or a deletion.
845       The results of comparison are stored here.  */
846    boolean[]            changed_flag;
847
848  }
849
850}
851