1 /* Analyze file differences for GNU DIFF.
2    Copyright (C) 1988, 1989, 1992, 1993, 1997 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 */
17 
18 /* The basic algorithm is described in:
19    "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
20    Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
21    see especially section 4.2, which describes the variation used below.
22    Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
23    heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
24    at the price of producing suboptimal output for large inputs with
25    many differences.
26 
27    The basic algorithm was independently discovered as described in:
28    "Algorithms for Approximate String Matching", E. Ukkonen,
29    Information and Control Vol. 64, 1985, pp. 100-118.  */
30 
31 #include "diff.h"
32 #include "cmpbuf.h"
33 
34 extern int no_discards;
35 
36 static int *xvec, *yvec;      /* Vectors being compared. */
37 static int *fdiag;            /* Vector, indexed by diagonal, containing
38                                            1 + the X coordinate of the point furthest
39                                            along the given diagonal in the forward
40                                            search of the edit matrix. */
41 static int *bdiag;            /* Vector, indexed by diagonal, containing
42                                            the X coordinate of the point furthest
43                                            along the given diagonal in the backward
44                                            search of the edit matrix. */
45 static int too_expensive;     /* Edit scripts longer than this are too
46                                            expensive to compute.  */
47 
48 #define SNAKE_LIMIT 20        /* Snakes bigger than this are considered `big'.  */
49 
50 struct partition
51 {
52   int xmid, ymid;   /* Midpoints of this partition.  */
53   int lo_minimal;   /* Nonzero if low half will be analyzed minimally.  */
54   int hi_minimal;   /* Likewise for high half.  */
55 };
56 
57 static int diag PARAMS((int, int, int, int, int, struct partition *));
58 static struct change *add_change PARAMS((int, int, int, int, struct change *));
59 static struct change *build_reverse_script PARAMS((struct file_data const[]));
60 static struct change *build_script PARAMS((struct file_data const[]));
61 static void briefly_report PARAMS((int, struct file_data const[]));
62 static void compareseq PARAMS((int, int, int, int, int));
63 static void discard_confusing_lines PARAMS((struct file_data[]));
64 static void shift_boundaries PARAMS((struct file_data[]));
65 
66 /* Find the midpoint of the shortest edit script for a specified
67    portion of the two files.
68 
69    Scan from the beginnings of the files, and simultaneously from the ends,
70    doing a breadth-first search through the space of edit-sequence.
71    When the two searches meet, we have found the midpoint of the shortest
72    edit sequence.
73 
74    If MINIMAL is nonzero, find the minimal edit script regardless
75    of expense.  Otherwise, if the search is too expensive, use
76    heuristics to stop the search and report a suboptimal answer.
77 
78    Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal number
79    XMID - YMID equals the number of inserted lines minus the number
80    of deleted lines (counting only lines before the midpoint).
81    Return the approximate edit cost; this is the total number of
82    lines inserted or deleted (counting only lines before the midpoint),
83    unless a heuristic is used to terminate the search prematurely.
84 
85    Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the
86    left half of the partition is known; similarly for PART->RIGHT_MINIMAL.
87 
88    This function assumes that the first lines of the specified portions
89    of the two files do not match, and likewise that the last lines do not
90    match.  The caller must trim matching lines from the beginning and end
91    of the portions it is going to specify.
92 
93    If we return the "wrong" partitions,
94    the worst this can do is cause suboptimal diff output.
95    It cannot cause incorrect diff output.  */
96 
97 static int
diag(xoff,xlim,yoff,ylim,minimal,part)98 diag (xoff, xlim, yoff, ylim, minimal, part)
99      int xoff, xlim, yoff, ylim, minimal;
100      struct partition *part;
101 {
102   int *const fd = fdiag;      /* Give the compiler a chance. */
103   int *const bd = bdiag;      /* Additional help for the compiler. */
104   int const *const xv = xvec; /* Still more help for the compiler. */
105   int const *const yv = yvec; /* And more and more . . . */
106   int const dmin = xoff - ylim;         /* Minimum valid diagonal. */
107   int const dmax = xlim - yoff;         /* Maximum valid diagonal. */
108   int const fmid = xoff - yoff;         /* Center diagonal of top-down search. */
109   int const bmid = xlim - ylim;         /* Center diagonal of bottom-up search. */
110   int fmin = fmid, fmax = fmid;         /* Limits of top-down search. */
111   int bmin = bmid, bmax = bmid;         /* Limits of bottom-up search. */
112   int c;                      /* Cost. */
113   int odd = (fmid - bmid) & 1;          /* True if southeast corner is on an odd
114                                            diagonal with respect to the northwest. */
115 
116   fd[fmid] = xoff;
117   bd[bmid] = xlim;
118 
119   for (c = 1;; ++c)
120     {
121       int d;                            /* Active diagonal. */
122       int big_snake = 0;
123 
124       /* Extend the top-down search by an edit step in each diagonal. */
125       fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
126       fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
127       for (d = fmax; d >= fmin; d -= 2)
128           {
129             int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
130 
131             if (tlo >= thi)
132               x = tlo + 1;
133             else
134               x = thi;
135             oldx = x;
136             y = x - d;
137             while (x < xlim && y < ylim && xv[x] == yv[y])
138               ++x, ++y;
139             if (x - oldx > SNAKE_LIMIT)
140               big_snake = 1;
141             fd[d] = x;
142             if (odd && bmin <= d && d <= bmax && bd[d] <= x)
143               {
144                 part->xmid = x;
145                 part->ymid = y;
146                 part->lo_minimal = part->hi_minimal = 1;
147                 return 2 * c - 1;
148               }
149           }
150 
151       /* Similarly extend the bottom-up search.  */
152       bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;
153       bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;
154       for (d = bmax; d >= bmin; d -= 2)
155           {
156             int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
157 
158             if (tlo < thi)
159               x = tlo;
160             else
161               x = thi - 1;
162             oldx = x;
163             y = x - d;
164             while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
165               --x, --y;
166             if (oldx - x > SNAKE_LIMIT)
167               big_snake = 1;
168             bd[d] = x;
169             if (!odd && fmin <= d && d <= fmax && x <= fd[d])
170               {
171                 part->xmid = x;
172                 part->ymid = y;
173                 part->lo_minimal = part->hi_minimal = 1;
174                 return 2 * c;
175               }
176           }
177 
178       if (minimal)
179           continue;
180 
181       /* Heuristic: check occasionally for a diagonal that has made
182            lots of progress compared with the edit distance.
183            If we have any such, find the one that has made the most
184            progress and return it as if it had succeeded.
185 
186            With this heuristic, for files with a constant small density
187            of changes, the algorithm is linear in the file size.  */
188 
189       if (c > 200 && big_snake && heuristic)
190           {
191             int best;
192 
193             best = 0;
194             for (d = fmax; d >= fmin; d -= 2)
195               {
196                 int dd = d - fmid;
197                 int x = fd[d];
198                 int y = x - d;
199                 int v = (x - xoff) * 2 - dd;
200                 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
201                     {
202                       if (v > best
203                           && xoff + SNAKE_LIMIT <= x && x < xlim
204                           && yoff + SNAKE_LIMIT <= y && y < ylim)
205                         {
206                           /* We have a good enough best diagonal;
207                                now insist that it end with a significant snake.  */
208                           int k;
209 
210                           for (k = 1; xv[x - k] == yv[y - k]; k++)
211                               if (k == SNAKE_LIMIT)
212                                 {
213                                   best = v;
214                                   part->xmid = x;
215                                   part->ymid = y;
216                                   break;
217                                 }
218                         }
219                     }
220               }
221             if (best > 0)
222               {
223                 part->lo_minimal = 1;
224                 part->hi_minimal = 0;
225                 return 2 * c - 1;
226               }
227 
228             best = 0;
229             for (d = bmax; d >= bmin; d -= 2)
230               {
231                 int dd = d - bmid;
232                 int x = bd[d];
233                 int y = x - d;
234                 int v = (xlim - x) * 2 + dd;
235                 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
236                     {
237                       if (v > best
238                           && xoff < x && x <= xlim - SNAKE_LIMIT
239                           && yoff < y && y <= ylim - SNAKE_LIMIT)
240                         {
241                           /* We have a good enough best diagonal;
242                                now insist that it end with a significant snake.  */
243                           int k;
244 
245                           for (k = 0; xv[x + k] == yv[y + k]; k++)
246                               if (k == SNAKE_LIMIT - 1)
247                                 {
248                                   best = v;
249                                   part->xmid = x;
250                                   part->ymid = y;
251                                   break;
252                                 }
253                         }
254                     }
255               }
256             if (best > 0)
257               {
258                 part->lo_minimal = 0;
259                 part->hi_minimal = 1;
260                 return 2 * c - 1;
261               }
262           }
263 
264       /* Heuristic: if we've gone well beyond the call of duty,
265            give up and report halfway between our best results so far.  */
266       if (c >= too_expensive)
267           {
268             int fxybest, fxbest;
269             int bxybest, bxbest;
270 
271             fxbest = bxbest = 0;  /* Pacify `gcc -Wall'.  */
272 
273             /* Find forward diagonal that maximizes X + Y.  */
274             fxybest = -1;
275             for (d = fmax; d >= fmin; d -= 2)
276               {
277                 int x = min (fd[d], xlim);
278                 int y = x - d;
279                 if (ylim < y)
280                     x = ylim + d, y = ylim;
281                 if (fxybest < x + y)
282                     {
283                       fxybest = x + y;
284                       fxbest = x;
285                     }
286               }
287 
288             /* Find backward diagonal that minimizes X + Y.  */
289             bxybest = INT_MAX;
290             for (d = bmax; d >= bmin; d -= 2)
291               {
292                 int x = max (xoff, bd[d]);
293                 int y = x - d;
294                 if (y < yoff)
295                     x = yoff + d, y = yoff;
296                 if (x + y < bxybest)
297                     {
298                       bxybest = x + y;
299                       bxbest = x;
300                     }
301               }
302 
303             /* Use the better of the two diagonals.  */
304             if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
305               {
306                 part->xmid = fxbest;
307                 part->ymid = fxybest - fxbest;
308                 part->lo_minimal = 1;
309                 part->hi_minimal = 0;
310               }
311             else
312               {
313                 part->xmid = bxbest;
314                 part->ymid = bxybest - bxbest;
315                 part->lo_minimal = 0;
316                 part->hi_minimal = 1;
317               }
318             return 2 * c - 1;
319           }
320     }
321 }
322 
323 /* Compare in detail contiguous subsequences of the two files
324    which are known, as a whole, to match each other.
325 
326    The results are recorded in the vectors files[N].changed_flag, by
327    storing a 1 in the element for each line that is an insertion or deletion.
328 
329    The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
330 
331    Note that XLIM, YLIM are exclusive bounds.
332    All line numbers are origin-0 and discarded lines are not counted.
333 
334    If MINIMAL is nonzero, find a minimal difference no matter how
335    expensive it is.  */
336 
337 static void
compareseq(xoff,xlim,yoff,ylim,minimal)338 compareseq (xoff, xlim, yoff, ylim, minimal)
339      int xoff, xlim, yoff, ylim, minimal;
340 {
341   int * const xv = xvec; /* Help the compiler.  */
342   int * const yv = yvec;
343 
344   /* Slide down the bottom initial diagonal. */
345   while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
346     ++xoff, ++yoff;
347   /* Slide up the top initial diagonal. */
348   while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
349     --xlim, --ylim;
350 
351   /* Handle simple cases. */
352   if (xoff == xlim)
353     while (yoff < ylim)
354       files[1].changed_flag[files[1].realindexes[yoff++]] = 1;
355   else if (yoff == ylim)
356     while (xoff < xlim)
357       files[0].changed_flag[files[0].realindexes[xoff++]] = 1;
358   else
359     {
360       int c;
361       struct partition part;
362 
363       /* Find a point of correspondence in the middle of the files.  */
364 
365       c = diag (xoff, xlim, yoff, ylim, minimal, &part);
366 
367       if (c == 1)
368           {
369             /* This should be impossible, because it implies that
370                one of the two subsequences is empty,
371                and that case was handled above without calling `diag'.
372                Let's verify that this is true.  */
373             abort ();
374 #if 0
375             /* The two subsequences differ by a single insert or delete;
376                record it and we are done.  */
377             if (part.xmid - part.ymid < xoff - yoff)
378               files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1;
379             else
380               files[0].changed_flag[files[0].realindexes[part.xmid]] = 1;
381 #endif
382           }
383       else
384           {
385             /* Use the partitions to split this problem into subproblems.  */
386             compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
387             compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
388           }
389     }
390 }
391 
392 /* Discard lines from one file that have no matches in the other file.
393 
394    A line which is discarded will not be considered by the actual
395    comparison algorithm; it will be as if that line were not in the file.
396    The file's `realindexes' table maps virtual line numbers
397    (which don't count the discarded lines) into real line numbers;
398    this is how the actual comparison algorithm produces results
399    that are comprehensible when the discarded lines are counted.
400 
401    When we discard a line, we also mark it as a deletion or insertion
402    so that it will be printed in the output.  */
403 
404 static void
discard_confusing_lines(filevec)405 discard_confusing_lines (filevec)
406      struct file_data filevec[];
407 {
408   unsigned int f, i;
409   char *discarded[2];
410   int *equiv_count[2];
411   int *p;
412 
413   /* Allocate our results.  */
414   p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
415                            * (2 * sizeof (int)));
416   for (f = 0; f < 2; f++)
417     {
418       filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
419       filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
420     }
421 
422   /* Set up equiv_count[F][I] as the number of lines in file F
423      that fall in equivalence class I.  */
424 
425   p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int)));
426   equiv_count[0] = p;
427   equiv_count[1] = p + filevec[0].equiv_max;
428   bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));
429 
430   for (i = 0; i < filevec[0].buffered_lines; ++i)
431     ++equiv_count[0][filevec[0].equivs[i]];
432   for (i = 0; i < filevec[1].buffered_lines; ++i)
433     ++equiv_count[1][filevec[1].equivs[i]];
434 
435   /* Set up tables of which lines are going to be discarded.  */
436 
437   discarded[0] = xmalloc (sizeof (char)
438                                 * (filevec[0].buffered_lines
439                                    + filevec[1].buffered_lines));
440   discarded[1] = discarded[0] + filevec[0].buffered_lines;
441   bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines
442                                                   + filevec[1].buffered_lines));
443 
444   /* Mark to be discarded each line that matches no line of the other file.
445      If a line matches many lines, mark it as provisionally discardable.  */
446 
447   for (f = 0; f < 2; f++)
448     {
449       unsigned int end = filevec[f].buffered_lines;
450       char *discards = discarded[f];
451       int *counts = equiv_count[1 - f];
452       int *equivs = filevec[f].equivs;
453       unsigned int many = 5;
454       unsigned int tem = end / 64;
455 
456       /* Multiply MANY by approximate square root of number of lines.
457            That is the threshold for provisionally discardable lines.  */
458       while ((tem = tem >> 2) > 0)
459           many *= 2;
460 
461       for (i = 0; i < end; i++)
462           {
463             int nmatch;
464             if (equivs[i] == 0)
465               continue;
466             nmatch = counts[equivs[i]];
467             if (nmatch == 0)
468               discards[i] = 1;
469             else if (nmatch > many)
470               discards[i] = 2;
471           }
472     }
473 
474   /* Don't really discard the provisional lines except when they occur
475      in a run of discardables, with nonprovisionals at the beginning
476      and end.  */
477 
478   for (f = 0; f < 2; f++)
479     {
480       unsigned int end = filevec[f].buffered_lines;
481       register char *discards = discarded[f];
482 
483       for (i = 0; i < end; i++)
484           {
485             /* Cancel provisional discards not in middle of run of discards.  */
486             if (discards[i] == 2)
487               discards[i] = 0;
488             else if (discards[i] != 0)
489               {
490                 /* We have found a nonprovisional discard.  */
491                 register int j;
492                 unsigned int length;
493                 unsigned int provisional = 0;
494 
495                 /* Find end of this run of discardable lines.
496                      Count how many are provisionally discardable.  */
497                 for (j = i; j < end; j++)
498                     {
499                       if (discards[j] == 0)
500                         break;
501                       if (discards[j] == 2)
502                         ++provisional;
503                     }
504 
505                 /* Cancel provisional discards at end, and shrink the run.  */
506                 while (j > i && discards[j - 1] == 2)
507                     discards[--j] = 0, --provisional;
508 
509                 /* Now we have the length of a run of discardable lines
510                      whose first and last are not provisional.  */
511                 length = j - i;
512 
513                 /* If 1/4 of the lines in the run are provisional,
514                      cancel discarding of all provisional lines in the run.  */
515                 if (provisional * 4 > length)
516                     {
517                       while (j > i)
518                         if (discards[--j] == 2)
519                           discards[j] = 0;
520                     }
521                 else
522                     {
523                       register unsigned int consec;
524                       unsigned int minimum = 1;
525                       unsigned int tem = length / 4;
526 
527                       /* MINIMUM is approximate square root of LENGTH/4.
528                          A subrun of two or more provisionals can stand
529                          when LENGTH is at least 16.
530                          A subrun of 4 or more can stand when LENGTH >= 64.  */
531                       while ((tem = tem >> 2) > 0)
532                         minimum *= 2;
533                       minimum++;
534 
535                       /* Cancel any subrun of MINIMUM or more provisionals
536                          within the larger run.  */
537                       for (j = 0, consec = 0; j < length; j++)
538                         if (discards[i + j] != 2)
539                           consec = 0;
540                         else if (minimum == ++consec)
541                           /* Back up to start of subrun, to cancel it all.  */
542                           j -= consec;
543                         else if (minimum < consec)
544                           discards[i + j] = 0;
545 
546                       /* Scan from beginning of run
547                          until we find 3 or more nonprovisionals in a row
548                          or until the first nonprovisional at least 8 lines in.
549                          Until that point, cancel any provisionals.  */
550                       for (j = 0, consec = 0; j < length; j++)
551                         {
552                           if (j >= 8 && discards[i + j] == 1)
553                               break;
554                           if (discards[i + j] == 2)
555                               consec = 0, discards[i + j] = 0;
556                           else if (discards[i + j] == 0)
557                               consec = 0;
558                           else
559                               consec++;
560                           if (consec == 3)
561                               break;
562                         }
563 
564                       /* I advances to the last line of the run.  */
565                       i += length - 1;
566 
567                       /* Same thing, from end.  */
568                       for (j = 0, consec = 0; j < length; j++)
569                         {
570                           if (j >= 8 && discards[i - j] == 1)
571                               break;
572                           if (discards[i - j] == 2)
573                               consec = 0, discards[i - j] = 0;
574                           else if (discards[i - j] == 0)
575                               consec = 0;
576                           else
577                               consec++;
578                           if (consec == 3)
579                               break;
580                         }
581                     }
582               }
583           }
584     }
585 
586   /* Actually discard the lines. */
587   for (f = 0; f < 2; f++)
588     {
589       char *discards = discarded[f];
590       unsigned int end = filevec[f].buffered_lines;
591       unsigned int j = 0;
592       for (i = 0; i < end; ++i)
593           if (no_discards || discards[i] == 0)
594             {
595               filevec[f].undiscarded[j] = filevec[f].equivs[i];
596               filevec[f].realindexes[j++] = i;
597             }
598           else
599             filevec[f].changed_flag[i] = 1;
600       filevec[f].nondiscarded_lines = j;
601     }
602 
603   free (discarded[0]);
604   free (equiv_count[0]);
605 }
606 
607 /* Adjust inserts/deletes of identical lines to join changes
608    as much as possible.
609 
610    We do something when a run of changed lines include a
611    line at one end and have an excluded, identical line at the other.
612    We are free to choose which identical line is included.
613    `compareseq' usually chooses the one at the beginning,
614    but usually it is cleaner to consider the following identical line
615    to be the "change".  */
616 
617 int inhibit;
618 
619 static void
shift_boundaries(filevec)620 shift_boundaries (filevec)
621      struct file_data filevec[];
622 {
623   int f;
624 
625   if (inhibit)
626     return;
627 
628   for (f = 0; f < 2; f++)
629     {
630       char *changed = filevec[f].changed_flag;
631       char const *other_changed = filevec[1-f].changed_flag;
632       int const *equivs = filevec[f].equivs;
633       int i = 0;
634       int j = 0;
635       int i_end = filevec[f].buffered_lines;
636 
637       while (1)
638           {
639             int runlength, start, corresponding;
640 
641             /* Scan forwards to find beginning of another run of changes.
642                Also keep track of the corresponding point in the other file.  */
643 
644             while (i < i_end && changed[i] == 0)
645               {
646                 while (other_changed[j++])
647                     continue;
648                 i++;
649               }
650 
651             if (i == i_end)
652               break;
653 
654             start = i;
655 
656             /* Find the end of this run of changes.  */
657 
658             while (changed[++i])
659               continue;
660             while (other_changed[j])
661               j++;
662 
663             do
664               {
665                 /* Record the length of this run of changes, so that
666                      we can later determine whether the run has grown.  */
667                 runlength = i - start;
668 
669                 /* Move the changed region back, so long as the
670                      previous unchanged line matches the last changed one.
671                      This merges with previous changed regions.  */
672 
673                 while (start && equivs[start - 1] == equivs[i - 1])
674                     {
675                       changed[--start] = 1;
676                       changed[--i] = 0;
677                       while (changed[start - 1])
678                         start--;
679                       while (other_changed[--j])
680                         continue;
681                     }
682 
683                 /* Set CORRESPONDING to the end of the changed run, at the last
684                      point where it corresponds to a changed run in the other file.
685                      CORRESPONDING == I_END means no such point has been found.  */
686                 corresponding = other_changed[j - 1] ? i : i_end;
687 
688                 /* Move the changed region forward, so long as the
689                      first changed line matches the following unchanged one.
690                      This merges with following changed regions.
691                      Do this second, so that if there are no merges,
692                      the changed region is moved forward as far as possible.  */
693 
694                 while (i != i_end && equivs[start] == equivs[i])
695                     {
696                       changed[start++] = 0;
697                       changed[i++] = 1;
698                       while (changed[i])
699                         i++;
700                       while (other_changed[++j])
701                         corresponding = i;
702                     }
703               }
704             while (runlength != i - start);
705 
706             /* If possible, move the fully-merged run of changes
707                back to a corresponding run in the other file.  */
708 
709             while (corresponding < i)
710               {
711                 changed[--start] = 1;
712                 changed[--i] = 0;
713                 while (other_changed[--j])
714                     continue;
715               }
716           }
717     }
718 }
719 
720 /* Cons an additional entry onto the front of an edit script OLD.
721    LINE0 and LINE1 are the first affected lines in the two files (origin 0).
722    DELETED is the number of lines deleted here from file 0.
723    INSERTED is the number of lines inserted here in file 1.
724 
725    If DELETED is 0 then LINE0 is the number of the line before
726    which the insertion was done; vice versa for INSERTED and LINE1.  */
727 
728 static struct change *
add_change(line0,line1,deleted,inserted,old)729 add_change (line0, line1, deleted, inserted, old)
730      int line0, line1, deleted, inserted;
731      struct change *old;
732 {
733   struct change *new = (struct change *) xmalloc (sizeof (struct change));
734 
735   new->line0 = line0;
736   new->line1 = line1;
737   new->inserted = inserted;
738   new->deleted = deleted;
739   new->link = old;
740   return new;
741 }
742 
743 /* Scan the tables of which lines are inserted and deleted,
744    producing an edit script in reverse order.  */
745 
746 static struct change *
build_reverse_script(filevec)747 build_reverse_script (filevec)
748      struct file_data const filevec[];
749 {
750   struct change *script = 0;
751   char *changed0 = filevec[0].changed_flag;
752   char *changed1 = filevec[1].changed_flag;
753   int len0 = filevec[0].buffered_lines;
754   int len1 = filevec[1].buffered_lines;
755 
756   /* Note that changedN[len0] does exist, and contains 0.  */
757 
758   int i0 = 0, i1 = 0;
759 
760   while (i0 < len0 || i1 < len1)
761     {
762       if (changed0[i0] || changed1[i1])
763           {
764             int line0 = i0, line1 = i1;
765 
766             /* Find # lines changed here in each file.  */
767             while (changed0[i0]) ++i0;
768             while (changed1[i1]) ++i1;
769 
770             /* Record this change.  */
771             script = add_change (line0, line1, i0 - line0, i1 - line1, script);
772           }
773 
774       /* We have reached lines in the two files that match each other.  */
775       i0++, i1++;
776     }
777 
778   return script;
779 }
780 
781 /* Scan the tables of which lines are inserted and deleted,
782    producing an edit script in forward order.  */
783 
784 static struct change *
build_script(filevec)785 build_script (filevec)
786      struct file_data const filevec[];
787 {
788   struct change *script = 0;
789   char *changed0 = filevec[0].changed_flag;
790   char *changed1 = filevec[1].changed_flag;
791   int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
792 
793   /* Note that changedN[-1] does exist, and contains 0.  */
794 
795   while (i0 >= 0 || i1 >= 0)
796     {
797       if (changed0[i0 - 1] || changed1[i1 - 1])
798           {
799             int line0 = i0, line1 = i1;
800 
801             /* Find # lines changed here in each file.  */
802             while (changed0[i0 - 1]) --i0;
803             while (changed1[i1 - 1]) --i1;
804 
805             /* Record this change.  */
806             script = add_change (i0, i1, line0 - i0, line1 - i1, script);
807           }
808 
809       /* We have reached lines in the two files that match each other.  */
810       i0--, i1--;
811     }
812 
813   return script;
814 }
815 
816 /* If CHANGES, briefly report that two files differed.  */
817 static void
briefly_report(changes,filevec)818 briefly_report (changes, filevec)
819      int changes;
820      struct file_data const filevec[];
821 {
822   if (changes)
823     message (no_details_flag ? "Files %s and %s differ\n"
824                : "Binary files %s and %s differ\n",
825                filevec[0].name, filevec[1].name);
826 }
827 
828 /* Report the differences of two files.  DEPTH is the current directory
829    depth. */
830 int
diff_2_files(filevec,depth)831 diff_2_files (filevec, depth)
832      struct file_data filevec[];
833      int depth;
834 {
835   int diags;
836   int i;
837   struct change *e, *p;
838   struct change *script;
839   int changes;
840 
841 
842   /* If we have detected that either file is binary,
843      compare the two files as binary.  This can happen
844      only when the first chunk is read.
845      Also, --brief without any --ignore-* options means
846      we can speed things up by treating the files as binary.  */
847 
848   if (read_files (filevec, no_details_flag & ~ignore_some_changes))
849     {
850       /* Files with different lengths must be different.  */
851       if (filevec[0].stat.st_size != filevec[1].stat.st_size
852             && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode))
853             && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode)))
854           changes = 1;
855 
856       /* Standard input equals itself.  */
857       else if (filevec[0].desc == filevec[1].desc)
858           changes = 0;
859 
860       else
861           /* Scan both files, a buffer at a time, looking for a difference.  */
862           {
863             /* Allocate same-sized buffers for both files.  */
864             size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),
865                                                      STAT_BLOCKSIZE (filevec[1].stat));
866             for (i = 0; i < 2; i++)
867               filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);
868 
869             for (;;  filevec[0].buffered_chars = filevec[1].buffered_chars = 0)
870               {
871                 /* Read a buffer's worth from both files.  */
872                 for (i = 0; i < 2; i++)
873                     if (0 <= filevec[i].desc)
874                       while (filevec[i].buffered_chars != buffer_size)
875                         {
876                           int r = read (filevec[i].desc,
877                                             filevec[i].buffer
878                                             + filevec[i].buffered_chars,
879                                             buffer_size - filevec[i].buffered_chars);
880                           if (r == 0)
881                               break;
882                           if (r < 0)
883                               pfatal_with_name (filevec[i].name);
884                           filevec[i].buffered_chars += r;
885                         }
886 
887                 /* If the buffers differ, the files differ.  */
888                 if (filevec[0].buffered_chars != filevec[1].buffered_chars
889                       || (filevec[0].buffered_chars != 0
890                           && memcmp (filevec[0].buffer,
891                                          filevec[1].buffer,
892                                          filevec[0].buffered_chars) != 0))
893                     {
894                       changes = 1;
895                       break;
896                     }
897 
898                 /* If we reach end of file, the files are the same.  */
899                 if (filevec[0].buffered_chars != buffer_size)
900                     {
901                       changes = 0;
902                       break;
903                     }
904               }
905           }
906 
907       briefly_report (changes, filevec);
908     }
909   else
910     {
911       /* Allocate vectors for the results of comparison:
912            a flag for each line of each file, saying whether that line
913            is an insertion or deletion.
914            Allocate an extra element, always zero, at each end of each vector.  */
915 
916       size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;
917       filevec[0].changed_flag = xmalloc (s);
918       bzero (filevec[0].changed_flag, s);
919       filevec[0].changed_flag++;
920       filevec[1].changed_flag = filevec[0].changed_flag
921                                         + filevec[0].buffered_lines + 2;
922 
923       /* Some lines are obviously insertions or deletions
924            because they don't match anything.  Detect them now, and
925            avoid even thinking about them in the main comparison algorithm.  */
926 
927       discard_confusing_lines (filevec);
928 
929       /* Now do the main comparison algorithm, considering just the
930            undiscarded lines.  */
931 
932       xvec = filevec[0].undiscarded;
933       yvec = filevec[1].undiscarded;
934       diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
935       fdiag = (int *) xmalloc (diags * (2 * sizeof (int)));
936       bdiag = fdiag + diags;
937       fdiag += filevec[1].nondiscarded_lines + 1;
938       bdiag += filevec[1].nondiscarded_lines + 1;
939 
940       /* Set TOO_EXPENSIVE to be approximate square root of input size,
941            bounded below by 256.  */
942       too_expensive = 1;
943       for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;
944              i != 0; i >>= 2)
945           too_expensive <<= 1;
946       too_expensive = max (256, too_expensive);
947 
948       files[0] = filevec[0];
949       files[1] = filevec[1];
950 
951       compareseq (0, filevec[0].nondiscarded_lines,
952                       0, filevec[1].nondiscarded_lines, no_discards);
953 
954       free (fdiag - (filevec[1].nondiscarded_lines + 1));
955 
956       /* Modify the results slightly to make them prettier
957            in cases where that can validly be done.  */
958 
959       shift_boundaries (filevec);
960 
961       /* Get the results of comparison in the form of a chain
962            of `struct change's -- an edit script.  */
963 
964       if (output_style == OUTPUT_ED)
965           script = build_reverse_script (filevec);
966       else
967           script = build_script (filevec);
968 
969       /* Set CHANGES if we had any diffs.
970            If some changes are ignored, we must scan the script to decide.  */
971       if (ignore_blank_lines_flag || ignore_regexp_list)
972           {
973             struct change *next = script;
974             changes = 0;
975 
976             while (next && changes == 0)
977               {
978                 struct change *this, *end;
979                 int first0, last0, first1, last1, deletes, inserts;
980 
981                 /* Find a set of changes that belong together.  */
982                 this = next;
983                 end = find_change (next);
984 
985                 /* Disconnect them from the rest of the changes, making them
986                      a hunk, and remember the rest for next iteration.  */
987                 next = end->link;
988                 end->link = 0;
989 
990                 /* Determine whether this hunk is really a difference.  */
991                 analyze_hunk (this, &first0, &last0, &first1, &last1,
992                                   &deletes, &inserts);
993 
994                 /* Reconnect the script so it will all be freed properly.  */
995                 end->link = next;
996 
997                 if (deletes || inserts)
998                     changes = 1;
999               }
1000           }
1001       else
1002           changes = (script != 0);
1003 
1004       if (no_details_flag)
1005           briefly_report (changes, filevec);
1006       else
1007           {
1008             if (changes || ! no_diff_means_no_output)
1009               {
1010                 /* Record info for starting up output,
1011                      to be used if and when we have some output to print.  */
1012                 setup_output (files[0].name, files[1].name, depth);
1013 
1014                 switch (output_style)
1015                     {
1016                     case OUTPUT_CONTEXT:
1017                       print_context_script (script, 0);
1018                       break;
1019 
1020                     case OUTPUT_UNIFIED:
1021                       print_context_script (script, 1);
1022                       break;
1023 
1024                     case OUTPUT_ED:
1025                       print_ed_script (script);
1026                       break;
1027 
1028                     case OUTPUT_FORWARD_ED:
1029                       pr_forward_ed_script (script);
1030                       break;
1031 
1032                     case OUTPUT_RCS:
1033                       print_rcs_script (script);
1034                       break;
1035 
1036                     case OUTPUT_NORMAL:
1037                       print_normal_script (script);
1038                       break;
1039 
1040                     case OUTPUT_IFDEF:
1041                       print_ifdef_script (script);
1042                       break;
1043 
1044                     case OUTPUT_SDIFF:
1045                       print_sdiff_script (script);
1046                     }
1047 
1048                 finish_output ();
1049               }
1050           }
1051 
1052       free (filevec[0].undiscarded);
1053 
1054       free (filevec[0].changed_flag - 1);
1055 
1056       for (i = 1; i >= 0; --i)
1057           free (filevec[i].equivs);
1058 
1059       for (i = 0; i < 2; ++i)
1060           free (filevec[i].linbuf + filevec[i].linbuf_base);
1061 
1062       for (e = script; e; e = p)
1063           {
1064             p = e->link;
1065             free (e);
1066           }
1067 
1068       if (! ROBUST_OUTPUT_STYLE (output_style))
1069           for (i = 0; i < 2; ++i)
1070             if (filevec[i].missing_newline)
1071               {
1072                 diff_error ("No newline at end of file %s", filevec[i].name, "");
1073                 changes = 2;
1074               }
1075     }
1076 
1077   if (filevec[0].buffer != filevec[1].buffer)
1078     free (filevec[0].buffer);
1079   free (filevec[1].buffer);
1080 
1081   return changes;
1082 }
1083