Projet

Général

Profil

Paste
Télécharger (34,7 ko) Statistiques
| Branche: | Révision:

root / htmltest / sites / all / modules / diff / DiffEngine.php @ a5572547

1
<?php
2

    
3
/**
4
 * @file
5
 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
6
 *
7
 * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8
 * You may copy this code freely under the conditions of the GPL.
9
 */
10

    
11
// Switch for the diff engine's internal consistency checks. Those checks are
// written as "USE_ASSERTS && assert(...)", so with FALSE the assert() call is
// short-circuited and never evaluated.
define('USE_ASSERTS', FALSE);
12

    
13
/**
 * Base class for one edit operation in a diff.
 *
 * Subclasses set $type and fill in $orig and $closing, and must override
 * reverse().
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp {
  /**
   * Operation name; the subclasses in this file use 'copy', 'delete', 'add'
   * and 'change'.
   */
  var $type;

  /**
   * Lines on the original side, or FALSE when the operation has none.
   */
  var $orig;

  /**
   * Lines on the closing (new) side, or FALSE when the operation has none.
   */
  var $closing;

  /**
   * Returns the inverse operation. Must be overridden; the base
   * implementation only raises an E_USER_ERROR.
   */
  function reverse() {
    trigger_error('pure virtual', E_USER_ERROR);
  }

  /**
   * @return int Number of lines on the original side (0 if none).
   */
  function norig() {
    return $this->orig ? count($this->orig) : 0;
  }

  /**
   * @return int Number of lines on the closing side (0 if none).
   */
  function nclosing() {
    return $this->closing ? count($this->closing) : 0;
  }
}
35

    
36
/**
 * Edit operation for a run of lines present on both sides.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Copy extends _DiffOp {
  var $type = 'copy';

  /**
   * @param $orig array The lines on the original side.
   * @param $closing array|bool The lines on the closing side; when this is
   *   not an array, $orig is used for both sides.
   */
  function __construct($orig, $closing = FALSE) {
    if (!is_array($closing)) {
      $closing = $orig;
    }
    $this->orig = $orig;
    $this->closing = $closing;
  }

  /**
   * Legacy PHP 4 style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct().
   */
  function _DiffOp_Copy($orig, $closing = FALSE) {
    self::__construct($orig, $closing);
  }

  /**
   * @return _DiffOp_Copy The same copy with the two sides swapped.
   */
  function reverse() {
    return new _DiffOp_Copy($this->closing, $this->orig);
  }
}
56

    
57
/**
 * Edit operation for lines that exist only on the original side.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Delete extends _DiffOp {
  var $type = 'delete';

  /**
   * @param $lines array The deleted lines; stored as $orig, while $closing
   *   is set to FALSE.
   */
  function __construct($lines) {
    $this->orig = $lines;
    $this->closing = FALSE;
  }

  /**
   * Legacy PHP 4 style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct().
   */
  function _DiffOp_Delete($lines) {
    self::__construct($lines);
  }

  /**
   * @return _DiffOp_Add An addition of the same lines.
   */
  function reverse() {
    return new _DiffOp_Add($this->orig);
  }
}
74

    
75
/**
 * Edit operation for lines that exist only on the closing side.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Add extends _DiffOp {
  var $type = 'add';

  /**
   * @param $lines array The added lines; stored as $closing, while $orig is
   *   set to FALSE.
   */
  function __construct($lines) {
    $this->closing = $lines;
    $this->orig = FALSE;
  }

  /**
   * Legacy PHP 4 style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct().
   */
  function _DiffOp_Add($lines) {
    self::__construct($lines);
  }

  /**
   * @return _DiffOp_Delete A deletion of the same lines.
   */
  function reverse() {
    return new _DiffOp_Delete($this->closing);
  }
}
92

    
93
/**
 * Edit operation replacing a run of original lines with closing lines.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Change extends _DiffOp {
  var $type = 'change';

  /**
   * @param $orig array The lines being replaced.
   * @param $closing array The lines replacing them.
   */
  function __construct($orig, $closing) {
    $this->orig = $orig;
    $this->closing = $closing;
  }

  /**
   * Legacy PHP 4 style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct().
   */
  function _DiffOp_Change($orig, $closing) {
    self::__construct($orig, $closing);
  }

  /**
   * @return _DiffOp_Change The same change with the two sides swapped.
   */
  function reverse() {
    return new _DiffOp_Change($this->closing, $this->orig);
  }
}
110

    
111

    
112
/**
 * Class used internally by Diff to actually compute the diffs.
 *
 * The algorithm used here is mostly lifted from the perl module
 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 *
 * More ideas are taken from:
 *   http://www.ics.uci.edu/~eppstein/161/960229.html
 *
 * Some ideas (and a bit of code) are from analyze.c, from GNU
 * diffutils-2.7, which can be found at:
 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 *
 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 * are my own.
 *
 * Line length limits for robustness added by Tim Starling, 2005-08-31
 *
 * @author Geoffrey T. Dairiki, Tim Starling
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffEngine {
  // Per-line "changed" flags for the from/to sequences, keyed by line index.
  var $xchanged = array();
  var $ychanged = array();

  // Lines of each sequence that also occur in the other one (the only lines
  // the LCS search looks at), and their indexes in the full sequences.
  var $xv = array();
  var $yv = array();
  var $xind = array();
  var $yind = array();

  // Scratch state of the LCS search in _diag() / _lcs_pos().
  var $seq = array();
  var $in_seq = array();
  var $lcs = 0;

  /**
   * @return int Length above which _line_hash() replaces a line by its MD5.
   */
  function MAX_XREF_LENGTH() {
    return 10000;
  }

  /**
   * Computes the edit operations that turn $from_lines into $to_lines.
   *
   * @param $from_lines array The original sequence of strings.
   * @param $to_lines array The closing sequence of strings.
   * @return array Ordered list of _DiffOp_Copy, _DiffOp_Change,
   *   _DiffOp_Delete and _DiffOp_Add objects.
   */
  function diff($from_lines, $to_lines) {
    $n_from = sizeof($from_lines);
    $n_to = sizeof($to_lines);

    $this->xchanged = $this->ychanged = array();
    $this->xv = $this->yv = array();
    $this->xind = $this->yind = array();
    $this->seq = array();
    $this->in_seq = array();
    $this->lcs = 0;

    // Skip leading common lines.
    for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
      if ($from_lines[$skip] !== $to_lines[$skip]) {
        break;
      }
      $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
    }
    // Skip trailing common lines.
    $xi = $n_from;
    $yi = $n_to;
    for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
      if ($from_lines[$xi] !== $to_lines[$yi]) {
        break;
      }
      $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
    }

    // Ignore lines which do not exist in both files.
    $xhash = array();
    $yhash = array();
    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
      $xhash[$this->_line_hash($from_lines[$xi])] = 1;
    }

    for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
      $line = $to_lines[$yi];
      // A line absent from the other file is necessarily changed.
      if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
        continue;
      }
      $yhash[$this->_line_hash($line)] = 1;
      $this->yv[] = $line;
      $this->yind[] = $yi;
    }
    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
      $line = $from_lines[$xi];
      if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
        continue;
      }
      $this->xv[] = $line;
      $this->xind[] = $xi;
    }

    // Find the LCS.
    $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));

    // Merge edits when possible.
    $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
    $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

    // Compute the edit operations.
    $edits = array();
    $xi = $yi = 0;
    while ($xi < $n_from || $yi < $n_to) {
      USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
      USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

      // Skip matching "snake".
      $copy = array();
      while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
        $copy[] = $from_lines[$xi++];
        ++$yi;
      }
      if ($copy) {
        $edits[] = new _DiffOp_Copy($copy);
      }

      // Find deletes & adds.
      $delete = array();
      while ($xi < $n_from && $this->xchanged[$xi]) {
        $delete[] = $from_lines[$xi++];
      }
      $add = array();
      while ($yi < $n_to && $this->ychanged[$yi]) {
        $add[] = $to_lines[$yi++];
      }
      if ($delete && $add) {
        $edits[] = new _DiffOp_Change($delete, $add);
      }
      elseif ($delete) {
        $edits[] = new _DiffOp_Delete($delete);
      }
      elseif ($add) {
        $edits[] = new _DiffOp_Add($add);
      }
    }
    return $edits;
  }

  /**
   * Returns the whole line if it's small enough, or the MD5 hash otherwise.
   */
  function _line_hash($line) {
    if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
      return md5($line);
    }
    return $line;
  }

  /**
   * Tells whether two lines are the same line.
   *
   * Compares the string forms strictly. A loose "==" would treat distinct
   * numeric strings such as "1e3" and "1000" as equal, which disagrees with
   * the array-key matching done in _diag() and yields a wrong diff.
   */
  function _same_line($a, $b) {
    return (string) $a === (string) $b;
  }

  /**
   * Divide the Largest Common Subsequence (LCS) of the sequences
   * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
   * sized segments.
   *
   * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
   * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
   * sub sequences.  The first sub-sequence is contained in [X0, X1),
   * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
   * that (X0, Y0) == (XOFF, YOFF) and
   * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
   *
   * This function assumes that the first lines of the specified portions
   * of the two files do not match, and likewise that the last lines do not
   * match.  The caller must trim matching lines from the beginning and end
   * of the portions it is going to specify.
   */
  function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
    $flip = FALSE;

    if ($xlim - $xoff > $ylim - $yoff) {
      // Things seems faster (I'm not sure I understand why)
      // when the shortest sequence in X.
      $flip = TRUE;
      list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
    }

    // Map each line of the (possibly flipped) Y range to its positions,
    // highest position first.
    $ymatches = array();
    if ($flip) {
      for ($i = $ylim - 1; $i >= $yoff; $i--) {
        $ymatches[$this->xv[$i]][] = $i;
      }
    }
    else {
      for ($i = $ylim - 1; $i >= $yoff; $i--) {
        $ymatches[$this->yv[$i]][] = $i;
      }
    }
    $this->lcs = 0;
    $this->seq[0] = $yoff - 1;
    $this->in_seq = array();
    $ymids[0] = array();

    $numer = $xlim - $xoff + $nchunks - 1;
    $x = $xoff;
    for ($chunk = 0; $chunk < $nchunks; $chunk++) {
      if ($chunk > 0) {
        for ($i = 0; $i <= $this->lcs; $i++) {
          $ymids[$i][$chunk - 1] = $this->seq[$i];
        }
      }

      $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks);
      for ( ; $x < $x1; $x++) {
        $line = $flip ? $this->yv[$x] : $this->xv[$x];
        if (empty($ymatches[$line])) {
          continue;
        }
        $matches = $ymatches[$line];
        $n_matches = count($matches);
        // One cursor shared by both loops below: the second loop resumes
        // right after the element on which the first one stopped. This
        // replaces the each() calls, which no longer exist in PHP 8.
        $m = 0;
        while ($m < $n_matches) {
          $y = $matches[$m++];
          if (empty($this->in_seq[$y])) {
            $k = $this->_lcs_pos($y);
            USE_ASSERTS && assert($k > 0);
            $ymids[$k] = $ymids[$k - 1];
            break;
          }
        }
        while ($m < $n_matches) {
          $y = $matches[$m++];
          if ($y > $this->seq[$k - 1]) {
            USE_ASSERTS && assert($y < $this->seq[$k]);
            // Optimization: this is a common case:
            // next match is just replacing previous match.
            $this->in_seq[$this->seq[$k]] = FALSE;
            $this->seq[$k] = $y;
            $this->in_seq[$y] = 1;
          }
          elseif (empty($this->in_seq[$y])) {
            $k = $this->_lcs_pos($y);
            USE_ASSERTS && assert($k > 0);
            $ymids[$k] = $ymids[$k - 1];
          }
        }
      }
    }

    $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
    $ymid = $ymids[$this->lcs];
    for ($n = 0; $n < $nchunks - 1; $n++) {
      $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
      $y1 = $ymid[$n] + 1;
      $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
    }
    $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

    return array($this->lcs, $seps);
  }

  /**
   * Records Y position $ypos in $this->seq and returns the slot it took.
   *
   * A position greater than the current last entry is appended, growing
   * $this->lcs by one. Otherwise a binary search over slots 1..lcs finds the
   * first entry that is not smaller than $ypos, and that entry is replaced.
   * $this->in_seq is kept in step with $this->seq.
   *
   * @param $ypos int Position in the Y sequence.
   * @return int The index in $this->seq now holding $ypos.
   */
  function _lcs_pos($ypos) {
    $end = $this->lcs;
    if ($end == 0 || $ypos > $this->seq[$end]) {
      $this->seq[++$this->lcs] = $ypos;
      $this->in_seq[$ypos] = 1;
      return $this->lcs;
    }

    $beg = 1;
    while ($beg < $end) {
      $mid = (int)(($beg + $end) / 2);
      if ($ypos > $this->seq[$mid]) {
        $beg = $mid + 1;
      }
      else {
        $end = $mid;
      }
    }

    USE_ASSERTS && assert($ypos != $this->seq[$end]);

    $this->in_seq[$this->seq[$end]] = FALSE;
    $this->seq[$end] = $ypos;
    $this->in_seq[$ypos] = 1;
    return $end;
  }

  /**
   * Find LCS of two sequences.
   *
   * The results are recorded in the vectors $this->{x,y}changed[], by
   * storing a 1 in the element for each line that is an insertion
   * or deletion (ie. is not in the LCS).
   *
   * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
   *
   * Note that XLIM, YLIM are exclusive bounds.
   * All line numbers are origin-0 and discarded lines are not counted.
   */
  function _compareseq($xoff, $xlim, $yoff, $ylim) {
    // Slide down the bottom initial diagonal.
    while ($xoff < $xlim && $yoff < $ylim && $this->_same_line($this->xv[$xoff], $this->yv[$yoff])) {
      ++$xoff;
      ++$yoff;
    }

    // Slide up the top initial diagonal.
    while ($xlim > $xoff && $ylim > $yoff && $this->_same_line($this->xv[$xlim - 1], $this->yv[$ylim - 1])) {
      --$xlim;
      --$ylim;
    }

    if ($xoff == $xlim || $yoff == $ylim) {
      $lcs = 0;
    }
    else {
      // This is ad hoc but seems to work well.
      //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
      //$nchunks = max(2, min(8, (int)$nchunks));
      $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
      list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
    }

    if ($lcs == 0) {
      // X and Y sequences have no common subsequence:
      // mark all changed.
      while ($yoff < $ylim) {
        $this->ychanged[$this->yind[$yoff++]] = 1;
      }
      while ($xoff < $xlim) {
        $this->xchanged[$this->xind[$xoff++]] = 1;
      }
    }
    else {
      // Use the partitions to split this problem into subproblems.
      $pt1 = $seps[0];
      for ($s = 1, $n_seps = count($seps); $s < $n_seps; $s++) {
        $pt2 = $seps[$s];
        $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
        $pt1 = $pt2;
      }
    }
  }

  /**
   * Adjust inserts/deletes of identical lines to join changes
   * as much as possible.
   *
   * We do something when a run of changed lines include a
   * line at one end and has an excluded, identical line at the other.
   * We are free to choose which identical line is included.
   * `compareseq' usually chooses the one at the beginning,
   * but usually it is cleaner to consider the following identical line
   * to be the "change".
   *
   * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
   *
   * @param $lines array The full sequence of lines of one file.
   * @param $changed array The changed flags for $lines; updated in place.
   * @param $other_changed array The changed flags of the other file.
   */
  function _shift_boundaries($lines, &$changed, $other_changed) {
    $i = 0;
    $j = 0;

    USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
    $len = sizeof($lines);
    $other_len = sizeof($other_changed);

    while (1) {
      /*
       * Scan forwards to find beginning of another run of changes.
       * Also keep track of the corresponding point in the other file.
       *
       * Throughout this code, $i and $j are adjusted together so that
       * the first $i elements of $changed and the first $j elements
       * of $other_changed both contain the same number of zeros
       * (unchanged lines).
       * Furthermore, $j is always kept so that $j == $other_len or
       * $other_changed[$j] == FALSE.
       */
      while ($j < $other_len && $other_changed[$j]) {
        $j++;
      }
      while ($i < $len && !$changed[$i]) {
        USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
        $i++;
        $j++;
        while ($j < $other_len && $other_changed[$j]) {
          $j++;
        }
      }

      if ($i == $len) {
        break;
      }
      $start = $i;

      // Find the end of this run of changes.
      while (++$i < $len && $changed[$i]) {
        continue;
      }

      do {
        /*
         * Record the length of this run of changes, so that
         * we can later determine whether the run has grown.
         */
        $runlength = $i - $start;

        /*
         * Move the changed region back, so long as the
         * previous unchanged line matches the last changed one.
         * This merges with previous changed regions.
         */
        while ($start > 0 && $this->_same_line($lines[$start - 1], $lines[$i - 1])) {
          $changed[--$start] = 1;
          $changed[--$i] = FALSE;
          while ($start > 0 && $changed[$start - 1]) {
            $start--;
          }
          USE_ASSERTS && assert($j > 0);
          while ($other_changed[--$j]) {
            continue;
          }
          USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
        }

        /*
         * Set CORRESPONDING to the end of the changed run, at the last
         * point where it corresponds to a changed run in the other file.
         * CORRESPONDING == LEN means no such point has been found.
         */
        $corresponding = $j < $other_len ? $i : $len;

        /*
         * Move the changed region forward, so long as the
         * first changed line matches the following unchanged one.
         * This merges with following changed regions.
         * Do this second, so that if there are no merges,
         * the changed region is moved forward as far as possible.
         */
        while ($i < $len && $this->_same_line($lines[$start], $lines[$i])) {
          $changed[$start++] = FALSE;
          $changed[$i++] = 1;
          while ($i < $len && $changed[$i]) {
            $i++;
          }
          USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
          $j++;
          if ($j < $other_len && $other_changed[$j]) {
            $corresponding = $i;
            while ($j < $other_len && $other_changed[$j]) {
              $j++;
            }
          }
        }
      } while ($runlength != $i - $start);

      /*
       * If possible, move the fully-merged run of changes
       * back to a corresponding run in the other file.
       */
      while ($corresponding < $i) {
        $changed[--$start] = 1;
        $changed[--$i] = 0;
        USE_ASSERTS && assert($j > 0);
        while ($other_changed[--$j]) {
          continue;
        }
        USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
      }
    }
  }
}
561

    
562
/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class Diff {
  /**
   * Ordered list of edit operations (objects exposing type, orig, closing
   * and reverse()).
   */
  var $edits;

  /**
   * Constructor.
   * Computes diff between sequences of strings.
   *
   * @param $from_lines array An array of strings.
   *      (Typically these are lines from a file.)
   * @param $to_lines array An array of strings.
   */
  function __construct($from_lines, $to_lines) {
    $eng = new _DiffEngine;
    $this->edits = $eng->diff($from_lines, $to_lines);
    //$this->_check($from_lines, $to_lines);
  }

  /**
   * Legacy PHP 4 style constructor name.
   *
   * Kept because subclasses call it explicitly as $this->Diff(...). It calls
   * self::__construct() rather than $this->__construct() so that a subclass
   * constructor with a different signature is not re-entered.
   */
  function Diff($from_lines, $to_lines) {
    self::__construct($from_lines, $to_lines);
  }

  /**
   * Compute reversed Diff.
   *
   * SYNOPSIS:
   *
   *  $diff = new Diff($lines1, $lines2);
   *  $rev = $diff->reverse();
   *
   * @return object A Diff object representing the inverse of the
   *          original diff.
   */
  function reverse() {
    // Objects are handles in PHP 5+, so a plain "$rev = $this" would alias
    // this object: its edit list would be emptied before being iterated.
    $rev = clone $this;
    $rev->edits = array();
    foreach ($this->edits as $edit) {
      $rev->edits[] = $edit->reverse();
    }
    return $rev;
  }

  /**
   * Check for empty diff.
   *
   * @return bool True iff two sequences were identical.
   */
  function isEmpty() {
    foreach ($this->edits as $edit) {
      if ($edit->type != 'copy') {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Compute the length of the Longest Common Subsequence (LCS).
   *
   * This is mostly for diagnostic purposes.
   *
   * @return int The length of the LCS.
   */
  function lcs() {
    $lcs = 0;
    foreach ($this->edits as $edit) {
      if ($edit->type == 'copy') {
        $lcs += sizeof($edit->orig);
      }
    }
    return $lcs;
  }

  /**
   * Get the original set of lines.
   *
   * This reconstructs the $from_lines parameter passed to the
   * constructor.
   *
   * @return array The original sequence of strings.
   */
  function orig() {
    $lines = array();

    foreach ($this->edits as $edit) {
      if ($edit->orig) {
        foreach ($edit->orig as $line) {
          $lines[] = $line;
        }
      }
    }
    return $lines;
  }

  /**
   * Get the closing set of lines.
   *
   * This reconstructs the $to_lines parameter passed to the
   * constructor.
   *
   * @return array The sequence of strings.
   */
  function closing() {
    $lines = array();

    foreach ($this->edits as $edit) {
      if ($edit->closing) {
        foreach ($edit->closing as $line) {
          $lines[] = $line;
        }
      }
    }
    return $lines;
  }

  /**
   * Check a Diff for validity.
   *
   * This is here only for debugging purposes. It raises E_USER_ERROR when
   * the diff or its reverse does not reproduce the inputs, or when two
   * consecutive edits have the same type, and E_USER_NOTICE on success.
   */
  function _check($from_lines, $to_lines) {
    if (serialize($from_lines) != serialize($this->orig())) {
      trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
    }
    if (serialize($to_lines) != serialize($this->closing())) {
      trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
    }

    $rev = $this->reverse();
    if (serialize($to_lines) != serialize($rev->orig())) {
      trigger_error("Reversed original doesn't match", E_USER_ERROR);
    }
    if (serialize($from_lines) != serialize($rev->closing())) {
      trigger_error("Reversed closing doesn't match", E_USER_ERROR);
    }

    $prevtype = 'none';
    foreach ($this->edits as $edit) {
      if ($prevtype == $edit->type) {
        trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
      }
      $prevtype = $edit->type;
    }

    $lcs = $this->lcs();
    trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  }
}
707

    
708
/**
 * A Diff computed on one pair of sequences but reported in terms of another.
 *
 * FIXME: bad name.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class MappedDiff extends Diff {
  /**
   * Constructor.
   *
   * Computes diff between sequences of strings.
   *
   * This can be used to compute things like
   * case-insensitive diffs, or diffs which ignore
   * changes in white-space.
   *
   * @param $from_lines array An array of strings.
   *  (Typically these are lines from a file.)
   *
   * @param $to_lines array An array of strings.
   *
   * @param $mapped_from_lines array This array should
   *  have the same number of elements as $from_lines.
   *  The elements in $mapped_from_lines and
   *  $mapped_to_lines are what is actually compared
   *  when computing the diff.
   *
   * @param $mapped_to_lines array This array should
   *  have the same number of elements as $to_lines.
   */
  function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
    assert(sizeof($from_lines) == sizeof($mapped_from_lines));
    assert(sizeof($to_lines) == sizeof($mapped_to_lines));

    // Diff the mapped sequences through the parent's legacy entry point.
    $this->Diff($mapped_from_lines, $mapped_to_lines);

    // Swap the mapped lines held by each edit for the matching slice of the
    // unmapped input, walking both inputs in step with the edits.
    $xi = $yi = 0;
    foreach ($this->edits as $edit) {
      if (is_array($edit->orig)) {
        $edit->orig = array_slice($from_lines, $xi, sizeof($edit->orig));
        $xi += sizeof($edit->orig);
      }

      if (is_array($edit->closing)) {
        $edit->closing = array_slice($to_lines, $yi, sizeof($edit->closing));
        $yi += sizeof($edit->closing);
      }
    }
  }

  /**
   * Legacy PHP 4 style constructor name.
   *
   * Kept because subclasses call it explicitly as $this->MappedDiff(...).
   * It calls self::__construct() rather than $this->__construct() so that a
   * subclass constructor with a different signature is not re-entered.
   */
  function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
    self::__construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines);
  }
}
761

    
762
/**
 * A class to format Diffs
 *
 * This class formats the diff in classic diff format.
 * It is intended that this class be customized via inheritance,
 * to obtain fancier outputs.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DiffFormatter {
  /**
   * Should a block header be shown?
   */
  var $show_header = TRUE;

  /**
   * Number of leading context "lines" to preserve.
   *
   * This should be left at zero for this class, but subclasses
   * may want to set this to other values.
   */
  var $leading_context_lines = 0;

  /**
   * Number of trailing context "lines" to preserve.
   *
   * This should be left at zero for this class, but subclasses
   * may want to set this to other values.
   */
  var $trailing_context_lines = 0;

  /**
   * Running totals updated by format(): each call adds its final x and y
   * line counters to ['counter']['x'] and ['counter']['y'].
   */
  var $line_stats = array('counter' => array('x' => 0, 'y' => 0));

  /**
   * Format a diff.
   *
   * @param $diff object A Diff object.
   * @return string The formatted output.
   */
  function format($diff) {
    // 1-based positions of the next line on each side.
    $xi = $yi = 1;
    // FALSE while no block is open, otherwise the edits collected so far.
    $block = FALSE;
    $context = array();

    $nlead = $this->leading_context_lines;
    $ntrail = $this->trailing_context_lines;

    $this->_start_diff();

    foreach ($diff->edits as $edit) {
      if ($edit->type == 'copy') {
        if (is_array($block)) {
          if (sizeof($edit->orig) <= $nlead + $ntrail) {
            // Short enough to serve as context joining two changes.
            $block[] = $edit;
          }
          else {
            if ($ntrail) {
              $context = array_slice($edit->orig, 0, $ntrail);
              $block[] = new _DiffOp_Copy($context);
            }
            $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
            $block = FALSE;
          }
        }
        $context = $edit->orig;
      }
      else {
        if (!is_array($block)) {
          // Open a new block, seeded with up to $nlead lines of the
          // preceding copy.
          $context = array_slice($context, sizeof($context) - $nlead);
          $x0 = $xi - sizeof($context);
          $y0 = $yi - sizeof($context);
          $block = array();
          if ($context) {
            $block[] = new _DiffOp_Copy($context);
          }
        }
        $block[] = $edit;
      }

      if ($edit->orig) {
        $xi += sizeof($edit->orig);
      }
      if ($edit->closing) {
        $yi += sizeof($edit->closing);
      }
    }

    if (is_array($block)) {
      $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
    }
    $end = $this->_end_diff();

    // $xi and $yi start at 1 and only grow, so they are always added. The
    // counters are initialised first in case a subclass replaced
    // $line_stats without them.
    foreach (array('x' => $xi, 'y' => $yi) as $side => $position) {
      if (!isset($this->line_stats['counter'][$side])) {
        $this->line_stats['counter'][$side] = 0;
      }
      $this->line_stats['counter'][$side] += $position;
    }

    return $end;
  }

  /**
   * Emits one block: its header followed by each edit it contains.
   *
   * @param $xbeg int First line of the block on the original side.
   * @param $xlen int Number of original lines covered.
   * @param $ybeg int First line of the block on the closing side.
   * @param $ylen int Number of closing lines covered.
   * @param $edits array The edit operations making up the block.
   */
  function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
    $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
    foreach ($edits as $edit) {
      if ($edit->type == 'copy') {
        $this->_context($edit->orig);
      }
      elseif ($edit->type == 'add') {
        $this->_added($edit->closing);
      }
      elseif ($edit->type == 'delete') {
        $this->_deleted($edit->orig);
      }
      elseif ($edit->type == 'change') {
        $this->_changed($edit->orig, $edit->closing);
      }
      else {
        trigger_error('Unknown edit type', E_USER_ERROR);
      }
    }
    $this->_end_block();
  }

  /**
   * Starts capturing everything echoed by the formatting callbacks.
   */
  function _start_diff() {
    ob_start();
  }

  /**
   * Stops capturing and returns the captured output.
   */
  function _end_diff() {
    $val = ob_get_contents();
    ob_end_clean();
    return $val;
  }

  /**
   * Builds a block header such as "2c2" or "1,3d0".
   *
   * A range of more than one line is written "first,last". The letter is
   * 'a' when no original lines are covered, 'd' when no closing lines are
   * covered, and 'c' otherwise.
   */
  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
    if ($xlen > 1) {
      $xbeg .= "," . ($xbeg + $xlen - 1);
    }
    if ($ylen > 1) {
      $ybeg .= "," . ($ybeg + $ylen - 1);
    }

    return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  }

  /**
   * Echoes the block header when $show_header is set.
   */
  function _start_block($header) {
    if ($this->show_header) {
      echo $header . "\n";
    }
  }

  /**
   * Hook called after each block; does nothing in this class.
   */
  function _end_block() {
  }

  /**
   * Echoes each line preceded by $prefix and a space.
   */
  function _lines($lines, $prefix = ' ') {
    foreach ($lines as $line) {
      echo "$prefix $line\n";
    }
  }

  /**
   * Echoes unchanged context lines.
   */
  function _context($lines) {
    $this->_lines($lines);
  }

  /**
   * Echoes added lines, marked with '>'.
   */
  function _added($lines) {
    $this->_lines($lines, '>');
  }

  /**
   * Echoes deleted lines, marked with '<'.
   */
  function _deleted($lines) {
    $this->_lines($lines, '<');
  }

  /**
   * Echoes a change: the deleted lines, a "---" separator, the added lines.
   */
  function _changed($orig, $closing) {
    $this->_deleted($orig);
    echo "---\n";
    $this->_added($closing);
  }
}
938

    
939

    
940
/**
941
 *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
942
 *
943
 */
944

    
945
// HTML numeric character reference for a non-breaking space.
// _HWLDF_WordAccumulator::_flushLine() stores it in place of an empty line so
// that the line stays visible.
define('NBSP', '&#160;');      // iso-8859-x non-breaking space.
946

    
947
/**
 * Collects words into HTML-escaped lines, wrapping runs of words tagged
 * 'mark' in <span class="diffchange">.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _HWLDF_WordAccumulator {
  // Completed lines.
  var $_lines = array();
  // Escaped content of the line being built.
  var $_line = '';
  // Raw (unescaped) words collected under the current tag.
  var $_group = '';
  // Tag of the current group; 'mark' means highlighted.
  var $_tag = '';

  function __construct() {
    $this->_lines = array();
    $this->_line = '';
    $this->_group = '';
    $this->_tag = '';
  }

  /**
   * Legacy PHP 4 style constructor name, kept so explicit calls keep working.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the real work lives in __construct().
   */
  function _HWLDF_WordAccumulator() {
    self::__construct();
  }

  /**
   * Appends the pending group to the current line, passed through
   * check_plain(), and switches to $new_tag.
   */
  function _flushGroup($new_tag) {
    if ($this->_group !== '') {
      if ($this->_tag == 'mark') {
        $this->_line .= '<span class="diffchange">' . check_plain($this->_group) . '</span>';
      }
      else {
        $this->_line .= check_plain($this->_group);
      }
    }
    $this->_group = '';
    $this->_tag = $new_tag;
  }

  /**
   * Flushes the pending group, then stores the current line and starts a
   * new one.
   */
  function _flushLine($new_tag) {
    $this->_flushGroup($new_tag);
    if ($this->_line != '') {
      array_push($this->_lines, $this->_line);
    }
    else {
      // make empty lines visible by inserting an NBSP
      array_push($this->_lines, NBSP);
    }
    $this->_line = '';
  }

  /**
   * Adds words under the given tag.
   *
   * @param $words array The words; a word may start with "\n", which ends
   *   the current line before the rest of the word is added.
   * @param $tag string 'mark' to highlight the words, '' otherwise.
   */
  function addWords($words, $tag = '') {
    if ($tag != $this->_tag) {
      $this->_flushGroup($tag);
    }
    foreach ($words as $word) {
      // new-line should only come as first char of word.
      if ($word == '') {
        continue;
      }
      if ($word[0] == "\n") {
        $this->_flushLine($tag);
        $word = drupal_substr($word, 1);
      }
      assert(!strstr($word, "\n"));
      $this->_group .= $word;
    }
  }

  /**
   * Flushes the line in progress and returns all lines.
   *
   * @return array The accumulated lines.
   */
  function getLines() {
    $this->_flushLine('~done');
    return $this->_lines;
  }
}
1008

    
1009
/**
 * Word-level diff between two sets of lines.
 *
 * Both inputs are split into words, diffed through MappedDiff, and can then
 * be rendered back into HTML-escaped lines in which the words that are not
 * part of a 'copy' edit are highlighted.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class WordLevelDiff extends MappedDiff {
  /**
   * Returns the length above which a line is not split into words.
   */
  function MAX_LINE_LENGTH() {
    return 10000;
  }

  /**
   * Constructor: splits both sides into words and computes the diff.
   *
   * Declared as __construct() because PHP 8 no longer treats a method named
   * after the class as a constructor, which would leave the diff uncomputed.
   *
   * @param $orig_lines
   *   Array of lines of the original text.
   * @param $closing_lines
   *   Array of lines of the changed text.
   */
  function __construct($orig_lines, $closing_lines) {
    list($orig_words, $orig_stripped) = $this->_split($orig_lines);
    list($closing_words, $closing_stripped) = $this->_split($closing_lines);

    $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  }

  /**
   * Legacy PHP 4 style constructor name, kept for explicit callers.
   *
   * Defined after __construct() so that PHP 5 does not report a redefined
   * constructor.
   */
  function WordLevelDiff($orig_lines, $closing_lines) {
    $this->__construct($orig_lines, $closing_lines);
  }

  /**
   * Splits lines into words.
   *
   * @param $lines
   *   Array of lines.
   *
   * @return
   *   array($words, $stripped). Both lists have one entry per word and a
   *   "\n" entry between consecutive lines. An entry in $words may carry one
   *   trailing whitespace character that the matching $stripped entry lacks.
   */
  function _split($lines) {
    $words = array();
    $stripped = array();
    $first = TRUE;
    foreach ($lines as $line) {
      // Separate consecutive lines with a newline "word".
      if ($first) {
        $first = FALSE;
      }
      else {
        $words[] = "\n";
        $stripped[] = "\n";
      }
      if (drupal_strlen($line) > $this->MAX_LINE_LENGTH()) {
        // If the line is too long, just pretend the entire line is one big
        // word. This prevents resource exhaustion problems.
        $words[] = $line;
        $stripped[] = $line;
      }
      // Group 1 is a run of non-newline whitespace, a run of word characters
      // or any single character; the optional tail is one whitespace
      // character that ends up in $m[0] only.
      // NOTE(review): "(?!< \n)" is a negative lookahead here; it may have
      // been meant as the lookbehind "(?<! \n)". Pattern left unchanged;
      // confirm before altering.
      elseif (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
        $words = array_merge($words, $m[0]);
        $stripped = array_merge($stripped, $m[1]);
      }
    }
    return array($words, $stripped);
  }

  /**
   * Renders one side of the diff into highlighted lines.
   *
   * @param $side
   *   Name of the edit property to render: 'orig' or 'closing'.
   *
   * @return
   *   Array of HTML-escaped lines as produced by _HWLDF_WordAccumulator.
   */
  function _accumulate($side) {
    $accumulator = new _HWLDF_WordAccumulator;

    foreach ($this->edits as $edit) {
      if ($edit->type == 'copy') {
        $accumulator->addWords($edit->$side);
      }
      elseif ($edit->$side) {
        // Any non-copy edit with content on this side is highlighted.
        $accumulator->addWords($edit->$side, 'mark');
      }
    }
    return $accumulator->getLines();
  }

  /**
   * Returns the original side as HTML-escaped lines with changes marked.
   */
  function orig() {
    return $this->_accumulate('orig');
  }

  /**
   * Returns the closing side as HTML-escaped lines with changes marked.
   */
  function closing() {
    return $this->_accumulate('closing');
  }
}
1084

    
1085
/**
 * Diff formatter which uses Drupal theme functions.
 *
 * Builds an array of table rows. Each content row is the concatenation of a
 * left-hand (original) pair of cells and a right-hand (closing) pair of
 * cells: a marker cell followed by a content cell.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DrupalDiffFormatter extends DiffFormatter {

  // Rows collected for the diff currently being formatted.
  var $rows;
  // Line counters and offsets for both sides; the offsets are added to the
  // line numbers shown in block headers.
  var $line_stats = array(
    'counter' => array('x' => 0, 'y' => 0),
    'offset' => array('x' => 0, 'y' => 0),
  );

  /**
   * Constructor: reads the number of context lines from Drupal variables.
   *
   * Declared as __construct() because PHP 8 no longer treats a method named
   * after the class as a constructor, which would skip these settings.
   */
  function __construct() {
    $this->leading_context_lines = variable_get('diff_context_lines_leading', 2);
    $this->trailing_context_lines = variable_get('diff_context_lines_trailing', 2);
  }

  /**
   * Legacy PHP 4 style constructor name, kept for explicit callers.
   *
   * Defined after __construct() so that PHP 5 does not report a redefined
   * constructor.
   */
  function DrupalDiffFormatter() {
    $this->__construct();
  }

  /**
   * Resets the collected rows.
   */
  function _start_diff() {
    $this->rows = array();
  }

  /**
   * Returns the collected rows.
   */
  function _end_diff() {
    return $this->rows;
  }

  /**
   * Builds the header row of a block.
   *
   * @param $xbeg
   *   First line number of the block on the original side.
   * @param $xlen
   *   Not used by this implementation.
   * @param $ybeg
   *   First line number of the block on the closing side.
   * @param $ylen
   *   Not used by this implementation.
   *
   * @return
   *   Array of two cells, each spanning two columns, holding the themed line
   *   number of its side (start line plus the configured offset).
   */
  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
    return array(
      array(
        'data' => theme('diff_header_line', array('lineno' => $xbeg + $this->line_stats['offset']['x'])),
        'colspan' => 2,
      ),
      array(
        'data' => theme('diff_header_line', array('lineno' => $ybeg + $this->line_stats['offset']['y'])),
        'colspan' => 2,
      )
    );
  }

  /**
   * Adds the block header row, if headers are enabled.
   */
  function _start_block($header) {
    if ($this->show_header) {
      $this->rows[] = $header;
    }
  }

  /**
   * Intentionally empty: nothing to emit at the end of a block.
   */
  function _end_block() {
  }

  /**
   * Intentionally empty: rows are produced by the _added(), _deleted(),
   * _context() and _changed() methods of this class instead.
   */
  function _lines($lines, $prefix=' ', $color='white') {
  }

  /**
   * Builds the marker and content cells for an added line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function addedLine($line) {
    return array(
      array(
        'data' => '+',
        'class' => 'diff-marker',
      ),
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context diff-addedline',
      )
    );
  }

  /**
   * Builds the marker and content cells for a deleted line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function deletedLine($line) {
    return array(
      array(
        'data' => '-',
        'class' => 'diff-marker',
      ),
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context diff-deletedline',
      )
    );
  }

  /**
   * Builds the marker and content cells for an unchanged line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function contextLine($line) {
    return array(
      '&nbsp;',
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context',
      )
    );
  }

  /**
   * Builds the two placeholder cells for a side that has no line.
   */
  function emptyLine() {
    return array(
      '&nbsp;',
      theme('diff_empty_line', array('line' => '&nbsp;')),
    );
  }

  /**
   * Adds one row per added line: empty left side, added line on the right.
   */
  function _added($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
    }
  }

  /**
   * Adds one row per deleted line: deleted line on the left, empty right side.
   */
  function _deleted($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
    }
  }

  /**
   * Adds one row per unchanged line, showing it on both sides.
   */
  function _context($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
    }
  }

  /**
   * Adds rows for a changed block with word-level highlighting.
   *
   * Deleted and added lines are paired row by row; whichever side runs out
   * first is padded with empty cells.
   *
   * @param $orig
   *   Array of lines of the original side of the block.
   * @param $closing
   *   Array of lines of the closing side of the block.
   */
  function _changed($orig, $closing) {
    $diff = new WordLevelDiff($orig, $closing);
    $del = $diff->orig();
    $add = $diff->closing();

    // Notice that WordLevelDiff returns HTML-escaped output.
    // Hence, we will be calling addedLine/deletedLine without HTML-escaping.

    // array_shift() returns NULL once the array is exhausted. Compare against
    // NULL explicitly: a line whose text is "0" is falsy and must not end
    // the loop.
    while (($line = array_shift($del)) !== NULL) {
      $aline = array_shift($add);
      $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
    }
    // Any added lines left over have no deleted counterpart.
    foreach ($add as $line) {
      $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
    }
  }
}
1223

    
1224
/**
 * Drupal inline Diff formatter.
 *
 * Compares two HTML strings piece by piece (tags and space-separated text)
 * and renders a single HTML string in which changed text is passed through
 * the 'diff_inline_chunk' theme hook.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DrupalDiffInline {
  // Original HTML string.
  var $a;
  // Changed HTML string.
  var $b;

  /**
   * Constructor.
   *
   * @param $a
   *   Original HTML string.
   * @param $b
   *   Changed HTML string.
   */
  function __construct($a, $b) {
    $this->a = $a;
    $this->b = $b;
  }

  /**
   * Tells whether a piece starts with '<' and ends with '>'.
   *
   * @param $piece
   *   String piece produced by splitting the input.
   *
   * @return
   *   TRUE if the piece is delimited like an HTML tag, FALSE otherwise.
   */
  protected function is_tag($piece) {
    return strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>';
  }

  /**
   * Render differences inline using HTML markup.
   *
   * @return
   *   HTML string. Copied pieces and tags are output as they are; other
   *   pieces are wrapped by the 'diff_inline_chunk' theme hook.
   */
  function render() {
    // Split on tags and single spaces, keeping the delimiters as pieces.
    $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
    $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
    $diff = new Diff($a, $b);
    $diff->edits = $this->process_edits($diff->edits);

    // Assemble highlighted output
    $output = '';
    foreach ($diff->edits as $chunk) {
      switch ($chunk->type) {
        case 'copy':
          $output .= implode('', $chunk->closing);
          break;
        case 'delete':
          foreach ($chunk->orig as $piece) {
            if ($this->is_tag($piece)) {
              $output .= $piece;
            }
            else {
              $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
            }
          }
          break;
        default:
          $chunk->closing = $this->process_chunk($chunk->closing);
          foreach ($chunk->closing as $piece) {
            // Single spaces and tags pass through untouched, except <img>
            // tags, which are themed like text.
            if ($piece === ' ' || ($this->is_tag($piece) && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
              $output .= $piece;
            }
            else {
              $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
            }
          }
          break;
      }
    }
    return $output;
  }

  /**
   * Merge chunk segments between tag delimiters.
   *
   * @param $chunk
   *   Array of pieces.
   *
   * @return
   *   Array in which each tag is an element of its own and each run of
   *   consecutive non-tag pieces is concatenated into one element.
   */
  function process_chunk($chunk) {
    $processed = array();
    $j = 0;
    foreach ($chunk as $i => $piece) {
      $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
      if (!isset($processed[$j])) {
        $processed[$j] = '';
      }
      if ($this->is_tag($piece)) {
        // A tag always occupies its own slot.
        $processed[$j] = $piece;
        $j++;
      }
      elseif (isset($next) && $this->is_tag($next)) {
        // Last text piece before a tag: close the current slot.
        $processed[$j] .= $piece;
        $j++;
      }
      else {
        $processed[$j] .= $piece;
      }
    }
    return $processed;
  }

  /**
   * Merge copy and equivalent edits into intelligible chunks.
   *
   * @param $edits
   *   Array of edit objects exposing 'type', 'orig' and 'closing'.
   *
   * @return
   *   Array of merged edit objects; an empty array if $edits is empty.
   */
  function process_edits($edits) {
    // Nothing to merge. Without this guard the passes below would return
    // array(NULL), which render() cannot iterate safely.
    if (empty($edits)) {
      return array();
    }

    $processed = array();
    $current = array_shift($edits);

    // Make two passes -- first merge space delimiter copies back into their originals.
    while ($chunk = array_shift($edits)) {
      if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
        $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
        $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
      }
      else {
        $processed[] = $current;
        $current = $chunk;
      }
    }
    $processed[] = $current;

    // Initial setup
    $edits = $processed;
    $processed = array();
    $current = array_shift($edits);

    // Second, merge equivalent chunks into each other.
    while ($chunk = array_shift($edits)) {
      if ($current->type == $chunk->type) {
        $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
        $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
      }
      else {
        $processed[] = $current;
        $current = $chunk;
      }
    }
    $processed[] = $current;

    return $processed;
  }
}