Projet

Général

Profil

Paste
Télécharger (34,8 ko) Statistiques
| Branche: | Révision:

root / drupal7 / sites / all / modules / diff / DiffEngine.php @ 661d64c9

1
<?php
2

    
3
/**
4
 * @file
5
 * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
6
 *
7
 * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
8
 * You may copy this code freely under the conditions of the GPL.
9
 */
10

    
11
// Switch for the engine's internal consistency checks: the code below guards
// them as `USE_ASSERTS && assert(...)`, so with FALSE they are short-circuited.
define('USE_ASSERTS', FALSE);
12

    
13
/**
 * Base class for a single edit operation inside a diff.
 *
 * An operation carries the affected lines of the original sequence ($orig)
 * and of the closing sequence ($closing); either side may be FALSE when the
 * operation does not touch it. Subclasses set $type and implement reverse().
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp {
  /**
   * Operation name; the subclasses in this file use 'copy', 'delete', 'add'
   * and 'change'.
   */
  public $type;

  /**
   * Lines taken from the original sequence, or FALSE.
   */
  public $orig;

  /**
   * Lines taken from the closing sequence, or FALSE.
   */
  public $closing;

  /**
   * Returns the inverse operation; must be overridden by subclasses.
   */
  function reverse() {
    trigger_error('pure virtual', E_USER_ERROR);
  }

  /**
   * Number of lines on the original side (0 when there are none).
   */
  function norig() {
    if (!$this->orig) {
      return 0;
    }
    return count($this->orig);
  }

  /**
   * Number of lines on the closing side (0 when there are none).
   */
  function nclosing() {
    if (!$this->closing) {
      return 0;
    }
    return count($this->closing);
  }
}
35

    
36
/**
 * Edit operation for a run of lines present in both sequences.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Copy extends _DiffOp {
  public $type = 'copy';

  /**
   * @param $orig array Lines as they appear in the original sequence.
   * @param $closing array|bool Lines as they appear in the closing sequence;
   *   anything that is not an array means "same as $orig".
   */
  function __construct($orig, $closing = FALSE) {
    $this->orig = $orig;
    $this->closing = is_array($closing) ? $closing : $orig;
  }

  /**
   * Returns a copy operation with the two sides swapped.
   */
  function reverse() {
    return new self($this->closing, $this->orig);
  }
}
56

    
57
/**
 * Edit operation for lines that exist only in the original sequence.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Delete extends _DiffOp {
  public $type = 'delete';

  /**
   * @param $lines array The removed lines.
   */
  function __construct($lines) {
    // A deletion has no closing side.
    $this->closing = FALSE;
    $this->orig = $lines;
  }

  /**
   * The inverse of deleting lines is adding those same lines.
   */
  function reverse() {
    $removed = $this->orig;
    return new _DiffOp_Add($removed);
  }
}
74

    
75
/**
 * Edit operation for lines that exist only in the closing sequence.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Add extends _DiffOp {
  public $type = 'add';

  /**
   * @param $lines array The inserted lines.
   */
  function __construct($lines) {
    // An addition has no original side.
    $this->orig = FALSE;
    $this->closing = $lines;
  }

  /**
   * The inverse of adding lines is deleting those same lines.
   */
  function reverse() {
    $inserted = $this->closing;
    return new _DiffOp_Delete($inserted);
  }
}
92

    
93
/**
 * Edit operation replacing a run of original lines with closing lines.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffOp_Change extends _DiffOp {
  public $type = 'change';

  /**
   * @param $orig array Lines being replaced.
   * @param $closing array Lines replacing them.
   */
  function __construct($orig, $closing) {
    $this->closing = $closing;
    $this->orig = $orig;
  }

  /**
   * Returns a change operation with the two sides swapped.
   */
  function reverse() {
    return new self($this->closing, $this->orig);
  }
}
110

    
111

    
112
/**
 * Class used internally by Diff to actually compute the diffs.
 *
 * The algorithm used here is mostly lifted from the perl module
 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 *
 * More ideas are taken from:
 *   http://www.ics.uci.edu/~eppstein/161/960229.html
 *
 * Some ideas (and a bit of code) are from analyze.c, from GNU
 * diffutils-2.7, which can be found at:
 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 *
 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 * are my own.
 *
 * Line length limits for robustness added by Tim Starling, 2005-08-31
 *
 * @author Geoffrey T. Dairiki, Tim Starling
 * @private
 * @subpackage DifferenceEngine
 */
class _DiffEngine {
  /**
   * Per-line "changed" flags for the from-sequence, keyed by line index.
   */
  var $xchanged = array();

  /**
   * Per-line "changed" flags for the to-sequence, keyed by line index.
   */
  var $ychanged = array();

  /**
   * Lines of the from-sequence that take part in the LCS search.
   */
  var $xv = array();

  /**
   * Lines of the to-sequence that take part in the LCS search.
   */
  var $yv = array();

  /**
   * Maps positions in $xv back to line indexes of the from-sequence.
   */
  var $xind = array();

  /**
   * Maps positions in $yv back to line indexes of the to-sequence.
   */
  var $yind = array();

  /**
   * Working state of _diag()/_lcs_pos(): $seq[k] is the y position currently
   * ending a common subsequence of length k.
   */
  var $seq = array();

  /**
   * Working state of _diag()/_lcs_pos(): flags the y positions held in $seq.
   */
  var $in_seq = array();

  /**
   * Working state of _diag()/_lcs_pos(): length of the LCS found so far.
   */
  var $lcs = 0;

  /**
   * Lines longer than this are compared through their MD5 hash.
   */
  function MAX_XREF_LENGTH() {
    return 10000;
  }

  /**
   * Computes the edit script that turns $from_lines into $to_lines.
   *
   * @param $from_lines array The original sequence of strings.
   * @param $to_lines array The closing sequence of strings.
   * @return array A list of _DiffOp_Copy, _DiffOp_Delete, _DiffOp_Add and
   *   _DiffOp_Change objects, in sequence order.
   */
  function diff($from_lines, $to_lines) {

    $n_from = sizeof($from_lines);
    $n_to = sizeof($to_lines);

    // Reset all per-run state so one engine instance can be reused.
    $this->xchanged = $this->ychanged = array();
    $this->xv = $this->yv = array();
    $this->xind = $this->yind = array();
    $this->seq = array();
    $this->in_seq = array();
    $this->lcs = 0;

    // Skip leading common lines.
    for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
      if ($from_lines[$skip] !== $to_lines[$skip]) {
        break;
      }
      $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
    }
    // Skip trailing common lines.
    $xi = $n_from;
    $yi = $n_to;
    for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
      if ($from_lines[$xi] !== $to_lines[$yi]) {
        break;
      }
      $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
    }

    // Ignore lines which do not exist in both files.
    $xhash = array();
    $yhash = array();
    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
      $xhash[$this->_line_hash($from_lines[$xi])] = 1;
    }

    for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
      $line = $to_lines[$yi];
      // A to-line that never occurs in the from-range is marked changed
      // right away and kept out of the LCS search.
      if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
        continue;
      }
      $yhash[$this->_line_hash($line)] = 1;
      $this->yv[] = $line;
      $this->yind[] = $yi;
    }
    for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
      $line = $from_lines[$xi];
      if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
        continue;
      }
      $this->xv[] = $line;
      $this->xind[] = $xi;
    }

    // Find the LCS.
    $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));

    // Merge edits when possible
    $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
    $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

    // Compute the edit operations.
    $edits = array();
    $xi = $yi = 0;
    while ($xi < $n_from || $yi < $n_to) {
      USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
      USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

      // Skip matching "snake".
      $copy = array();
      while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
        $copy[] = $from_lines[$xi++];
        ++$yi;
      }
      if ($copy) {
        $edits[] = new _DiffOp_Copy($copy);
      }
      // Find deletes & adds. Zero-length lines are consumed but not
      // recorded in the resulting operation.
      $delete = array();
      while ($xi < $n_from && $this->xchanged[$xi]) {
        $_fl = $from_lines[$xi++];
        if (strlen($_fl)) {
          $delete[] = $_fl;
        }
      }
      $add = array();
      while ($yi < $n_to && $this->ychanged[$yi]) {
        $_tl = $to_lines[$yi++];
        if (strlen($_tl)) {
          $add[] = $_tl;
        }
      }
      if ($delete && $add) {
        $edits[] = new _DiffOp_Change($delete, $add);
      }
      elseif ($delete) {
        $edits[] = new _DiffOp_Delete($delete);
      }
      elseif ($add) {
        $edits[] = new _DiffOp_Add($add);
      }
    }
    return $edits;
  }

  /**
   * Returns the whole line if it's small enough, or the MD5 hash otherwise.
   */
  function _line_hash($line) {
    if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
      return md5($line);
    }
    else {
      return $line;
    }
  }

  /**
   * Divide the Largest Common Subsequence (LCS) of the sequences
   * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
   * sized segments.
   *
   * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
   * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
   * sub sequences.  The first sub-sequence is contained in [X0, X1),
   * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
   * that (X0, Y0) == (XOFF, YOFF) and
   * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
   *
   * This function assumes that the first lines of the specified portions
   * of the two files do not match, and likewise that the last lines do not
   * match.  The caller must trim matching lines from the beginning and end
   * of the portions it is going to specify.
   */
  function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
    $flip = FALSE;

    if ($xlim - $xoff > $ylim - $yoff) {
      // Things seem faster (I'm not sure I understand why)
      // when the shortest sequence is in X.
      $flip = TRUE;
      list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
    }

    // Index the positions of every line of the (possibly flipped) Y
    // sequence; positions are collected in descending order.
    $ymatches = array();
    if ($flip) {
      for ($i = $ylim - 1; $i >= $yoff; $i--) {
        $ymatches[$this->xv[$i]][] = $i;
      }
    }
    else {
      for ($i = $ylim - 1; $i >= $yoff; $i--) {
        $ymatches[$this->yv[$i]][] = $i;
      }
    }
    $this->lcs = 0;
    $this->seq[0]= $yoff - 1;
    $this->in_seq = array();
    $ymids[0] = array();

    $numer = $xlim - $xoff + $nchunks - 1;
    $x = $xoff;
    for ($chunk = 0; $chunk < $nchunks; $chunk++) {
      if ($chunk > 0) {
        for ($i = 0; $i <= $this->lcs; $i++) {
          $ymids[$i][$chunk-1] = $this->seq[$i];
        }
      }

      $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
      for ( ; $x < $x1; $x++) {
        $line = $flip ? $this->yv[$x] : $this->xv[$x];
        if (empty($ymatches[$line])) {
          continue;
        }
        $matches = $ymatches[$line];
        // One pass in two phases (formerly two each() loops sharing the
        // array pointer; each() no longer exists in PHP 8). Phase one looks
        // for the first match not yet in the sequence; phase two handles
        // every match after it.
        $found_first = FALSE;
        foreach ($matches as $y) {
          if (!$found_first) {
            if (empty($this->in_seq[$y])) {
              $k = $this->_lcs_pos($y);
              USE_ASSERTS && assert($k > 0);
              $ymids[$k] = $ymids[$k-1];
              $found_first = TRUE;
            }
          }
          elseif ($y > $this->seq[$k-1]) {
            USE_ASSERTS && assert($y < $this->seq[$k]);
            // Optimization: this is a common case:
            // next match is just replacing previous match.
            $this->in_seq[$this->seq[$k]] = FALSE;
            $this->seq[$k] = $y;
            $this->in_seq[$y] = 1;
          }
          elseif (empty($this->in_seq[$y])) {
            $k = $this->_lcs_pos($y);
            USE_ASSERTS && assert($k > 0);
            $ymids[$k] = $ymids[$k-1];
          }
        }
      }
    }

    $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
    $ymid = $ymids[$this->lcs];
    for ($n = 0; $n < $nchunks - 1; $n++) {
      $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
      $y1 = $ymid[$n] + 1;
      $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
    }
    $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

    return array($this->lcs, $seps);
  }

  /**
   * Inserts y position $ypos into $this->seq and returns its index there.
   *
   * Either appends (growing $this->lcs) when $ypos is beyond the current
   * last entry, or binary-searches $this->seq and replaces the first entry
   * that is not smaller than $ypos. $this->in_seq is kept in step.
   */
  function _lcs_pos($ypos) {

    $end = $this->lcs;
    if ($end == 0 || $ypos > $this->seq[$end]) {
      $this->seq[++$this->lcs] = $ypos;
      $this->in_seq[$ypos] = 1;
      return $this->lcs;
    }

    $beg = 1;
    while ($beg < $end) {
      $mid = (int)(($beg + $end) / 2);
      if ($ypos > $this->seq[$mid]) {
        $beg = $mid + 1;
      }
      else {
        $end = $mid;
      }
    }

    USE_ASSERTS && assert($ypos != $this->seq[$end]);

    $this->in_seq[$this->seq[$end]] = FALSE;
    $this->seq[$end] = $ypos;
    $this->in_seq[$ypos] = 1;
    return $end;
  }

  /**
   * Find LCS of two sequences.
   *
   * The results are recorded in the vectors $this->{x,y}changed[], by
   * storing a 1 in the element for each line that is an insertion
   * or deletion (ie. is not in the LCS).
   *
   * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
   *
   * Note that XLIM, YLIM are exclusive bounds.
   * All line numbers are origin-0 and discarded lines are not counted.
   */
  function _compareseq($xoff, $xlim, $yoff, $ylim) {

    // Slide down the bottom initial diagonal.
    while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
      ++$xoff;
      ++$yoff;
    }

    // Slide up the top initial diagonal.
    while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
      --$xlim;
      --$ylim;
    }

    if ($xoff == $xlim || $yoff == $ylim) {
      $lcs = 0;
    }
    else {
      // This is ad hoc but seems to work well.
      //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
      //$nchunks = max(2, min(8, (int)$nchunks));
      $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
      list($lcs, $seps)
      = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
    }

    if ($lcs == 0) {
      // X and Y sequences have no common subsequence:
      // mark all changed.
      while ($yoff < $ylim) {
        $this->ychanged[$this->yind[$yoff++]] = 1;
      }
      while ($xoff < $xlim) {
        $this->xchanged[$this->xind[$xoff++]] = 1;
      }
    }
    else {
      // Use the partitions to split this problem into subproblems.
      reset($seps);
      $pt1 = $seps[0];
      while ($pt2 = next($seps)) {
        $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
        $pt1 = $pt2;
      }
    }
  }

  /**
   * Adjust inserts/deletes of identical lines to join changes
   * as much as possible.
   *
   * We do something when a run of changed lines include a
   * line at one end and has an excluded, identical line at the other.
   * We are free to choose which identical line is included.
   * `compareseq' usually chooses the one at the beginning,
   * but usually it is cleaner to consider the following identical line
   * to be the "change".
   *
   * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
   *
   * @param $lines array The lines of one of the two sequences.
   * @param $changed array The changed flags for $lines; modified in place.
   * @param $other_changed array The changed flags of the other sequence.
   */
  function _shift_boundaries($lines, &$changed, $other_changed) {
    $i = 0;
    $j = 0;

    USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
    $len = sizeof($lines);
    $other_len = sizeof($other_changed);

    while (1) {
      /*
       * Scan forwards to find beginning of another run of changes.
       * Also keep track of the corresponding point in the other file.
       *
       * Throughout this code, $i and $j are adjusted together so that
       * the first $i elements of $changed and the first $j elements
       * of $other_changed both contain the same number of zeros
       * (unchanged lines).
       * Furthermore, $j is always kept so that $j == $other_len or
       * $other_changed[$j] == FALSE.
       */
      while ($j < $other_len && $other_changed[$j]) {
        $j++;
      }
      while ($i < $len && ! $changed[$i]) {
        USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
        $i++;
        $j++;
        while ($j < $other_len && $other_changed[$j]) {
          $j++;
        }
      }

      if ($i == $len) {
        break;
      }
      $start = $i;

      // Find the end of this run of changes.
      while (++$i < $len && $changed[$i]) {
        continue;
      }

      do {
        /*
         * Record the length of this run of changes, so that
         * we can later determine whether the run has grown.
         */
        $runlength = $i - $start;

        /*
         * Move the changed region back, so long as the
         * previous unchanged line matches the last changed one.
         * This merges with previous changed regions.
         */
        while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
          $changed[--$start] = 1;
          $changed[--$i] = FALSE;
          while ($start > 0 && $changed[$start - 1]) {
            $start--;
          }
          USE_ASSERTS && assert($j > 0);
          while ($other_changed[--$j]) {
            continue;
          }
          USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
        }

        /*
         * Set CORRESPONDING to the end of the changed run, at the last
         * point where it corresponds to a changed run in the other file.
         * CORRESPONDING == LEN means no such point has been found.
         */
        $corresponding = $j < $other_len ? $i : $len;

        /*
         * Move the changed region forward, so long as the
         * first changed line matches the following unchanged one.
         * This merges with following changed regions.
         * Do this second, so that if there are no merges,
         * the changed region is moved forward as far as possible.
         */
        while ($i < $len && $lines[$start] == $lines[$i]) {
          $changed[$start++] = FALSE;
          $changed[$i++] = 1;
          while ($i < $len && $changed[$i]) {
            $i++;
          }
          USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
          $j++;
          if ($j < $other_len && $other_changed[$j]) {
            $corresponding = $i;
            while ($j < $other_len && $other_changed[$j]) {
              $j++;
            }
          }
        }
      } while ($runlength != $i - $start);

      /*
       * If possible, move the fully-merged run of changes
       * back to a corresponding run in the other file.
       */
      while ($corresponding < $i) {
        $changed[--$start] = 1;
        $changed[--$i] = 0;
        USE_ASSERTS && assert($j > 0);
        while ($other_changed[--$j]) {
          continue;
        }
        USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
      }
    }
  }
}
567

    
568
/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * The diff is stored in $edits as a list of edit operation objects, each
 * exposing ->type, ->orig, ->closing and ->reverse().
 *
 * @private
 * @subpackage DifferenceEngine
 */
class Diff {
  /**
   * The list of edit operations making up this diff.
   */
  var $edits;

  /**
   * Constructor.
   * Computes diff between sequences of strings.
   *
   * @param $from_lines array An array of strings.
   *      (Typically these are lines from a file.)
   * @param $to_lines array An array of strings.
   */
  function __construct($from_lines, $to_lines) {
    $eng = new _DiffEngine;
    $this->edits = $eng->diff($from_lines, $to_lines);
    //$this->_check($from_lines, $to_lines);
  }

  /**
   * Compute reversed Diff.
   *
   * SYNOPSIS:
   *
   *  $diff = new Diff($lines1, $lines2);
   *  $rev = $diff->reverse();
   *
   * @return object A Diff object representing the inverse of the
   *          original diff. The original diff is left untouched.
   */
  function reverse() {
    // Objects are handles: a plain assignment would alias $this, so that
    // clearing $rev->edits would also empty the list being iterated and
    // the result would contain no edits at all. Work on a clone instead.
    $rev = clone $this;
    $rev->edits = array();
    foreach ($this->edits as $edit) {
      $rev->edits[] = $edit->reverse();
    }
    return $rev;
  }

  /**
   * Check for empty diff.
   *
   * @return bool True iff two sequences were identical.
   */
  function isEmpty() {
    foreach ($this->edits as $edit) {
      if ($edit->type != 'copy') {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * Compute the length of the Longest Common Subsequence (LCS).
   *
   * This is mostly for diagnostic purposes.
   *
   * @return int The length of the LCS.
   */
  function lcs() {
    $lcs = 0;
    foreach ($this->edits as $edit) {
      if ($edit->type == 'copy') {
        $lcs += sizeof($edit->orig);
      }
    }
    return $lcs;
  }

  /**
   * Get the original set of lines.
   *
   * This reconstructs the $from_lines parameter passed to the
   * constructor.
   *
   * @return array The original sequence of strings.
   */
  function orig() {
    $lines = array();

    foreach ($this->edits as $edit) {
      if ($edit->orig) {
        // Append this edit's original lines to the end of the list.
        array_splice($lines, sizeof($lines), 0, $edit->orig);
      }
    }
    return $lines;
  }

  /**
   * Get the closing set of lines.
   *
   * This reconstructs the $to_lines parameter passed to the
   * constructor.
   *
   * @return array The sequence of strings.
   */
  function closing() {
    $lines = array();

    foreach ($this->edits as $edit) {
      if ($edit->closing) {
        // Append this edit's closing lines to the end of the list.
        array_splice($lines, sizeof($lines), 0, $edit->closing);
      }
    }
    return $lines;
  }

  /**
   * Check a Diff for validity.
   *
   * This is here only for debugging purposes. Every failed check raises an
   * E_USER_ERROR; a successful run ends with an E_USER_NOTICE reporting the
   * LCS length.
   *
   * @param $from_lines array The original sequence the diff was built from.
   * @param $to_lines array The closing sequence the diff was built from.
   */
  function _check($from_lines, $to_lines) {
    if (serialize($from_lines) != serialize($this->orig())) {
      trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
    }
    if (serialize($to_lines) != serialize($this->closing())) {
      trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
    }

    $rev = $this->reverse();
    if (serialize($to_lines) != serialize($rev->orig())) {
      trigger_error("Reversed original doesn't match", E_USER_ERROR);
    }
    if (serialize($from_lines) != serialize($rev->closing())) {
      trigger_error("Reversed closing doesn't match", E_USER_ERROR);
    }

    // Two consecutive edits of the same type could have been merged.
    $prevtype = 'none';
    foreach ($this->edits as $edit) {
      if ( $prevtype == $edit->type ) {
        trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
      }
      $prevtype = $edit->type;
    }

    $lcs = $this->lcs();
    trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  }
}
713

    
714
/**
 * A Diff computed on one pair of sequences but reported in terms of another.
 *
 * The comparison runs on the "mapped" sequences; afterwards the lines stored
 * in every edit are replaced by the lines at the same positions of the
 * unmapped sequences.
 *
 * FIXME: bad name.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class MappedDiff extends Diff {
  /**
   * Constructor.
   *
   * Computes diff between sequences of strings.
   *
   * This can be used to compute things like
   * case-insensitive diffs, or diffs which ignore
   * changes in white-space.
   *
   * @param $from_lines array An array of strings.
   *  (Typically these are lines from a file.)
   *
   * @param $to_lines array An array of strings.
   *
   * @param $mapped_from_lines array This array should
   *  have the same number of elements as $from_lines.
   *  The elements in $mapped_from_lines and
   *  $mapped_to_lines are what is actually compared
   *  when computing the diff.
   *
   * @param $mapped_to_lines array This array should
   *  have the same number of elements as $to_lines.
   */
  function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
    assert(sizeof($from_lines) == sizeof($mapped_from_lines));
    assert(sizeof($to_lines) == sizeof($mapped_to_lines));

    parent::__construct($mapped_from_lines, $mapped_to_lines);

    // Walk the edits in order, swapping each mapped run for the run of the
    // same length taken from the unmapped lines.
    $from_offset = 0;
    $to_offset = 0;
    foreach ($this->edits as $edit) {
      if (is_array($edit->orig)) {
        $edit->orig = array_slice($from_lines, $from_offset, count($edit->orig));
        $from_offset += count($edit->orig);
      }

      if (is_array($edit->closing)) {
        $edit->closing = array_slice($to_lines, $to_offset, count($edit->closing));
        $to_offset += count($edit->closing);
      }
    }
  }
}
766

    
767
/**
 * A class to format Diffs
 *
 * This class formats the diff in classic diff format.
 * It is intended that this class be customized via inheritance,
 * to obtain fancier outputs.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DiffFormatter {
  /**
   * Should a block header be shown?
   */
  var $show_header = TRUE;

  /**
   * Number of leading context "lines" to preserve.
   *
   * This should be left at zero for this class, but subclasses
   * may want to set this to other values.
   */
  var $leading_context_lines = 0;

  /**
   * Number of trailing context "lines" to preserve.
   *
   * This should be left at zero for this class, but subclasses
   * may want to set this to other values.
   */
  var $trailing_context_lines = 0;

  /**
   * Running totals updated by format(): after each call the final x/y line
   * counters of that call are added to ['counter']['x'] and ['counter']['y'].
   *
   * Declared here so that format() does not have to create the property and
   * its keys on the fly.
   */
  var $line_stats = array(
    'counter' => array('x' => 0, 'y' => 0),
  );

  /**
   * Format a diff.
   *
   * @param $diff object A Diff object.
   * @return string The formatted output.
   */
  function format($diff) {
    // 1-based positions of the next line in the original/closing sequence.
    $xi = $yi = 1;
    // Edits collected for the block being built, or FALSE between blocks.
    $block = FALSE;
    $context = array();
    // Start positions of the current block; set whenever a block is opened.
    $x0 = $y0 = 0;

    $nlead = $this->leading_context_lines;
    $ntrail = $this->trailing_context_lines;

    $this->_start_diff();

    foreach ($diff->edits as $edit) {
      if ($edit->type == 'copy') {
        if (is_array($block)) {
          if (sizeof($edit->orig) <= $nlead + $ntrail) {
            // Short enough to serve entirely as context inside the block.
            $block[] = $edit;
          }
          else {
            // Close the block, keeping only the trailing context lines.
            if ($ntrail) {
              $context = array_slice($edit->orig, 0, $ntrail);
              $block[] = new _DiffOp_Copy($context);
            }
            $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
            $block = FALSE;
          }
        }
        $context = $edit->orig;
      }
      else {
        if (! is_array($block)) {
          // Open a new block, preceded by the leading context lines.
          $context = array_slice($context, sizeof($context) - $nlead);
          $x0 = $xi - sizeof($context);
          $y0 = $yi - sizeof($context);
          $block = array();
          if ($context) {
            $block[] = new _DiffOp_Copy($context);
          }
        }
        $block[] = $edit;
      }

      if ($edit->orig) {
        $xi += sizeof($edit->orig);
      }
      if ($edit->closing) {
        $yi += sizeof($edit->closing);
      }
    }

    if (is_array($block)) {
      $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
    }
    $end = $this->_end_diff();

    // $xi and $yi start at 1 and only grow, so they are always added.
    $this->line_stats['counter']['x'] += $xi;
    $this->line_stats['counter']['y'] += $yi;

    return $end;
  }

  /**
   * Emits one block: its header followed by every edit it contains.
   *
   * @param $xbeg int First line of the block in the original sequence.
   * @param $xlen int Number of original lines covered by the block.
   * @param $ybeg int First line of the block in the closing sequence.
   * @param $ylen int Number of closing lines covered by the block.
   * @param $edits array The edit operations of the block.
   */
  function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
    $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
    foreach ($edits as $edit) {
      switch ($edit->type) {
        case 'copy':
          $this->_context($edit->orig);
          break;

        case 'add':
          $this->_added($edit->closing);
          break;

        case 'delete':
          $this->_deleted($edit->orig);
          break;

        case 'change':
          $this->_changed($edit->orig, $edit->closing);
          break;

        default:
          trigger_error('Unknown edit type', E_USER_ERROR);
      }
    }
    $this->_end_block();
  }

  /**
   * Starts capturing everything the formatter echoes.
   */
  function _start_diff() {
    ob_start();
  }

  /**
   * Stops capturing and returns the captured output.
   */
  function _end_diff() {
    return ob_get_clean();
  }

  /**
   * Builds a header such as "3c3", "5,7d4" or "2a3,4".
   *
   * A range is written "begin,end" when it covers more than one line. The
   * letter is 'a' when $xlen is zero, 'd' when only $ylen is zero, and 'c'
   * otherwise.
   */
  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
    if ($xlen > 1) {
      $xbeg .= "," . ($xbeg + $xlen - 1);
    }
    if ($ylen > 1) {
      $ybeg .= "," . ($ybeg + $ylen - 1);
    }

    return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  }

  /**
   * Prints the block header when $show_header is set.
   */
  function _start_block($header) {
    if ($this->show_header) {
      echo $header . "\n";
    }
  }

  /**
   * Hook called after the last edit of a block; does nothing here.
   */
  function _end_block() {
  }

  /**
   * Prints every line as "<prefix> <line>" followed by a newline.
   */
  function _lines($lines, $prefix = ' ') {
    foreach ($lines as $line) {
      echo "$prefix $line\n";
    }
  }

  /**
   * Prints unchanged context lines.
   */
  function _context($lines) {
    $this->_lines($lines);
  }

  /**
   * Prints added lines, prefixed with '>'.
   */
  function _added($lines) {
    $this->_lines($lines, '>');
  }

  /**
   * Prints deleted lines, prefixed with '<'.
   */
  function _deleted($lines) {
    $this->_lines($lines, '<');
  }

  /**
   * Prints a change: the deleted lines, a "---" separator, the added lines.
   */
  function _changed($orig, $closing) {
    $this->_deleted($orig);
    echo "---\n";
    $this->_added($closing);
  }
}
943

    
944

    
945
/**
946
 *  Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
947
 *
948
 */
949

    
950
// HTML numeric character reference for a non-breaking space (U+00A0).
define('NBSP', '&#160;');
951

    
952
/**
 * Collects words into HTML-escaped output lines, highlighting marked words.
 *
 * Consecutive words sharing the same tag form a group. When a group is
 * flushed it is escaped with check_plain(); groups tagged 'mark' are
 * additionally wrapped in <span class="diffchange">.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class _HWLDF_WordAccumulator {
  /**
   * Completed output lines.
   */
  var $_lines = array();

  /**
   * Markup of the line currently being assembled.
   */
  var $_line = '';

  /**
   * Raw (unescaped) text of the current group of words.
   */
  var $_group = '';

  /**
   * Tag of the current group ('' or 'mark' during accumulation).
   */
  var $_tag = '';

  function __construct() {
    $this->_lines = array();
    $this->_line = '';
    $this->_group = '';
    $this->_tag = '';
  }

  /**
   * Appends the pending group to the current line and starts a new group.
   *
   * @param $new_tag string Tag of the group that starts now.
   */
  function _flushGroup($new_tag) {
    if ($this->_group !== '') {
      if ($this->_tag == 'mark') {
        $this->_line .= '<span class="diffchange">' . check_plain($this->_group) . '</span>';
      }
      else {
        $this->_line .= check_plain($this->_group);
      }
    }
    $this->_group = '';
    $this->_tag = $new_tag;
  }

  /**
   * Finishes the current line and starts an empty one.
   *
   * @param $new_tag string Tag of the group that starts on the new line.
   */
  function _flushLine($new_tag) {
    $this->_flushGroup($new_tag);
    if ($this->_line != '') {
      array_push($this->_lines, $this->_line);
    }
    else {
      // make empty lines visible by inserting an NBSP
      array_push($this->_lines, NBSP);
    }
    $this->_line = '';
  }

  /**
   * Adds words to the output.
   *
   * @param $words array The words to add. A word starting with "\n" ends
   *   the current line first; empty words are ignored.
   * @param $tag string '' for plain words, 'mark' for highlighted ones.
   */
  function addWords($words, $tag = '') {
    if ($tag != $this->_tag) {
      $this->_flushGroup($tag);
    }
    foreach ($words as $word) {
      // new-line should only come as first char of word.
      if ($word == '') {
        continue;
      }
      if ($word[0] == "\n") {
        $this->_flushLine($tag);
        $word = drupal_substr($word, 1);
      }
      assert(!strstr($word, "\n"));
      $this->_group .= $word;
    }
  }

  /**
   * Flushes whatever is pending and returns all lines.
   *
   * @return array The accumulated lines of markup.
   */
  function getLines() {
    $this->_flushLine('~done');
    return $this->_lines;
  }
}
1013

    
1014
/**
 * Word-granularity diff between two blocks of lines.
 *
 * Both blocks are tokenised by _split(); the full tokens and their "stripped"
 * counterparts are handed to the MappedDiff constructor. orig() and closing()
 * then rebuild either side as HTML-escaped lines in which the words that are
 * not part of a 'copy' edit are highlighted.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class WordLevelDiff extends MappedDiff {
  /**
   * Returns the line length above which a line is no longer split into words.
   */
  function MAX_LINE_LENGTH() {
    return 10000;
  }

  /**
   * Builds the word-level diff.
   *
   * @param $orig_lines
   *   Array of lines of the original text.
   * @param $closing_lines
   *   Array of lines of the new text.
   */
  function __construct($orig_lines, $closing_lines) {
    // Each _split() result is array(words, stripped words).
    $from = $this->_split($orig_lines);
    $to = $this->_split($closing_lines);

    parent::__construct($from[0], $to[0], $from[1], $to[1]);
  }

  /**
   * Tokenises a list of lines into words.
   *
   * @param $lines
   *   Array of line strings.
   *
   * @return
   *   array($words, $stripped): two parallel lists. $words holds the full
   *   regex matches (a token plus an optional trailing whitespace character),
   *   $stripped holds only the first capture group of each match. Lines are
   *   separated by a "\n" entry in both lists.
   */
  function _split($lines) {
    $tokens = array();
    $bare = array();
    $seen = 0;
    foreach ($lines as $line) {
      // Every line except the first one is preceded by a newline token.
      if ($seen++ > 0) {
        $tokens[] = "\n";
        $bare[] = "\n";
      }

      // If the line is too long, just pretend the entire line is one big
      // word. This prevents resource exhaustion problems.
      if (drupal_strlen($line) > $this->MAX_LINE_LENGTH()) {
        $tokens[] = $line;
        $bare[] = $line;
        continue;
      }

      $found = preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m);
      if ($found) {
        $tokens = array_merge($tokens, $m[0]);
        $bare = array_merge($bare, $m[1]);
      }
    }
    return array($tokens, $bare);
  }

  /**
   * Renders the original side of the diff.
   *
   * @return
   *   Array of HTML-escaped lines; words from non-copy edits are marked.
   */
  function orig() {
    $accumulator = new _HWLDF_WordAccumulator;

    foreach ($this->edits as $edit) {
      $is_copy = ($edit->type == 'copy');
      // Non-copy edits without an original side contribute nothing here.
      if (!$is_copy && !$edit->orig) {
        continue;
      }
      $accumulator->addWords($edit->orig, $is_copy ? '' : 'mark');
    }
    return $accumulator->getLines();
  }

  /**
   * Renders the closing (new) side of the diff.
   *
   * @return
   *   Array of HTML-escaped lines; words from non-copy edits are marked.
   */
  function closing() {
    $accumulator = new _HWLDF_WordAccumulator;

    foreach ($this->edits as $edit) {
      $is_copy = ($edit->type == 'copy');
      // Non-copy edits without a closing side contribute nothing here.
      if (!$is_copy && !$edit->closing) {
        continue;
      }
      $accumulator->addWords($edit->closing, $is_copy ? '' : 'mark');
    }
    return $accumulator->getLines();
  }
}
1089

    
1090
/**
 * Diff formatter which uses Drupal theme functions.
 *
 * The formatter collects table rows in $rows. Each row is a flat array of
 * four cells: marker and content for the old side, followed by marker and
 * content for the new side.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DrupalDiffFormatter extends DiffFormatter {

  // Rows collected for the diff currently being formatted.
  var $rows;
  // Line number bookkeeping; the 'offset' values are added to the block start
  // line numbers shown in block headers.
  var $line_stats = array(
    'counter' => array('x' => 0, 'y' => 0),
    'offset' => array('x' => 0, 'y' => 0),
  );

  /**
   * Reads the number of context lines from the site configuration.
   */
  function __construct() {
    $this->leading_context_lines = variable_get('diff_context_lines_leading', 2);
    $this->trailing_context_lines = variable_get('diff_context_lines_trailing', 2);
  }

  /**
   * Resets the row list at the start of a diff.
   */
  function _start_diff() {
    $this->rows = array();
  }

  /**
   * Returns the rows collected for the diff.
   */
  function _end_diff() {
    return $this->rows;
  }

  /**
   * Builds the header row of a diff block.
   *
   * @param $xbeg
   *   First line number of the block on the old side.
   * @param $xlen
   *   Unused here.
   * @param $ybeg
   *   First line number of the block on the new side.
   * @param $ylen
   *   Unused here.
   *
   * @return
   *   Row with two cells (each spanning two columns) holding the themed line
   *   numbers, shifted by the configured line offsets.
   */
  function _block_header($xbeg, $xlen, $ybeg, $ylen) {
    return array(
      array(
        'data' => theme('diff_header_line', array('lineno' => $xbeg + $this->line_stats['offset']['x'])),
        'colspan' => 2,
      ),
      array(
        'data' => theme('diff_header_line', array('lineno' => $ybeg + $this->line_stats['offset']['y'])),
        'colspan' => 2,
      )
    );
  }

  /**
   * Adds the block header row, unless headers are switched off.
   */
  function _start_block($header) {
    if ($this->show_header) {
      $this->rows[] = $header;
    }
  }

  /**
   * Nothing to do at the end of a block.
   */
  function _end_block() {
  }

  /**
   * Intentionally empty: rows are built by _added(), _deleted(), _context()
   * and _changed() instead.
   */
  function _lines($lines, $prefix=' ', $color='white') {
  }

  /**
   * Returns the marker and content cells for an added line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function addedLine($line) {
    return array(
      array(
        'data' => '+',
        'class' => 'diff-marker',
      ),
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context diff-addedline',
      )
    );
  }

  /**
   * Returns the marker and content cells for a deleted line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function deletedLine($line) {
    return array(
      array(
        'data' => '-',
        'class' => 'diff-marker',
      ),
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context diff-deletedline',
      )
    );
  }

  /**
   * Returns the marker and content cells for an unchanged line.
   *
   * Note: you should HTML-escape parameter before calling this.
   */
  function contextLine($line) {
    return array(
      '&nbsp;',
      array(
        'data' => theme('diff_content_line', array('line' => $line)),
        'class' => 'diff-context',
      )
    );
  }

  /**
   * Returns the two cells used when one side of a row has no line.
   */
  function emptyLine() {
    return array(
      '&nbsp;',
      theme('diff_empty_line', array('line' => '&nbsp;')),
    );
  }

  /**
   * Adds one row per added line: empty old side, added line on the new side.
   */
  function _added($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
    }
  }

  /**
   * Adds one row per deleted line: deleted line on the old side, empty new
   * side.
   */
  function _deleted($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
    }
  }

  /**
   * Adds one row per context line, showing the same line on both sides.
   */
  function _context($lines) {
    foreach ($lines as $line) {
      $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
    }
  }

  /**
   * Adds rows for a changed block, with word-level highlighting.
   *
   * Deleted and added lines are paired up row by row; whichever side has more
   * lines gets the remaining rows with an empty cell on the other side.
   *
   * @param $orig
   *   Array of original lines of the block.
   * @param $closing
   *   Array of new lines of the block.
   */
  function _changed($orig, $closing) {
    $diff = new WordLevelDiff($orig, $closing);
    $del = $diff->orig();
    $add = $diff->closing();

    // Notice that WordLevelDiff returns HTML-escaped output.
    // Hence, we will be calling addedLine/deletedLine without HTML-escaping.

    // array_shift() returns NULL once the list is exhausted. Compare against
    // NULL explicitly: a plain truthiness test would stop at a line whose
    // rendered content is the string '0' and silently drop the remaining
    // deleted lines.
    while (($line = array_shift($del)) !== NULL) {
      $aline = array_shift($add);
      $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
    }
    // If any leftovers.
    foreach ($add as $line) {
      $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
    }
  }
}
1228

    
1229
/**
 * Drupal inline Diff formatter.
 *
 * Compares two HTML strings token by token (tags and spaces are their own
 * tokens) and renders a single HTML string in which deleted and added/changed
 * text is passed through the 'diff_inline_chunk' theme hook.
 *
 * @private
 * @subpackage DifferenceEngine
 */
class DrupalDiffInline {
  // The original HTML string.
  var $a;
  // The new HTML string.
  var $b;

  /**
   * Constructor.
   *
   * @param $a
   *   The original HTML string.
   * @param $b
   *   The new HTML string.
   */
  function __construct($a, $b) {
    $this->a = $a;
    $this->b = $b;
  }

  /**
   * Render differences inline using HTML markup.
   *
   * @return
   *   HTML string. Copied text is emitted from the new side as is; deleted
   *   text is themed unless it is a tag; for every other edit type the new
   *   side is themed, except for single spaces and non-<img> tags.
   */
  function render() {
    $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
    $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
    $diff = new Diff($a, $b);
    $diff->edits = $this->process_edits($diff->edits);

    // Assemble highlighted output
    $output = '';
    foreach ($diff->edits as $chunk) {
      switch ($chunk->type) {
        case 'copy':
          $output .= implode('', $chunk->closing);
          break;
        case 'delete':
          foreach ($chunk->orig as $piece) {
            if ($this->_is_tag($piece)) {
              $output .= $piece;
            }
            else {
              $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
            }
          }
          break;
        default:
          $chunk->closing = $this->process_chunk($chunk->closing);
          foreach ($chunk->closing as $piece) {
            // Images are themed like text so that a changed image is
            // highlighted; all other tags and bare spaces pass through.
            if ($piece === ' ' || ($this->_is_tag($piece) && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
              $output .= $piece;
            }
            else {
              $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
            }
          }
          break;
      }
    }
    return $output;
  }

  /**
   * Merge chunk segments between tag delimiters.
   *
   * @param $chunk
   *   Sequentially indexed array of tokens.
   *
   * @return
   *   Array in which every tag is an entry of its own and each run of
   *   consecutive non-tag tokens is concatenated into a single entry.
   */
  function process_chunk($chunk) {
    $processed = array();
    $j = 0;
    foreach ($chunk as $i => $piece) {
      $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
      if (!isset($processed[$j])) {
        $processed[$j] = '';
      }
      if ($this->_is_tag($piece)) {
        $processed[$j] = $piece;
        $j++;
      }
      elseif (isset($next) && $this->_is_tag($next)) {
        // Last text token before a tag: close the current entry.
        $processed[$j] .= $piece;
        $j++;
      }
      else {
        $processed[$j] .= $piece;
      }
    }
    return $processed;
  }

  /**
   * Merge copy and equivalent edits into intelligible chunks.
   *
   * @param $edits
   *   Array of edit objects with 'type', 'orig' and 'closing' properties. The
   *   objects are modified in place when edits are merged into them.
   *
   * @return
   *   Array of merged edit objects; empty if $edits is empty.
   */
  function process_edits($edits) {
    // Without this guard the code below would return array(NULL), and
    // render() would then dereference that NULL entry.
    if (empty($edits)) {
      return array();
    }

    $processed = array();
    $current = array_shift($edits);

    // Make two passes -- first merge space delimiter copies back into their originals.
    while ($chunk = array_shift($edits)) {
      if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
        $current->orig = $this->_merge_side($current->orig, $chunk->orig);
        $current->closing = $this->_merge_side($current->closing, $chunk->closing);
      }
      else {
        $processed[] = $current;
        $current = $chunk;
      }
    }
    $processed[] = $current;

    // Initial setup
    $edits = $processed;
    $processed = array();
    $current = array_shift($edits);

    // Second, merge equivalent chunks into each other.
    while ($chunk = array_shift($edits)) {
      if ($current->type == $chunk->type) {
        $current->orig = $this->_merge_side($current->orig, $chunk->orig);
        $current->closing = $this->_merge_side($current->closing, $chunk->closing);
      }
      else {
        $processed[] = $current;
        $current = $chunk;
      }
    }
    $processed[] = $current;

    return $processed;
  }

  /**
   * Tells whether a token is an HTML tag, i.e. starts with '<' and ends with
   * '>'.
   */
  function _is_tag($piece) {
    return strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>';
  }

  /**
   * Concatenates one side (orig or closing) of two edits.
   *
   * Add and delete edits store FALSE for the side they do not have. A plain
   * (array) cast would turn that into array(FALSE) and leave a bogus element
   * in the merged list, so non-array sides are treated as empty instead.
   */
  function _merge_side($first, $second) {
    return array_merge(is_array($first) ? $first : array(), is_array($second) ? $second : array());
  }
}