1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34:
35:
36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57:
58:
59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77:
78: abstract class FineDiffOp {
79: abstract public function getFromLen();
80: abstract public function getToLen();
81: abstract public function getOpcode();
82: }
83:
84: class FineDiffDeleteOp extends FineDiffOp {
85: public function __construct($len) {
86: $this->fromLen = $len;
87: }
88: public function getFromLen() {
89: return $this->fromLen;
90: }
91: public function getToLen() {
92: return 0;
93: }
94: public function getOpcode() {
95: if ( $this->fromLen === 1 ) {
96: return 'd';
97: }
98: return "d{$this->fromLen}";
99: }
100: }
101:
102: class FineDiffInsertOp extends FineDiffOp {
103: public function __construct($text) {
104: $this->text = $text;
105: }
106: public function getFromLen() {
107: return 0;
108: }
109: public function getToLen() {
110: return strlen($this->text);
111: }
112: public function getText() {
113: return $this->text;
114: }
115: public function getOpcode() {
116: $to_len = strlen($this->text);
117: if ( $to_len === 1 ) {
118: return "i:{$this->text}";
119: }
120: return "i{$to_len}:{$this->text}";
121: }
122: }
123:
124: class FineDiffReplaceOp extends FineDiffOp {
125: public function __construct($fromLen, $text) {
126: $this->fromLen = $fromLen;
127: $this->text = $text;
128: }
129: public function getFromLen() {
130: return $this->fromLen;
131: }
132: public function getToLen() {
133: return strlen($this->text);
134: }
135: public function getText() {
136: return $this->text;
137: }
138: public function getOpcode() {
139: if ( $this->fromLen === 1 ) {
140: $del_opcode = 'd';
141: }
142: else {
143: $del_opcode = "d{$this->fromLen}";
144: }
145: $to_len = strlen($this->text);
146: if ( $to_len === 1 ) {
147: return "{$del_opcode}i:{$this->text}";
148: }
149: return "{$del_opcode}i{$to_len}:{$this->text}";
150: }
151: }
152:
153: class FineDiffCopyOp extends FineDiffOp {
154: public function __construct($len) {
155: $this->len = $len;
156: }
157: public function getFromLen() {
158: return $this->len;
159: }
160: public function getToLen() {
161: return $this->len;
162: }
163: public function getOpcode() {
164: if ( $this->len === 1 ) {
165: return 'c';
166: }
167: return "c{$this->len}";
168: }
169: public function increase($size) {
170: return $this->len += $size;
171: }
172: }
173:
174: 175: 176: 177: 178:
179: class FineDiffOps {
180: public function appendOpcode($opcode, $from, $from_offset, $from_len) {
181: if ( $opcode === 'c' ) {
182: $edits[] = new FineDiffCopyOp($from_len);
183: }
184: else if ( $opcode === 'd' ) {
185: $edits[] = new FineDiffDeleteOp($from_len);
186: }
187: else {
188: $edits[] = new FineDiffInsertOp(substr($from, $from_offset, $from_len));
189: }
190: }
191: public $edits = array();
192: }
193:
194: 195: 196: 197: 198: 199:
200: class FineDiff {
201:
202: 203: 204: 205: 206:
207:
208: 209: 210: 211: 212: 213: 214:
215: public function __construct($from_text = '', $to_text = '', $granularityStack = null) {
216:
217: $this->granularityStack = $granularityStack ? $granularityStack : FineDiff::$characterGranularity;
218: $this->edits = array();
219: $this->from_text = $from_text;
220: $this->doDiff($from_text, $to_text);
221: }
222:
223: public function getOps() {
224: return $this->edits;
225: }
226:
227: public function getOpcodes() {
228: $opcodes = array();
229: foreach ( $this->edits as $edit ) {
230: $opcodes[] = $edit->getOpcode();
231: }
232: return implode('', $opcodes);
233: }
234:
235: public function renderDiffToHTML() {
236: $in_offset = 0;
237: ob_start();
238: foreach ( $this->edits as $edit ) {
239: $n = $edit->getFromLen();
240: if ( $edit instanceof FineDiffCopyOp ) {
241: FineDiff::renderDiffToHTMLFromOpcode('c', $this->from_text, $in_offset, $n);
242: }
243: else if ( $edit instanceof FineDiffDeleteOp ) {
244: FineDiff::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n);
245: }
246: else if ( $edit instanceof FineDiffInsertOp ) {
247: FineDiff::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen());
248: }
249: else {
250: FineDiff::renderDiffToHTMLFromOpcode('d', $this->from_text, $in_offset, $n);
251: FineDiff::renderDiffToHTMLFromOpcode('i', $edit->getText(), 0, $edit->getToLen());
252: }
253: $in_offset += $n;
254: }
255: return ob_get_clean();
256: }
257:
258: 259: 260: 261:
262: public static function getDiffOpcodes($from, $to, $granularities = null) {
263: $diff = new FineDiff($from, $to, $granularities);
264: return $diff->getOpcodes();
265: }
266:
267: 268: 269:
270: public static function getDiffOpsFromOpcodes($opcodes) {
271: $diffops = new FineDiffOps();
272: FineDiff::renderFromOpcodes(null, $opcodes, array($diffops,'appendOpcode'));
273: return $diffops->edits;
274: }
275:
276: 277: 278:
279: public static function renderToTextFromOpcodes($from, $opcodes) {
280: ob_start();
281: FineDiff::renderFromOpcodes($from, $opcodes, array('FineDiff','renderToTextFromOpcode'));
282: return ob_get_clean();
283: }
284:
285: 286: 287:
288: public static function renderDiffToHTMLFromOpcodes($from, $opcodes) {
289: ob_start();
290: FineDiff::renderFromOpcodes($from, $opcodes, array('FineDiff','renderDiffToHTMLFromOpcode'));
291: return ob_get_clean();
292: }
293:
294: 295: 296: 297:
298: public static function renderFromOpcodes($from, $opcodes, $callback) {
299: if ( !is_callable($callback) ) {
300: return;
301: }
302: $opcodes_len = strlen($opcodes);
303: $from_offset = $opcodes_offset = 0;
304: while ( $opcodes_offset < $opcodes_len ) {
305: $opcode = substr($opcodes, $opcodes_offset, 1);
306: $opcodes_offset++;
307: $n = intval(substr($opcodes, $opcodes_offset));
308: if ( $n ) {
309: $opcodes_offset += strlen(strval($n));
310: }
311: else {
312: $n = 1;
313: }
314: if ( $opcode === 'c' ) {
315: call_user_func($callback, 'c', $from, $from_offset, $n, '');
316: $from_offset += $n;
317: }
318: else if ( $opcode === 'd' ) {
319: call_user_func($callback, 'd', $from, $from_offset, $n, '');
320: $from_offset += $n;
321: }
322: else {
323: call_user_func($callback, 'i', $opcodes, $opcodes_offset + 1, $n);
324: $opcodes_offset += 1 + $n;
325: }
326: }
327: }
328:
329: 330: 331:
332:
333: const paragraphDelimiters = "\n\r";
334: public static $paragraphGranularity = array(
335: FineDiff::paragraphDelimiters
336: );
337: const sentenceDelimiters = ".\n\r";
338: public static $sentenceGranularity = array(
339: FineDiff::paragraphDelimiters,
340: FineDiff::sentenceDelimiters
341: );
342: const wordDelimiters = " \t.\n\r";
343: public static $wordGranularity = array(
344: FineDiff::paragraphDelimiters,
345: FineDiff::sentenceDelimiters,
346: FineDiff::wordDelimiters
347: );
348: const characterDelimiters = "";
349: public static $characterGranularity = array(
350: FineDiff::paragraphDelimiters,
351: FineDiff::sentenceDelimiters,
352: FineDiff::wordDelimiters,
353: FineDiff::characterDelimiters
354: );
355:
356: public static $textStack = array(
357: ".",
358: " \t.\n\r",
359: ""
360: );
361:
362: 363: 364: 365: 366:
367:
368: 369: 370:
371: private function doDiff($from_text, $to_text) {
372: $this->last_edit = false;
373: $this->stackpointer = 0;
374: $this->from_text = $from_text;
375: $this->from_offset = 0;
376:
377: if ( empty($this->granularityStack) ) {
378: return;
379: }
380: $this->_processGranularity($from_text, $to_text);
381: }
382:
383: 384: 385: 386: 387: 388: 389:
390: private function _processGranularity($from_segment, $to_segment) {
391: $delimiters = $this->granularityStack[$this->stackpointer++];
392: $has_next_stage = $this->stackpointer < count($this->granularityStack);
393: foreach ( FineDiff::doFragmentDiff($from_segment, $to_segment, $delimiters) as $fragment_edit ) {
394:
395: if ( $fragment_edit instanceof fineDiffReplaceOp && $has_next_stage ) {
396: $this->_processGranularity(
397: substr($this->from_text, $this->from_offset, $fragment_edit->getFromLen()),
398: $fragment_edit->getText()
399: );
400: }
401:
402: else if ( $fragment_edit instanceof fineDiffCopyOp && $this->last_edit instanceof fineDiffCopyOp ) {
403: $this->edits[count($this->edits)-1]->increase($fragment_edit->getFromLen());
404: $this->from_offset += $fragment_edit->getFromLen();
405: }
406: else {
407:
408:
409:
410: $this->edits[] = $this->last_edit = $fragment_edit;
411: $this->from_offset += $fragment_edit->getFromLen();
412: }
413: }
414: $this->stackpointer--;
415: }
416:
417: 418: 419: 420: 421: 422: 423:
424: private static function doFragmentDiff($from_text, $to_text, $delimiters) {
425:
426:
427:
428: if ( empty($delimiters) ) {
429: return FineDiff::doCharDiff($from_text, $to_text);
430: }
431:
432: $result = array();
433:
434:
435: $from_text_len = strlen($from_text);
436: $to_text_len = strlen($to_text);
437: $from_fragments = FineDiff::extractFragments($from_text, $delimiters);
438: $to_fragments = FineDiff::extractFragments($to_text, $delimiters);
439:
440: $jobs = array(array(0, $from_text_len, 0, $to_text_len));
441:
442: $cached_array_keys = array();
443:
444: while ( $job = array_pop($jobs) ) {
445:
446:
447: list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
448:
449:
450: $from_segment_length = $from_segment_end - $from_segment_start;
451: $to_segment_length = $to_segment_end - $to_segment_start;
452: if ( !$from_segment_length || !$to_segment_length ) {
453: if ( $from_segment_length ) {
454: $result[$from_segment_start * 4] = new fineDiffDeleteOp($from_segment_length);
455: }
456: else if ( $to_segment_length ) {
457: $result[$from_segment_start * 4 + 1] = new fineDiffInsertOp(substr($to_text, $to_segment_start, $to_segment_length));
458: }
459: continue;
460: }
461:
462:
463: $best_copy_length = 0;
464:
465: $from_base_fragment_index = $from_segment_start;
466:
467: $cached_array_keys_for_current_segment = array();
468:
469: while ( $from_base_fragment_index < $from_segment_end ) {
470: $from_base_fragment = $from_fragments[$from_base_fragment_index];
471: $from_base_fragment_length = strlen($from_base_fragment);
472:
473: if ( !isset($cached_array_keys_for_current_segment[$from_base_fragment]) ) {
474: if ( !isset($cached_array_keys[$from_base_fragment]) ) {
475: $to_all_fragment_indices = $cached_array_keys[$from_base_fragment] = array_keys($to_fragments, $from_base_fragment, true);
476: }
477: else {
478: $to_all_fragment_indices = $cached_array_keys[$from_base_fragment];
479: }
480:
481: if ( $to_segment_start > 0 || $to_segment_end < $to_text_len ) {
482: $to_fragment_indices = array();
483: foreach ( $to_all_fragment_indices as $to_fragment_index ) {
484: if ( $to_fragment_index < $to_segment_start ) { continue; }
485: if ( $to_fragment_index >= $to_segment_end ) { break; }
486: $to_fragment_indices[] = $to_fragment_index;
487: }
488: $cached_array_keys_for_current_segment[$from_base_fragment] = $to_fragment_indices;
489: }
490: else {
491: $to_fragment_indices = $to_all_fragment_indices;
492: }
493: }
494: else {
495: $to_fragment_indices = $cached_array_keys_for_current_segment[$from_base_fragment];
496: }
497:
498: foreach ( $to_fragment_indices as $to_base_fragment_index ) {
499: $fragment_index_offset = $from_base_fragment_length;
500:
501: for (;;) {
502: $fragment_from_index = $from_base_fragment_index + $fragment_index_offset;
503: if ( $fragment_from_index >= $from_segment_end ) {
504: break;
505: }
506: $fragment_to_index = $to_base_fragment_index + $fragment_index_offset;
507: if ( $fragment_to_index >= $to_segment_end ) {
508: break;
509: }
510: if ( $from_fragments[$fragment_from_index] !== $to_fragments[$fragment_to_index] ) {
511: break;
512: }
513: $fragment_length = strlen($from_fragments[$fragment_from_index]);
514: $fragment_index_offset += $fragment_length;
515: }
516: if ( $fragment_index_offset > $best_copy_length ) {
517: $best_copy_length = $fragment_index_offset;
518: $best_from_start = $from_base_fragment_index;
519: $best_to_start = $to_base_fragment_index;
520: }
521: }
522: $from_base_fragment_index += strlen($from_base_fragment);
523:
524:
525: if ( $best_copy_length >= $from_segment_length / 2) {
526: break;
527: }
528:
529:
530: if ( $from_base_fragment_index + $best_copy_length >= $from_segment_end ) {
531: break;
532: }
533: }
534:
535: if ( $best_copy_length ) {
536: $jobs[] = array($from_segment_start, $best_from_start, $to_segment_start, $best_to_start);
537: $result[$best_from_start * 4 + 2] = new fineDiffCopyOp($best_copy_length);
538: $jobs[] = array($best_from_start + $best_copy_length, $from_segment_end, $best_to_start + $best_copy_length, $to_segment_end);
539: }
540: else {
541: $result[$from_segment_start * 4 ] = new fineDiffReplaceOp($from_segment_length, substr($to_text, $to_segment_start, $to_segment_length));
542: }
543: }
544:
545: ksort($result, SORT_NUMERIC);
546: return array_values($result);
547: }
548:
549: 550: 551: 552: 553: 554: 555: 556: 557: 558: 559: 560: 561: 562: 563: 564:
565: private static function doCharDiff($from_text, $to_text) {
566: $result = array();
567: $jobs = array(array(0, strlen($from_text), 0, strlen($to_text)));
568: while ( $job = array_pop($jobs) ) {
569:
570: list($from_segment_start, $from_segment_end, $to_segment_start, $to_segment_end) = $job;
571: $from_segment_len = $from_segment_end - $from_segment_start;
572: $to_segment_len = $to_segment_end - $to_segment_start;
573:
574:
575: if ( !$from_segment_len || !$to_segment_len ) {
576: if ( $from_segment_len ) {
577: $result[$from_segment_start * 4 + 0] = new fineDiffDeleteOp($from_segment_len);
578: }
579: else if ( $to_segment_len ) {
580: $result[$from_segment_start * 4 + 1] = new fineDiffInsertOp(substr($to_text, $to_segment_start, $to_segment_len));
581: }
582: continue;
583: }
584: if ( $from_segment_len >= $to_segment_len ) {
585: $copy_len = $to_segment_len;
586: while ( $copy_len ) {
587: $to_copy_start = $to_segment_start;
588: $to_copy_start_max = $to_segment_end - $copy_len;
589: while ( $to_copy_start <= $to_copy_start_max ) {
590: $from_copy_start = strpos(substr($from_text, $from_segment_start, $from_segment_len), substr($to_text, $to_copy_start, $copy_len));
591: if ( $from_copy_start !== false ) {
592: $from_copy_start += $from_segment_start;
593: break 2;
594: }
595: $to_copy_start++;
596: }
597: $copy_len--;
598: }
599: }
600: else {
601: $copy_len = $from_segment_len;
602: while ( $copy_len ) {
603: $from_copy_start = $from_segment_start;
604: $from_copy_start_max = $from_segment_end - $copy_len;
605: while ( $from_copy_start <= $from_copy_start_max ) {
606: $to_copy_start = strpos(substr($to_text, $to_segment_start, $to_segment_len), substr($from_text, $from_copy_start, $copy_len));
607: if ( $to_copy_start !== false ) {
608: $to_copy_start += $to_segment_start;
609: break 2;
610: }
611: $from_copy_start++;
612: }
613: $copy_len--;
614: }
615: }
616:
617: if ( $copy_len ) {
618: $jobs[] = array($from_segment_start, $from_copy_start, $to_segment_start, $to_copy_start);
619: $result[$from_copy_start * 4 + 2] = new FineDiffCopyOp($copy_len);
620: $jobs[] = array($from_copy_start + $copy_len, $from_segment_end, $to_copy_start + $copy_len, $to_segment_end);
621: }
622:
623: else {
624: $result[$from_segment_start * 4] = new FineDiffReplaceOp($from_segment_len, substr($to_text, $to_segment_start, $to_segment_len));
625: }
626: }
627: ksort($result, SORT_NUMERIC);
628: return array_values($result);
629: }
630:
631: 632: 633: 634: 635: 636: 637: 638: 639: 640:
641: private static function ($text, $delimiters) {
642:
643: if ( empty($delimiters) ) {
644: $chars = str_split($text, 1);
645: $chars[strlen($text)] = '';
646: return $chars;
647: }
648: $fragments = array();
649: $start = $end = 0;
650: for (;;) {
651: $end += strcspn($text, $delimiters, $end);
652: $end += strspn($text, $delimiters, $end);
653: if ( $end === $start ) {
654: break;
655: }
656: $fragments[$start] = substr($text, $start, $end - $start);
657: $start = $end;
658: }
659: $fragments[$start] = '';
660: return $fragments;
661: }
662:
663: 664: 665:
666: private static function renderToTextFromOpcode($opcode, $from, $from_offset, $from_len) {
667: if ( $opcode === 'c' || $opcode === 'i' ) {
668: echo substr($from, $from_offset, $from_len);
669: }
670: }
671:
672: private static function renderDiffToHTMLFromOpcode($opcode, $from, $from_offset, $from_len) {
673: if ( $opcode === 'c' ) {
674: echo htmlentities(htmlentities(substr($from, $from_offset, $from_len)));
675: }
676: else if ( $opcode === 'd' ) {
677: $deletion = substr($from, $from_offset, $from_len);
678: if ( strcspn($deletion, " \n\r") === 0 ) {
679: $deletion = str_replace(array("\n","\r"), array('\n','\r'), $deletion);
680: }
681: echo '<del>', htmlentities(htmlentities($deletion)), '</del>';
682: }
683: else {
684: echo '<ins>', htmlentities(htmlentities(substr($from, $from_offset, $from_len))), '</ins>';
685: }
686: }
687: }
688:
689: