Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.
   1  <?php
   2  /**
   3   * Standard diff function plus some extras for handling XHTML diffs.
   4   * @copyright &copy; 2007 The Open University
   5   * @author s.marshall@open.ac.uk
   6   * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
   7   * @package ouwiki
   8   *//** */
   9  
  10  // Standard diff
  11  ////////////////
  12  
  /**
   * Basic diff utility function, using standard diff algorithm.
   *
   * Based on Bell Laboratories Computing Science Technical Report #41,
   * July 1976, Hunt & McIlroy, Appendix A.1 and A.3.
   *
   * http://www.cs.dartmouth.edu/~doug/diff.ps
   *
   * Lines are first matched by their crc32 hash; the final step re-checks
   * every match against the real text so a hash collision can never be
   * reported as a matching line.
   *
   * @param array $file1 Array of lines in file 1. The first line in the file
   *   MUST BE INDEX 1 NOT ZERO!!
   * @param array $file2 Array of lines in file 2, again starting from 1.
   * @return array An array with one entry (again 1-based) for each line in
   *   file 1, with its corresponding position in file 2 or 0 if it isn't there.
   */
  function ouwiki_diff_internal($file1, $file2) {
      // Basic variables
      $m = count($file1);
      $n = count($file2);

      // Special case: with an empty file 2 nothing can match, and the tables
      // built below would otherwise be indexed out of range.
      if ($n == 0) {
          return array_fill(1, $m, 0);
      }

      // Step 1   Build list of elements
      /////////

      $V = array();
      for ($j = 1; $j <= $n; $j++) {
          $V[$j] = new stdClass;
          $V[$j]->serial = $j;
          $V[$j]->hash = crc32($file2[$j]);
      }

      // Step 2   Sort by hash,serial
      /////////

      usort($V, 'ouwiki_diff_sort_v');

      // usort() renumbers from 0; shift everything up so $V is 1-based again.
      array_unshift($V, 'bogus');
      unset($V[0]);

      // $V is now an array including the line number 'serial' and hash
      // of each line in file 2, sorted by hash and then serial.

      // Step 3   Equivalence classes
      /////////

      $E = array();
      $E[0] = new stdClass;
      $E[0]->serial = 0;
      $E[0]->last = true;
      for ($j = 1; $j <= $n; $j++) {
          $E[$j] = new stdClass;
          $E[$j]->serial = $V[$j]->serial;
          $E[$j]->last = $j === $n || $V[$j]->hash !== $V[$j + 1]->hash;
      }

      // $E is sorted the same way as $V and holds the line number 'serial'
      // plus whether that entry is the 'last' one of its equivalence class,
      // i.e. of its set of lines sharing one hash.

      // Step 4   For each line in file1, finds start of equivalence class
      /////////

      $P = array();
      for ($i = 1; $i <= $m; $i++) {
          $P[$i] = ouwiki_diff_find_last($V, $E, crc32($file1[$i]));
      }

      // $P[$i] is the index within $V (not a file 2 line number) of the
      // *first* entry whose hash equals that of file 1 line $i, or 0 if there
      // is none. Walking $V/$E from that index up to the entry flagged 'last'
      // visits every file 2 line with that hash.

      // Step 5   Initialise vector of candidates
      /////////

      // Candidates live in one flat array and refer to each other (and are
      // referred to from $K) by integer index rather than by PHP reference.
      $candidates = array();
      $candidates[0] = new stdClass;
      $candidates[0]->a = 0;
      $candidates[0]->b = 0;
      $candidates[0]->previous = null;
      $candidates[1] = new stdClass;
      $candidates[1]->a = $m + 1;
      $candidates[1]->b = $n + 1;
      $candidates[1]->previous = null;

      $K = array();
      $K[0] = 0; // Ref to candidate 0
      $K[1] = 1; // Ref to candidate 1
      $k = 0;

      // Step 6   Merge stage
      /////////

      for ($i = 1; $i <= $m; $i++) {
          if ($P[$i] !== 0) {
              ouwiki_diff_merge($K, $k, $i, $E, $P[$i], $candidates);
          }
      }

      // Step 7   Start with every file 1 line marked as unmatched
      /////////

      $J = array_fill(1, $m, 0);

      // Step 8   Follow candidate chain to make nice representation
      /////////

      $index = $K[$k];
      while (!is_null($index)) {
          // The dummy candidate 0 (a == 0) terminates the chain; skip it.
          if ($candidates[$index]->a != 0) {
              $J[$candidates[$index]->a] = $candidates[$index]->b;
          }
          $index = $candidates[$index]->previous;
      }

      // Step 9   Get rid of 'jackpots' (hash collisions)
      /////////

      for ($i = 1; $i <= $m; $i++) {
          // Compare as strings with a strict operator: a loose comparison
          // treats numeric strings such as "1e3" and "1000" as equal, which
          // would let a colliding pair through as a false match.
          if ($J[$i] != 0 && (string)$file1[$i] !== (string)$file2[$J[$i]]) {
              $J[$i] = 0;
          }
      }

      // Done! (Maybe.)
      return $J;
  }
 160  
 161  // Functions needed by parts of the algorithm
 162  /////////////////////////////////////////////
 163  
 /**
  * Merge routine of the diff algorithm (Appendix A.3 of the Hunt & McIlroy
  * report).
  *
  * Walks one equivalence class of file 2 lines (starting at $E[$p] and
  * ending at the entry flagged 'last') and, for each of them, tries to
  * extend one of the candidate chains referenced from $K with a new
  * candidate pairing file 1 line $i with that file 2 line.
  *
  * @param array $K Candidate indexes, one per chain length; modified in place
  * @param int $k Highest chain length so far; incremented in place when a
  *   longer chain is found
  * @param int $i Line number in file 1
  * @param array $E Equivalence class table (objects with 'serial' and 'last')
  * @param int $p Index in $E of the first entry of the class to walk
  * @param array $candidates Flat candidate store (objects with 'a', 'b',
  *   'previous'); new candidates are appended in place
  */
 function ouwiki_diff_merge(&$K, &$k, $i, &$E, $p, &$candidates) {
     $r = 0;
     $c = $K[0];

     for (;; $p++) {
         $j = $E[$p]->serial; // Paper says 'i' but this is wrong (OCR)

         // Binary search in $K between $r and $k for the slot $s whose
         // candidate ends before $j while the next one ends after it.
         // $s stays -1 when no such slot exists.
         $low = $r;
         $high = $k + 1;
         $s = -1;
         do {
             $mid = (int)(($low + $high) / 2);
             if ($candidates[$K[$mid]]->b >= $j) {
                 $high = $mid;
             } else if ($candidates[$K[$mid + 1]]->b <= $j) {
                 $low = $mid + 1;
             } else { // $mid is less and $mid+1 is more
                 $s = $mid;
                 break;
             }
         } while ($high > $low);

         if ($s > -1) {
             if ($candidates[$K[$s + 1]]->b > $j) {
                 // Create new candidate chained onto the one in slot $s
                 $fresh = count($candidates);
                 $candidates[$fresh] = (object)array(
                     'a' => $i,
                     'b' => $j,
                     'previous' => $K[$s],
                 );
                 // Commit the candidate held back from the previous round,
                 // then hold this one back in turn.
                 $K[$r] = $c;
                 $r = $s + 1;
                 $c = $fresh;
             }

             if ($s === $k) {
                 // Longest chain grew: move the fence entry up one slot
                 $K[$k + 2] = $K[$k + 1];
                 $k++;
                 break;
             }
         }

         if ($E[$p]->last) {
             break;
         }
     }

     $K[$r] = $c;
 }
 221  
 /**
  * Sort callback from Step 2: orders entries by hash, and by serial (line
  * number) where the hashes are equal.
  *
  * @param object $a Entry with 'hash' and 'serial' properties
  * @param object $b Entry with 'hash' and 'serial' properties
  * @return int -1, 0 or 1 as $a sorts before, equal to or after $b
  */
 function ouwiki_diff_sort_v($a, $b) {
     $byhash = $a->hash <=> $b->hash;
     if ($byhash !== 0) {
         return $byhash;
     }
     return $a->serial <=> $b->serial;
 }
 236  
 /**
  * Lookup from Step 4: finds where the equivalence class for a hash starts.
  *
  * @param array $V 1-based array of objects with a 'hash' property, sorted
  *   by hash
  * @param array $E Array indexed like $V (plus an entry 0) of objects whose
  *   'last' property marks the final entry of each equivalence class
  * @param int $hash Hash to look for
  * @return int Index in $V of the first entry with that hash, or 0 if no
  *   entry has it
  */
 function ouwiki_diff_find_last(&$V, &$E, $hash) {
     // Binary search over the half-open range [$low, $high); the array is
     // 1-indexed, so the upper bound is one past the highest key.
     end($V);
     $low = 1;
     $high = key($V) + 1;
     for (;;) {
         $mid = (int)(($low + $high) / 2);
         $probe = $V[$mid]->hash;
         if ($probe > $hash) {
             $high = $mid;
         } else if ($probe < $hash) {
             $low = $mid + 1;
         } else { // Equal
             break;
         }
         if ($high <= $low) {
             // No matching line
             return 0;
         }
     }

     // Step back through $E until the preceding entry closes another class;
     // that makes the current entry the first of this one.
     $first = $mid;
     while (!$E[$first - 1]->last) {
         $first--;
     }
     return $first;
 }
 265  
 266  ///////////////////////////
 267  
 268  
 /**
  * One 'line' of HTML content, broken into words, for the purpose of
  * text comparison.
  */
 class ouwiki_line {
     /** @var array Array of ouwiki_word */
     public $words = array();

     /**
      * Builds the word list from a chunk of text.
      *
      * Everything that is not wanted (whitespace, non-breaking space
      * entities, tags) is overwritten with spaces of the same length, so
      * that every remaining character keeps its original offset.
      *
      * @param string $data Text data that makes up this 'line'. (May include line breaks etc.)
      * @param int $linepos Position number for first character in text
      */
     public function __construct($data, $linepos) {
         // 1. Blank out unwanted content, keeping the length unchanged.

         // Any whitespace character becomes a plain space
         $data = preg_replace('/\s/', ' ', $data);

         // The various spellings of non-breaking space become spaces. One
         // replacement string serves all of them only because every search
         // string is exactly 6 characters long.
         $data = str_replace(array('&nbsp;', '&#xA0;', '&#160;'), '      ', $data);

         // Each tag becomes an equal number of spaces. (No line breaks are
         // left at this point, so every character of the tag is covered.)
         $data = preg_replace_callback('/<.*?'.'>/', function($matches) {
             return str_repeat(' ', strlen($matches[0]));
         }, $data);

         // 2. Every run of non-space characters counts as a 'word'. These
         // need not be real words; they may carry punctuation at either end.
         $length = strlen($data);
         $pos = 0;
         while (true) {
             // Skip over spaces
             $pos += strspn($data, ' ', $pos);
             if ($pos == $length) {
                 // No more content
                 break;
             }

             // Take everything up to the next space (or the end of data)
             $wordlength = strcspn($data, ' ', $pos);
             $this->words[] = new ouwiki_word(substr($data, $pos, $wordlength), $pos + $linepos);
             $pos += $wordlength;
         }
     }

     /**
      * Old syntax of class constructor. Deprecated in PHP7.
      *
      * @deprecated since Moodle 3.1
      */
     public function ouwiki_line($data, $linepos) {
         debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
         self::__construct($data, $linepos);
     }

     /**
      * @return string Normalised string representation of this line object:
      *   its words joined by single spaces
      */
     public function get_as_string() {
         $joined = '';
         foreach ($this->words as $item) {
             $prefix = ($joined === '') ? '' : $joined . ' ';
             $joined = $prefix . $item->word;
         }
         return $joined;
     }

     /**
      * Static function converts lines to strings.
      * @param array $lines Array of ouwiki_line
      * @return array Array of strings, under the same keys as $lines
      */
     public static function get_as_strings($lines) {
         $converted = array();
         foreach ($lines as $position => $line) {
             $converted[$position] = $line->get_as_string();
         }
         return $converted;
     }


     /**
      * @return True if there are no words in the line
      */
     public function is_empty() {
         $wordcount = count($this->words);
         return $wordcount === 0;
     }
 }
 369  
 /**
  * A single word for html comparison. 'Words' are simply chunks of
  * plain text: they may include punctuation, or be only part of a real
  * word (e.g. where a span started in the middle of something).
  */
 class ouwiki_word {
     /** @var string Word as plain string */
     public $word;
     /** @var int Start position in original xhtml */
     public $start;

     /**
      * @param string $word Word as plain string
      * @param int $start Start position in original xhtml
      */
     public function __construct($word, $start) {
         $this->start = $start;
         $this->word = $word;
     }

     /**
      * Old syntax of class constructor. Deprecated in PHP7.
      *
      * @deprecated since Moodle 3.1
      */
     public function ouwiki_word($word, $start) {
         debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
         self::__construct($word, $start);
     }
 }
 397  
 /**
  * Prepares XHTML content for text difference comparison.
  *
  * The content is cut into 'lines' at runs of block-level tags. Unwanted
  * content is always overwritten with spaces rather than removed, so the
  * positions stored in the resulting words still refer to the original
  * content.
  *
  * @param string $content XHTML content [NO SLASHES]
  * @return array Array of ouwiki_line objects, indexed from 1; lines that
  *   contain no words are left out
  */
 function ouwiki_diff_html_to_lines($content) {
     // Blank out whole script, style and object elements, since they can
     // contain non-text outside tags. The tag name may be followed by
     // attributes or directly by '>', and the element may span several
     // lines. Line breaks inside it are kept; every other character
     // becomes a space, so the length does not change.
     $content = preg_replace_callback(
         '~<(script|object|style)\b.*?</\1>~is', function($matches) {
         return preg_replace("/./", " ", $matches[0]);
     }, $content);

     // Get rid of all ` symbols as we are going to use these for a marker later.
     $content = str_replace('`', ' ', $content);

     // Put line breaks on block tags. Mark each line break with ` symbol.
     // NOTE: only attribute-less tags (e.g. <p>, not <p class="x">) are
     // recognised by this pattern.
     $blocktags = array('p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'td', 'li');
     $alternatives = array();
     foreach ($blocktags as $blocktag) {
         $alternatives[] = "<$blocktag>|<\\/$blocktag>";
     }
     $taglist = implode('|', $alternatives);
     $content = preg_replace_callback('/((' . $taglist . ')\s*)+/i', function($matches) {
         // First character of the run becomes the marker, the rest spaces
         return "`" . preg_replace("/./", " ", substr($matches[0], 1));
     }, $content);

     // Now go through splitting each line at the markers
     $lines = array();
     $index = 1;
     $pos = 0;
     $length = strlen($content);
     while ($pos < $length) {
         $nextline = strpos($content, '`', $pos);
         if ($nextline === false) {
             // No more line breaks? Take content to end
             $nextline = $length;
         }

         $line = new ouwiki_line(substr($content, $pos, $nextline - $pos), $pos);
         if (!$line->is_empty()) {
             $lines[$index++] = $line;
         }
         // Continue after the marker
         $pos = $nextline + 1;
     }
     return $lines;
 }
 452  
 /**
  * A changed area of a file, with its location in each of the two
  * source files.
  */
 class ouwiki_change_range {
     /** @var int Start of the range in file 1 */
     public $file1start;
     /** @var int Number of entries the range covers in file 1 */
     public $file1count;
     /** @var int Start of the range in file 2 */
     public $file2start;
     /** @var int Number of entries the range covers in file 2 */
     public $file2count;
 }
 461  
 /**
  * A more logical representation of the results from ouwiki_diff_internal():
  * lines that were added, lines that were deleted, and ranges that changed.
  */
 class ouwiki_changes {

     /** @var array Array of indexes (in file 2) of added lines */
     public $adds;

     /** @var array Array of indexes (in file 1) of deleted lines */
     public $deletes;

     /** @var array Array of changed ranges (ouwiki_change_range) */
     public $changes;

     /**
      * @param array $diff Array from line indices in file1
      *   to indices in file2. All indices 1-based.
      * @param int $count2 Number of lines in file2
      */
     public function __construct($diff, $count2) {
         // Find deleted lines
         $this->deletes = $this->internal_find_deletes($diff, $count2);

         // Added lines work the same way after the comparison is
         // reversed.
         $this->adds = $this->internal_find_deletes(
             ouwiki_diff_internal_flip($diff, $count2), count($diff));

         // Lookup table of deleted line numbers, so that the membership
         // test inside the loop below costs O(1) instead of a scan of the
         // whole list for every unmatched line.
         $isdeleted = array_flip($this->deletes);

         // Changed ranges are all the other lines from file 1 that
         // weren't found in file 2 but aren't deleted, and the
         // corresponding lines from file 2 (between the equivalent
         // 'found' lines).
         $this->changes = array();
         $matchbefore = 0;
         $inrange = -1;   // Index of the range currently being extended
         $lastrange = -1; // Index of the range still lacking its file2count
         foreach ($diff as $index1 => $index2) {
             // Changed line if this isn't in 'deleted' section and
             // doesn't have a match in file2.
             if ($index2 === 0 && !isset($isdeleted[$index1])) {
                 if ($inrange === -1) {
                     // Not already in a range, start a new one at array end
                     $inrange = count($this->changes);
                     $range = new ouwiki_change_range;
                     $range->file1start = $index1;
                     $range->file1count = 1;
                     $range->file2start = $matchbefore + 1; // After last valid from file2
                     $range->file2count = 0;
                     $this->changes[$inrange] = $range;
                     $lastrange = $inrange;
                 } else {
                     // One more line that gets added to the range
                     $this->changes[$inrange]->file1count++;
                 }
             } else {
                 // Not in a range any more
                 $inrange = -1;
                 // If we have a line match...
                 if ($index2 !== 0) {
                     // Remember this line as next range must start after it
                     $matchbefore = $index2;
                     // If last range is still looking for a number, fill that in too
                     if ($lastrange !== -1) {
                         $this->changes[$lastrange]->file2count = $index2
                             - $this->changes[$lastrange]->file2start;
                         $lastrange = -1;
                     }
                 }
             }
         }
         // Unfinished range in file2 gets end of file
         if ($lastrange !== -1) {
             $this->changes[$lastrange]->file2count = $count2
                 - $this->changes[$lastrange]->file2start + 1;
         }
     }

     /**
      * Old syntax of class constructor. Deprecated in PHP7.
      *
      * @deprecated since Moodle 3.1
      */
     public function ouwiki_changes($diff, $count2) {
         debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
         self::__construct($diff, $count2);
     }

     /**
      * Find deleted lines. These are lines in file1 that
      * cannot be present even in modified form in file2
      * because we have matching lines around them.
      * O(n) algorithm.
      * @param array $diff Array of file1->file2 indexes (1-based keys)
      * @param int $count2 Count of lines in file2
      * @return array Indexes (in file1) of the deleted lines
      */
     public function internal_find_deletes($diff, $count2) {
         $deletes = array();

         // 1. Create a new array that includes the lowest-valued
         //    index2 value below each run of 0s.
         //    I.e. if our array is say 1,2,0,0,0,3,0 then the
         //    resulting array will be -,-,3,3,3,-,0
         $squidges = array();
         $lowest = 0;
         for ($index1 = count($diff); $index1 >= 1; $index1--) {
             $index2 = $diff[$index1];
             if ($index2 === 0) {
                 $squidges[$index1] = $lowest;
             } else {
                 $lowest = $index2;
             }
         }

         // 2. OK now we can use this new array to work out
         //    items that are known to be deleted because we
         //    have matching items either side
         $highest = 0;
         foreach ($diff as $index1 => $index2) {
             if ($index2 === 0) {
                 // Deleted when nothing of file2 is left after the last
                 // match, or when the matches on either side are adjacent
                 // in file2 so nothing lies between them.
                 if ($highest === $count2 || $highest + 1 === $squidges[$index1]) {
                     $deletes[] = $index1;
                 }
             } else {
                 $highest = $index2;
             }
         }
         return $deletes;
     }
 }
 591  
 /**
  * Inverts the array returned by ouwiki_diff_internal so that it is
  * keyed by lines of file 2 and points at lines of file 1.
  * @param array $diff Array of index1=>index2
  * @param int $count2 Count of lines in file 2
  * @return array Flipped version: index2=>index1, with 0 for every line
  *   of file 2 that nothing in $diff points at
  */
 function ouwiki_diff_internal_flip($diff, $count2) {
     // Every line of file 2 starts out unmatched
     $flipped = ($count2 >= 1) ? array_fill(1, $count2, 0) : array();

     foreach ($diff as $from => $to) {
         if ($to === 0) {
             continue;
         }
         $flipped[$to] = $from;
     }
     return $flipped;
 }
 611  
 /**
  * Compares two files, first line by line and then, inside the ranges of
  * lines that differ, word by word.
  * @param array $lines1 Array of ouwiki_line
  * @param array $lines2 Array of ouwiki_line
  * @return array (deleted,added); deleted and added are arrays of ouwiki_word with
  *   position numbers from $lines1 and $lines2 respectively
  */
 function ouwiki_diff_words($lines1, $lines2) {
     $deleted = array();
     $added = array();

     // Line-level difference
     $linediff = ouwiki_diff(
         ouwiki_line::get_as_strings($lines1),
         ouwiki_line::get_as_strings($lines2));

     // Every word of an entirely deleted line counts as deleted
     foreach ($linediff->deletes as $lineindex) {
         foreach ($lines1[$lineindex]->words as $word) {
             $deleted[] = $word;
         }
     }
     // Likewise every word of an entirely added line counts as added
     foreach ($linediff->adds as $lineindex) {
         foreach ($lines2[$lineindex]->words as $word) {
             $added[] = $word;
         }
     }

     // Changed ranges get diffed at the individual-word level
     foreach ($linediff->changes as $linerange) {
         // Collect the words on each side of the range into 1-based
         // arrays, as the diff functions require, together with parallel
         // arrays of their plain strings.
         $file1words = array();
         $file1strings = array();
         $next = 1;
         $stop = $linerange->file1start + $linerange->file1count;
         for ($line = $linerange->file1start; $line < $stop; $line++) {
             foreach ($lines1[$line]->words as $word) {
                 $file1words[$next] = $word;
                 $file1strings[$next] = $word->word;
                 $next++;
             }
         }

         $file2words = array();
         $file2strings = array();
         $next = 1;
         $stop = $linerange->file2start + $linerange->file2count;
         for ($line = $linerange->file2start; $line < $stop; $line++) {
             foreach ($lines2[$line]->words as $word) {
                 $file2words[$next] = $word;
                 $file2strings[$next] = $word->word;
                 $next++;
             }
         }

         // Run diff on the strings and map the results back to the words
         $worddiff = ouwiki_diff($file1strings, $file2strings);
         foreach ($worddiff->adds as $wordindex) {
             $added[] = $file2words[$wordindex];
         }
         foreach ($worddiff->deletes as $wordindex) {
             $deleted[] = $file1words[$wordindex];
         }
         // A changed word range is reported as its old words deleted and
         // its new words added
         foreach ($worddiff->changes as $wordrange) {
             $stop = $wordrange->file1start + $wordrange->file1count;
             for ($wordindex = $wordrange->file1start; $wordindex < $stop; $wordindex++) {
                 $deleted[] = $file1words[$wordindex];
             }
             $stop = $wordrange->file2start + $wordrange->file2count;
             for ($wordindex = $wordrange->file2start; $wordindex < $stop; $wordindex++) {
                 $added[] = $file2words[$wordindex];
             }
         }
     }

     return array($deleted, $added);
 }
 694  
 /**
  * Runs the diff and wraps the outcome in an ouwiki_changes object.
  * @param array $file1 Array of lines in file 1. The first line in the file
  *   MUST BE INDEX 1 NOT ZERO!!
  * @param array $file2 Array of lines in file 2, again starting from 1.
  * @return ouwiki_changes Object describing changes
  */
 function ouwiki_diff($file1, $file2) {
     $mapping = ouwiki_diff_internal($file1, $file2);
     $count2 = count($file2);
     return new ouwiki_changes($mapping, $count2);
 }
 705  
 /**
  * Adds HTML span elements to $html around the words listed in $words.
  *
  * Marked words separated only by whitespace share a single span.
  *
  * @param string $html HTML content
  * @param array $words Array of ouwiki_word to mark
  * @param string $markerclass Name of class for span element
  * @param string $beforetext Text inserted, as is, before each span
  * @param string $aftertext Text inserted, as is, after each span
  * @return string HTML with markup added
  */
 function ouwiki_diff_add_markers($html, $words, $markerclass, $beforetext, $aftertext) {
     // Sort words by start position
     usort($words, function($a, $b) {
         return $a->start <=> $b->start;
     });

     // Add marker for each word. We use an odd tag name which will
     // be replaced by span later, this for ease of replacing
     $markerstart = '<ouwiki_diff_add_markers>';
     $markerend = '</ouwiki_diff_add_markers>';
     $pos = 0;
     $result = '';
     foreach ($words as $word) {
         // Add everything up to the word
         $result .= substr($html, $pos, $word->start - $pos);
         // Add word
         $result .= $markerstart . $word->word . $markerend;
         // Update position
         $pos = $word->start + strlen($word->word);
     }

     // Add everything after last word
     $result .= substr($html, $pos);

     // If we end a marker then immediately start one, get rid of
     // both the end and start
     $result = preg_replace('~</ouwiki_diff_add_markers>(\s*)<ouwiki_diff_add_markers>~', '$1', $result);

     // Turn markers into proper span. This is a plain string replacement on
     // purpose: in a regular expression replacement, any '$1' or '\1' style
     // sequence inside the caller's texts would be treated as a
     // backreference and corrupt the output.
     $result = str_replace($markerstart, $beforetext . '<span class="' . $markerclass . '">', $result);
     $result = str_replace($markerend, '</span>' . $aftertext, $result);

     return $result;
 }
 746  
 /**
  * Compares two HTML files. (This is the main function that everything else supports.)
  *
  * Deleted words are marked up in the first file and added words in the
  * second; each marked area is wrapped in 'accesshide' begin/end labels
  * taken from the 'wiki' language strings.
  *
  * @param string $html1 XHTML for file 1
  * @param string $html2 XHTML for file 2
  * @return array ($result1,$result2) to be displayed indicating the differences
  */
 function ouwiki_diff_html($html1, $html2) {
     $lines1 = ouwiki_diff_html_to_lines($html1);
     $lines2 = ouwiki_diff_html_to_lines($html2);
     list($deleted, $added) = ouwiki_diff_words($lines1, $lines2);

     $labelstart = '<strong class="accesshide">';
     $labelend = '</strong>';

     // File 1: highlight what was deleted
     $before = $labelstart . get_string('deletedbegins', 'wiki') . $labelend;
     $after = $labelstart . get_string('deletedends', 'wiki') . $labelend;
     $result1 = ouwiki_diff_add_markers($html1, $deleted, 'ouw_deleted', $before, $after);

     // File 2: highlight what was added
     $before = $labelstart . get_string('addedbegins', 'wiki') . $labelend;
     $after = $labelstart . get_string('addedends', 'wiki') . $labelend;
     $result2 = ouwiki_diff_add_markers($html2, $added, 'ouw_added', $before, $after);

     return array($result1, $result2);
 }
 765