Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * Utility function to convert wiki-like to Markdown format
  20   *
  21   * @package    core
  22   * @subpackage lib
  23   * @copyright  Howard Miller, 2005
  24   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**#@+
  30   *  state defines
  31   */
  32  define( "STATE_NONE",1 ); // blank line has been detected, so looking for first line on next para
  33  define( "STATE_PARAGRAPH",2 ); // currently processing vanilla paragraph
  34  define( "STATE_BLOCKQUOTE",3 ); // currently processing blockquote section
  35  define( "STATE_PREFORM",4 ); // currently processing preformatted text
  36  define( "STATE_NOTIKI",5 ); // currently processing preformatted / no formatting
  37  /**#@-*/
  38  /**#@+
  39   * list defines
  40   */
  41  define( "LIST_NONE", 1 ); // no lists active
  42  define( "LIST_UNORDERED", 2 ); // unordered list active
  43  define( "LIST_ORDERED", 3 ); // ordered list active
  44  define( "LIST_DEFINITION", 4 ); // definition list active
  45  /**#@-*/
  46  
  47  /**
  48   * @package   moodlecore
  49   * @copyright Howard Miller, 2005
  50   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  51   */
  52  class WikiToMarkdown {
  53  
  54    var $block_state;
  55    var $list_state;
  56    var $list_depth;
  57    var $list_backtrack;
  58    var $output; // output buffer
  59    var $courseid;
  60  
  61    function close_block( $state ) {
  62      // provide appropriate closure for block according to state
  63  
  64      // if in list close this first
  65      $lclose = "";
  66      if ($this->list_state != LIST_NONE) {
  67        $lclose = $this->do_list( " ",true );
  68      }
  69  
  70      $sclose = "";
  71      switch ($state) {
  72        case STATE_PARAGRAPH:
  73          $sclose =  "\n";
  74          break;
  75        case STATE_BLOCKQUOTE:
  76          $sclose =  "\n";
  77          break;
  78        case STATE_PREFORM:
  79          $sclose =  "</pre>\n";
  80          break;
  81        case STATE_NOTIKI:
  82          $sclose =  "\n";
  83          break;
  84      }
  85  
  86      return $lclose . $sclose;
  87    }
  88  
  89    function do_replace( $line, $mark, $tag ) {
  90      // do the regex thingy for things like bold, italic etc
  91      // $mark is the magic character, and $tag the HTML tag to insert
  92  
  93      // BODGE: replace inline $mark characters in places where we want them ignored
  94      // they will be put back after main substitutue, stops problems with eg, and/or
  95      $bodge = chr(1);
  96      $line = preg_replace( '/([[:alnum:]])'.$mark.'([[:alnum:]])/i', '\\1'.$bodge.'\\2',$line );
  97  
  98      $regex = '/(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)/i';
  99      $replace = '\\1<'.$tag.'>\\2</'.$tag.'>\\3';
 100      $line = preg_replace( $regex, $replace, $line );
 101  
 102      // BODGE: back we go
 103      $line = preg_replace( '/'.$bodge.'/i', $mark, $line );
 104  
 105      return $line;
 106    }
 107  
 108  
 109    function do_replace_markdown( $line, $mark, $tag ) {
 110      // do the regex thingy for things like bold, italic etc
 111      // $mark is the magic character, and $tag the HTML tag to insert
 112      // MARKDOWN version does not generate HTML tags, just straigt replace
 113  
 114      // BODGE: replace inline $mark characters in places where we want them ignored
 115      // they will be put back after main substitutue, stops problems with eg, and/or
 116      $bodge = chr(1);
 117      $line = preg_replace( '/([[:alnum:]])'.$mark.'([[:alnum:]])/i', '\\1'.$bodge.'\\2',$line );
 118  
 119      $regex = '/(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)/i';
 120      $replace = '\\1'.$tag.'\\2'.$tag.'\\3';
 121      $line = preg_replace( $regex, $replace, $line );
 122  
 123      // BODGE: back we go
 124      $line = preg_replace( '/'.$bodge.'/i', $mark, $line );
 125  
 126      return $line;
 127    }
 128  
 129  
 130    function do_replace_sub( $line, $mark, $tag ) {
 131      // do regex for subscript and superscript (slightly different)
 132      // $mark is the magic character and $tag the HTML tag to insert
 133  
 134      $regex = '/'.$mark.'([^'.$mark.']*)'.$mark.'/i';
 135      $replace = '<'.$tag.'>\\1</'.$tag.'>';
 136  
 137      return preg_replace( $regex, $replace, $line );
 138    }
 139  
 140    function do_list( $line, $blank=false ) {
 141      // handle line with list character on it
 142      // if blank line implies drop to level 0
 143  
 144      // get magic character and then delete it from the line if not blank
 145      if ($blank) {
 146        $listchar="";
 147        $count = 0;
 148      }
 149      else {
 150        $listchar = $line[0];
 151        $count = strspn( $line, $listchar );
 152        $line = preg_replace( "/^[".$listchar."]+ /i", "", $line );
 153      }
 154  
 155      // find what sort of list this character represents
 156      $list_tag = "";
 157      $list_close_tag = "";
 158      $item_tag = "";
 159      $item_close_tag = "";
 160      $list_style = LIST_NONE;
 161      switch ($listchar) {
 162        case '*':
 163          $list_tag = "";
 164          $list_close_tag = "";
 165          $item_tag = "*";
 166          $item_close_tag = "";
 167          $list_style = LIST_UNORDERED;
 168          break;
 169        case '#':
 170          $list_tag = "";
 171          $list_close_tag = "";
 172          $item_tag = "1.";
 173          $item_close_tag = "";
 174          $list_style = LIST_ORDERED;
 175          break;
 176        case ';':
 177          $list_tag = "<dl>";
 178          $list_close_tag = "</dl>";
 179          $item_tag = "<dd>";
 180          $item_close_tag = "</dd>";
 181          $list_style = LIST_DEFINITION;
 182          break;
 183        case ':':
 184          $list_tag = "<dl>";
 185          $list_close_tag = "</dl>";
 186          $item_tag = "<dt>";
 187          $item_close_tag = "</dt>";
 188          $list_style = LIST_DEFINITION;
 189          break;
 190        }
 191  
 192      // tag opening/closing regime now - fun bit :-)
 193      $tags = "";
 194  
 195      // if depth has reduced do number of closes to restore level
 196      for ($i=$this->list_depth; $i>$count; $i-- ) {
 197        $close_tag = array_pop( $this->list_backtrack );
 198        $tags = $tags . $close_tag;
 199        }
 200  
 201      // if depth has increased do number of opens to balance
 202      for ($i=$this->list_depth; $i<$count; $i++ ) {
 203        array_push( $this->list_backtrack, "$list_close_tag" );
 204        $tags = $tags . "$list_tag";
 205      }
 206  
 207      // ok, so list state is now same as style and depth same as count
 208      $this->list_state = $list_style;
 209      $this->list_depth = $count;
 210  
 211      // get indent
 212      $indent = substr( "                      ",1,$count-1 );
 213  
 214      if ($blank) {
 215        $newline = $tags;
 216      }
 217      else {
 218        $newline = $tags . $indent . "$item_tag " . $line . "$item_close_tag";
 219      }
 220  
 221      return $newline;
 222    }
 223  
 224  
 225    function line_replace( $line ) {
 226      // return line after various formatting replacements
 227      // have been made - order is vital to stop them interfering with each other
 228  
 229      global $CFG;
 230  
 231      // ---- (at least) means a <hr />
 232      // MARKDOWN: no change so leave
 233  
 234      // is this a list line (starts with * # ; :)
 235      if (preg_match( "/^([*]+|[#]+|[;]+|[:]+) /i", $line )) {
 236        $line = $this->do_list( $line );
 237      }
 238  
 239     // typographic conventions
 240     // MARKDOWN: no equiv. so convert to entity as before
 241      // $line = str_replace( "--", "&#8212;", $line );
 242      // $line = str_replace( " - ", " &#8211; ", $line );
 243      $line = str_replace( "...", " &#8230; ", $line );
 244      $line = str_replace( "(R)", "&#174;", $line );
 245      $line = str_replace( "(r)", "&#174;", $line );
 246      $line = str_replace( "(TM)", "&#8482;", $line );
 247      $line = str_replace( "(tm)", "&#8482;", $line );
 248      $line = str_replace( "(C)", "&#169;", $line );
 249      $line = str_replace( "1/4", "&#188;", $line );
 250      $line = str_replace( "1/2", "&#189;", $line );
 251      $line = str_replace( "3/4", "&#190;", $line );
 252      $line = preg_replace( "/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i", "\\1&#215;\\2", $line ); // (digits) x (digits) - multiply
 253      // do formatting tags
 254      // NOTE: The / replacement  *has* to be first, or it will screw the
 255      //    HTML tags that are added by the other ones
 256      // MARKDOWN: only bold and italic change, rest are just HTML
 257      $line = $this->do_replace_markdown( $line, "\*", "**" );
 258      $line = $this->do_replace_markdown( $line, "/", "*" );
 259      $line = $this->do_replace( $line, "\+", "ins" );
 260      // $line = $this->do_replace( $line, "-", "del" );
 261      $line = $this->do_replace_sub( $line, "~", "sub" );
 262      $line = $this->do_replace_sub( $line, "\^", "sup" );
 263      $line = $this->do_replace( $line, "%", "code" );
 264      $line = $this->do_replace( $line, "@", "cite" );
 265  
 266      // convert urls into proper link with optional link text URL(text)
 267      // MARDOWN: HTML conversion should work fine
 268      $line = preg_replace("/([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)/i",
 269        "\\1[\\5](\\2://\\3\\4)", $line);
 270      $line = preg_replace("/([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)/i",
 271        "\\1[\\5](http://www.\\2\\3)", $line);
 272  
 273      // make urls (with and without httpd) into proper links
 274      $line = preg_replace("/([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])/i",
 275        "\\1<\\2://\\3\\4>", $line);
 276      $line = preg_replace("/([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])/i",
 277        "\\1<http://www.\\2\\3\>", $line);
 278  
 279      // make email addresses into mailtos....
 280      // MARKDOWN doesn't quite support this, so do as html
 281      $line = preg_replace("/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i",
 282         "\\1<a href=\"mailto:\\2\">\\3</a>", $line);
 283  
 284      // !# at the beginning of any lines means a heading
 285      // MARKDOWN: value (1-6) becomes number of hashes
 286      if (preg_match( "/^!([1-6]) (.*)$/i", $line, $regs )) {
 287        $depth = substr( $line, 1, 1 );
 288        $out = substr( '##########', 0, $depth);
 289        $line = preg_replace( "/^!([1-6]) (.*)$/i", "$out \\2", $line );
 290      }
 291  
 292      // acronym handing, example HTML(Hypertext Markyp Language)
 293      // MARKDOWN: no equiv. so just leave as HTML
 294      $line = preg_replace( "/([A-Z]+)\(([^)]+)\)/", "<acronym title=\"\\2\">\\1</acronym>", $line );
 295  
 296      // Replace resource link >>##(Description Text)
 297      // MARKDOWN: change to MD web link style
 298      $line = preg_replace("/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i",
 299         " [\\3](".$CFG->wwwroot."/mod/\\1/view.php?id=\\2) ", $line );
 300  
 301      $coursefileurl = array(moodle_url::make_legacyfile_url($this->courseid, null));
 302  
 303      // Replace picture resource link
 304      $line = preg_replace("#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i",
 305              "![\\3](".$coursefileurl."/\\1\\2)", $line );
 306  
 307      // Replace file resource link
 308      $line = preg_replace("#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i",
 309              "[\\2](".$coursefileurl."/\\1)", $line );
 310  
 311      return $line;
 312    }
 313  
 314    function convert( $content,$courseid ) {
 315  
 316      // main entry point for processing Wiki-like text
 317      // $content is string containing text with Wiki-Like formatting
 318      // return: string containing Markdown formatting
 319  
 320      // initialisation stuff
 321      $this->output = "";
 322      $this->block_state = STATE_NONE;
 323      $this->list_state = LIST_NONE;
 324      $this->list_depth = 0;
 325      $this->list_backtrack = array();
 326      $this->spelling_on = false;
 327      $this->courseid = $courseid;
 328  
 329      // split content into array of single lines
 330      $lines = explode( "\n",$content );
 331      $buffer = "";
 332  
 333      // run through lines
 334      foreach( $lines as $line ) {
 335        // is this a blank line?
 336        $blank_line = preg_match( "/^[[:blank:]\r]*$/i", $line );
 337        if ($blank_line) {
 338          // first end current block according to state
 339          $buffer = $buffer . $this->close_block( $this->block_state );
 340          $this->block_state = STATE_NONE;
 341          continue;
 342        }
 343  
 344        // act now depending on current block state
 345        if ($this->block_state == STATE_NONE) {
 346          // first character of line defines block type
 347          if (preg_match( "/^> /i",$line )) {
 348            // blockquote
 349            $buffer = $buffer . $this->line_replace( $line ). "\n";
 350            $this->block_state = STATE_BLOCKQUOTE;
 351          }
 352          else
 353          if (preg_match( "/^  /i",$line) ) {
 354            // preformatted text
 355            // MARKDOWN: no real equiv. so just use <pre>
 356            $buffer = $buffer . "<pre>\n";
 357            $buffer = $buffer . $this->line_replace($line) . "\n";
 358            $this->block_state = STATE_PREFORM;
 359          }
 360          else
 361          if (preg_match("/^\% /i",$line) ) {
 362                  // preformatted text - no processing
 363                  // MARKDOWN: this is MD code form of a paragraph
 364                  $buffer = $buffer . "    " . preg_replace( "/^\%/i","",$line) . "\n";
 365                  $this->block_state = STATE_NOTIKI;
 366          }
 367          else {
 368            // ordinary paragraph
 369            $buffer = $buffer . $this->line_replace($line) . "\n";
 370            $this->block_state = STATE_PARAGRAPH;
 371          }
 372          continue;
 373        }
 374  
 375        if (($this->block_state == STATE_PARAGRAPH) |
 376            ($this->block_state == STATE_BLOCKQUOTE) |
 377            ($this->block_state == STATE_PREFORM) ) {
 378          $buffer = $buffer . $this->line_replace($line) . "\n";
 379          continue;
 380        }
 381        elseif ($this->block_state == STATE_NOTIKI) {
 382          $buffer = $buffer . "    " .$line . "\n";
 383        }
 384      }
 385  
 386      // close off any block level tags
 387      $buffer = $buffer . $this->close_block( $this->block_state );
 388  
 389      //return $buffer;
 390      return $buffer;
 391    }
 392  }