Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403]
<?php

// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Utility function to convert wiki-like to Markdown format
 *
 * @package    core
 * @subpackage lib
 * @copyright  Howard Miller, 2005
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/**#@+
 * state defines
 */
define( "STATE_NONE",1 ); // blank line has been detected, so looking for first line on next para
define( "STATE_PARAGRAPH",2 ); // currently processing vanilla paragraph
define( "STATE_BLOCKQUOTE",3 ); // currently processing blockquote section
define( "STATE_PREFORM",4 ); // currently processing preformatted text
define( "STATE_NOTIKI",5 ); // currently processing preformatted / no formatting
/**#@-*/
/**#@+
 * list defines
 */
define( "LIST_NONE", 1 ); // no lists active
define( "LIST_UNORDERED", 2 ); // unordered list active
define( "LIST_ORDERED", 3 ); // ordered list active
define( "LIST_DEFINITION", 4 ); // definition list active
/**#@-*/

/**
 * Line-by-line converter from the legacy "wiki-like" text format to Markdown.
 *
 * The converter is a small state machine: convert() walks the input one line
 * at a time, tracks which kind of block it is in (STATE_*) and which kind of
 * list is open (LIST_*), and delegates inline formatting to line_replace().
 *
 * @package   moodlecore
 * @copyright Howard Miller, 2005
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class WikiToMarkdown {

    /** Prefix that turns a line into a Markdown code-block line (Markdown needs four spaces or a tab). */
    const CODE_INDENT = '    ';

    /** @var int block currently being processed (one of the STATE_* constants) */
    public $block_state;

    /** @var int style of the list currently open (one of the LIST_* constants) */
    public $list_state;

    /** @var int nesting depth of the list currently open (0 = no list) */
    public $list_depth;

    /** @var string[] stack of closing tags, one per open list level */
    public $list_backtrack;

    /** @var string output buffer */
    public $output;

    /** @var mixed course id handed to convert(); used to build legacy course file URLs */
    public $courseid;

    /**
     * Provide the appropriate closure for a block according to its state.
     *
     * Any list that is still open is closed first.
     *
     * @param int $state one of the STATE_* constants
     * @return string text to append to the output in order to end the block
     */
    public function close_block( $state ) {
        // if in list close this first
        $lclose = "";
        if ($this->list_state != LIST_NONE) {
            $lclose = $this->do_list( " ", true );
        }

        $sclose = "";
        switch ($state) {
            case STATE_PREFORM:
                $sclose = "</pre>\n";
                break;
            case STATE_PARAGRAPH:
            case STATE_BLOCKQUOTE:
            case STATE_NOTIKI:
                $sclose = "\n";
                break;
        }

        return $lclose . $sclose;
    }

    /**
     * Shared implementation of do_replace() and do_replace_markdown().
     *
     * Rewrites "<mark>text<mark>" as "<open>text<close>" when the opening mark
     * follows start-of-line, a space or one of "(.," and the closing mark is
     * followed by a non-alphanumeric character or end-of-line.
     *
     * The patterns are delimited with '#' rather than '/' so that '/' itself
     * can be used as a mark (it is the italic mark). $mark must therefore be
     * regex-escaped by the caller and must not contain an unescaped '#'.
     *
     * @param string $line  line to process
     * @param string $mark  regex-escaped magic character
     * @param string $open  text emitted in place of the opening mark
     * @param string $close text emitted in place of the closing mark
     * @return string processed line
     */
    protected function wrap_marked_text( $line, $mark, $open, $close ) {
        // BODGE: replace inline $mark characters in places where we want them ignored
        // they will be put back after the main substitute, stops problems with eg, and/or
        $bodge = chr(1);
        $line = preg_replace( '#([[:alnum:]])'.$mark.'([[:alnum:]])#i', '\\1'.$bodge.'\\2', $line );

        $regex = '#(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)#i';
        $line = preg_replace( $regex, '\\1'.$open.'\\2'.$close.'\\3', $line );

        // BODGE: back we go. The mark is re-inserted exactly as it was passed in,
        // so an escaped mark such as "\*" comes back as "\*".
        return str_replace( $bodge, $mark, $line );
    }

    /**
     * Do the regex replacement for things like insert, code, cite etc,
     * wrapping the marked text in an HTML element.
     *
     * @param string $line line to process
     * @param string $mark regex-escaped magic character
     * @param string $tag  name of the HTML tag to insert
     * @return string processed line
     */
    public function do_replace( $line, $mark, $tag ) {
        return $this->wrap_marked_text( $line, $mark, '<'.$tag.'>', '</'.$tag.'>' );
    }

    /**
     * Do the regex replacement for things like bold and italic.
     *
     * MARKDOWN version does not generate HTML tags, it just replaces the mark
     * on both sides of the text with $tag.
     *
     * @param string $line line to process
     * @param string $mark regex-escaped magic character
     * @param string $tag  Markdown token to put on both sides of the text
     * @return string processed line
     */
    public function do_replace_markdown( $line, $mark, $tag ) {
        return $this->wrap_marked_text( $line, $mark, $tag, $tag );
    }

    /**
     * Do the regex replacement for subscript and superscript (slightly
     * different: the mark is recognised anywhere in the line).
     *
     * @param string $line line to process
     * @param string $mark regex-escaped magic character (must not contain an unescaped '#')
     * @param string $tag  name of the HTML tag to insert
     * @return string processed line
     */
    public function do_replace_sub( $line, $mark, $tag ) {
        $regex = '#'.$mark.'([^'.$mark.']*)'.$mark.'#i';
        $replace = '<'.$tag.'>\\1</'.$tag.'>';

        return preg_replace( $regex, $replace, $line );
    }

    /**
     * Handle a line with a list character on it.
     *
     * The number of leading list characters is the requested depth. Closing
     * tags are emitted for every level that has been left and opening tags
     * for every level that has been entered, then the item itself is written.
     * Updates $list_state, $list_depth and $list_backtrack.
     *
     * @param string $line  the line, starting with one or more list characters (* # ; :)
     * @param bool   $blank true for a blank line, which implies a drop to level 0
     * @return string replacement text for the line
     */
    public function do_list( $line, $blank=false ) {
        // get magic character and then delete it from the line if not blank
        if ($blank) {
            $listchar = "";
            $count = 0;
        } else {
            $listchar = $line[0];
            $count = strspn( $line, $listchar );
            $line = preg_replace( "/^[".$listchar."]+ /i", "", $line );
        }

        // find what sort of list this character represents
        $list_tag = "";
        $list_close_tag = "";
        $item_tag = "";
        $item_close_tag = "";
        $list_style = LIST_NONE;
        switch ($listchar) {
            case '*':
                // MARKDOWN: bullet item, no surrounding tags
                $item_tag = "*";
                $list_style = LIST_UNORDERED;
                break;
            case '#':
                // MARKDOWN: numbered item, no surrounding tags
                $item_tag = "1.";
                $list_style = LIST_ORDERED;
                break;
            case ';':
                $list_tag = "<dl>";
                $list_close_tag = "</dl>";
                $item_tag = "<dd>";
                $item_close_tag = "</dd>";
                $list_style = LIST_DEFINITION;
                break;
            case ':':
                $list_tag = "<dl>";
                $list_close_tag = "</dl>";
                $item_tag = "<dt>";
                $item_close_tag = "</dt>";
                $list_style = LIST_DEFINITION;
                break;
        }

        // tag opening/closing regime now - fun bit :-)
        $tags = "";

        // if depth has reduced do number of closes to restore level
        for ($i = $this->list_depth; $i > $count; $i--) {
            $tags .= array_pop( $this->list_backtrack );
        }

        // if depth has increased do number of opens to balance
        for ($i = $this->list_depth; $i < $count; $i++) {
            array_push( $this->list_backtrack, $list_close_tag );
            $tags .= $list_tag;
        }

        // ok, so list state is now same as style and depth same as count
        $this->list_state = $list_style;
        $this->list_depth = $count;

        if ($blank) {
            return $tags;
        }

        // one space of indent for every level below the first
        $indent = str_repeat( ' ', max( 0, $count - 1 ) );

        return $tags . $indent . $item_tag . " " . $line . $item_close_tag;
    }

    /**
     * Return a line after the various formatting replacements have been
     * made - order is vital to stop them interfering with each other.
     *
     * Reads $CFG->wwwroot and $this->courseid to build resource and file links.
     *
     * @param string $line a single line of wiki-like text
     * @return string the line converted to Markdown (with some inline HTML)
     */
    public function line_replace( $line ) {
        global $CFG;

        // ---- (at least) means a <hr />
        // MARKDOWN: no change so leave

        // is this a list line (starts with * # ; :)
        if (preg_match( "/^([*]+|[#]+|[;]+|[:]+) /i", $line )) {
            $line = $this->do_list( $line );
        }

        // typographic conventions
        // MARKDOWN: no equiv. so substitute the typographic character as before
        // (the "--" and " - " dash conversions are deliberately disabled)
        $typography = array(
            "..."  => " … ",
            "(R)"  => "®",
            "(r)"  => "®",
            "(TM)" => "™",
            "(tm)" => "™",
            "(C)"  => "©",
            "1/4"  => "¼",
            "1/2"  => "½",
            "3/4"  => "¾",
        );
        $line = str_replace( array_keys( $typography ), array_values( $typography ), $line );
        // (digits) x (digits) - multiply
        $line = preg_replace( "/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i", "\\1×\\2", $line );

        // do formatting tags
        // NOTE: The / replacement *has* to come before the replacements that
        //       add HTML tags, or it will screw the closing tags they insert
        // MARKDOWN: only bold and italic change, rest are just HTML
        $line = $this->do_replace_markdown( $line, '\*', "**" );
        $line = $this->do_replace_markdown( $line, '/', "*" );
        $line = $this->do_replace( $line, '\+', "ins" );
        // the "-" => del replacement is deliberately disabled
        $line = $this->do_replace_sub( $line, '~', "sub" );
        $line = $this->do_replace_sub( $line, '\^', "sup" );
        $line = $this->do_replace( $line, '%', "code" );
        $line = $this->do_replace( $line, '@', "cite" );

        // convert urls into proper link with optional link text URL(text)
        // MARKDOWN: becomes [text](URL)
        // These patterns contain '/' and '#', so they are delimited with '~'.
        $line = preg_replace( '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
                '\\1[\\5](\\2://\\3\\4)', $line );
        // the link text is the fourth group in this pattern
        $line = preg_replace( '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
                '\\1[\\4](http://www.\\2\\3)', $line );

        // make urls (with and without http) into proper links
        // MARKDOWN: becomes <URL>
        $line = preg_replace( '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i',
                '\\1<\\2://\\3\\4>', $line );
        $line = preg_replace( '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])~i',
                '\\1<http://www.\\2\\3>', $line );

        // make email addresses into mailtos....
        // MARKDOWN doesn't quite support this, so do as html
        $line = preg_replace( "/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i",
                "\\1<a href=\"mailto:\\2\">\\3</a>", $line );

        // !# at the beginning of any lines means a heading
        // MARKDOWN: value (1-6) becomes number of hashes
        if (preg_match( "/^!([1-6]) (.*)$/i", $line, $regs )) {
            $line = str_repeat( '#', (int) $regs[1] ) . ' ' . $regs[2];
        }

        // acronym handing, example HTML(Hypertext Markup Language)
        // MARKDOWN: no equiv. so just leave as HTML
        $line = preg_replace( "/([A-Z]+)\(([^)]+)\)/", "<acronym title=\"\\2\">\\1</acronym>", $line );

        // Replace resource link >>##(Description Text)
        // MARKDOWN: change to MD web link style
        $line = preg_replace( "/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i",
                " [\\3](".$CFG->wwwroot."/mod/\\1/view.php?id=\\2) ", $line );

        // Base URL of the legacy course files area, as a plain string
        // (wrapping it in an array made every link start with "Array").
        // NOTE(review): confirm whether the URL returned for a null path already
        // ends in "/"; if it does, the "/" added below produces a double slash.
        $coursefileurl = moodle_url::make_legacyfile_url( $this->courseid, null )->out( false );

        // Replace picture resource link
        $line = preg_replace( "#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i",
                "![\\3](".$coursefileurl."/\\1\\2)", $line );

        // Replace file resource link
        $line = preg_replace( "#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i",
                "[\\2](".$coursefileurl."/\\1)", $line );

        return $line;
    }

    /**
     * Main entry point for processing Wiki-like text.
     *
     * Resets the converter state, then processes $content line by line. A
     * blank line ends the current block; the first line after it decides the
     * type of the next block from its leading characters ("> " blockquote,
     * " " preformatted, "% " unprocessed code, anything else a paragraph).
     *
     * @param string $content  text with Wiki-like formatting
     * @param mixed  $courseid course id used when building course file links
     * @return string text with Markdown formatting
     */
    public function convert( $content, $courseid ) {
        // initialisation stuff
        $this->output = "";
        $this->block_state = STATE_NONE;
        $this->list_state = LIST_NONE;
        $this->list_depth = 0;
        $this->list_backtrack = array();
        $this->courseid = $courseid;

        // split content into array of single lines
        $lines = explode( "\n", $content );
        $buffer = "";

        // run through lines
        foreach ($lines as $line) {
            // is this a blank line?
            if (preg_match( "/^[[:blank:]\r]*$/i", $line )) {
                // end current block according to state
                $buffer .= $this->close_block( $this->block_state );
                $this->block_state = STATE_NONE;
                continue;
            }

            // act now depending on current block state
            if ($this->block_state == STATE_NONE) {
                // first character of line defines block type
                if (preg_match( "/^> /i", $line )) {
                    // blockquote
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_BLOCKQUOTE;
                } else if (preg_match( "/^ /i", $line )) {
                    // preformatted text
                    // MARKDOWN: no real equiv. so just use <pre>
                    $buffer .= "<pre>\n";
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PREFORM;
                } else if (preg_match( "/^% /", $line )) {
                    // preformatted text - no processing
                    // MARKDOWN: this is MD code form of a paragraph
                    $buffer .= self::CODE_INDENT . preg_replace( "/^%/", "", $line ) . "\n";
                    $this->block_state = STATE_NOTIKI;
                } else {
                    // ordinary paragraph
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PARAGRAPH;
                }
                continue;
            }

            if ($this->block_state == STATE_PARAGRAPH ||
                    $this->block_state == STATE_BLOCKQUOTE ||
                    $this->block_state == STATE_PREFORM) {
                $buffer .= $this->line_replace( $line ) . "\n";
            } else if ($this->block_state == STATE_NOTIKI) {
                // continuation of a code block: indent only, no inline processing
                $buffer .= self::CODE_INDENT . $line . "\n";
            }
        }

        // close off any block level tags
        $buffer .= $this->close_block( $this->block_state );

        return $buffer;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body