Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
1 <?php 2 /** 3 * Markdown Extra - A text-to-HTML conversion tool for web writers 4 * 5 * @package php-markdown 6 * @author Michel Fortin <michel.fortin@michelf.com> 7 * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/> 8 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/> 9 */ 10 11 namespace Michelf; 12 13 /** 14 * Markdown Extra Parser Class 15 */ 16 class MarkdownExtra extends \Michelf\Markdown { 17 /** 18 * Configuration variables 19 */ 20 21 /** 22 * Prefix for footnote ids. 23 * @var string 24 */ 25 public $fn_id_prefix = ""; 26 27 /** 28 * Optional title attribute for footnote links and backlinks. 29 * @var string 30 */ 31 public $fn_link_title = ""; 32 public $fn_backlink_title = ""; 33 34 /** 35 * Optional class attribute for footnote links and backlinks. 36 * @var string 37 */ 38 public $fn_link_class = "footnote-ref"; 39 public $fn_backlink_class = "footnote-backref"; 40 41 /** 42 * Content to be displayed within footnote backlinks. The default is '↩'; 43 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS 44 * from displaying the arrow character as an emoji. 45 * @var string 46 */ 47 public $fn_backlink_html = '↩︎'; 48 49 /** 50 * Class name for table cell alignment (%% replaced left/center/right) 51 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 52 * If empty, the align attribute is used instead of a class name. 53 * @var string 54 */ 55 public $table_align_class_tmpl = ''; 56 57 /** 58 * Optional class prefix for fenced code block. 59 * @var string 60 */ 61 public $code_class_prefix = ""; 62 63 /** 64 * Class attribute for code blocks goes on the `code` tag; 65 * setting this to true will put attributes on the `pre` tag instead. 66 * @var boolean 67 */ 68 public $code_attr_on_pre = false; 69 70 /** 71 * Predefined abbreviations. 
* @var array
     */
    public $predef_abbr = array();

    /**
     * Only convert atx-style headers if there's a space between the header and #
     * @var boolean
     */
    public $hashtag_protection = false;

    /**
     * Parser implementation
     */

    /**
     * Build the parser: register the Extra-specific escapable characters and
     * transformation steps, then hand over to the parent constructor.
     * @return void
     */
    public function __construct() {
        // Extra transformations, keyed by method name with their priority.
        // They are merged with `+`, so an entry already present in a gamut
        // keeps its existing priority.
        $extra_document_steps = array(
            "doFencedCodeBlocks" => 5,
            "stripFootnotes"     => 15,
            "stripAbbreviations" => 25,
            "appendFootnotes"    => 50,
        );
        $extra_block_steps = array(
            "doFencedCodeBlocks" => 5,
            "doTables"           => 15,
            "doDefLists"         => 45,
        );
        $extra_span_steps = array(
            "doFootnotes"     => 5,
            "doAbbreviations" => 70,
        );

        // Everything below has to be in place before the parent constructor
        // runs: it initializes the escape table and sorts the gamuts.
        $this->enhanced_ordered_list = true;
        $this->escape_chars = $this->escape_chars . ':|';
        $this->document_gamut = $this->document_gamut + $extra_document_steps;
        $this->block_gamut = $this->block_gamut + $extra_block_steps;
        $this->span_gamut = $this->span_gamut + $extra_span_steps;

        parent::__construct();
    }


    /**
     * Extra variables used during extra transformations.
     * @var array
     */
    protected $footnotes = array();
    protected $footnotes_ordered = array();
    protected $footnotes_ref_count = array();
    protected $footnotes_numbers = array();
    protected $abbr_desciptions = array();
    /** @var string */
    protected $abbr_word_re = '';

    /**
     * Give the current footnote number.
     * @var integer
     */
    protected $footnote_counter = 1;

    /**
     * Setting up Extra-specific variables.
*/
    protected function setup() {
        parent::setup();

        // Reset the per-document state.
        $this->footnotes = array();
        $this->footnotes_ordered = array();
        $this->footnotes_ref_count = array();
        $this->footnotes_numbers = array();
        $this->abbr_desciptions = array();
        $this->abbr_word_re = '';
        $this->footnote_counter = 1;

        // Seed the abbreviation pattern and descriptions from the
        // predefined abbreviations.
        foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
            // Compare against the empty string explicitly: a plain truthiness
            // test treats the pattern "0" as empty and would glue the next
            // word onto it without the '|' separator.
            if ($this->abbr_word_re !== '') {
                $this->abbr_word_re .= '|';
            }
            // Numeric array keys arrive as integers, hence the cast.
            $this->abbr_word_re .= preg_quote((string) $abbr_word);
            $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
        }
    }

    /**
     * Clearing Extra-specific variables.
     */
    protected function teardown() {
        $this->footnotes = array();
        $this->footnotes_ordered = array();
        $this->footnotes_ref_count = array();
        $this->footnotes_numbers = array();
        $this->abbr_desciptions = array();
        $this->abbr_word_re = '';

        parent::teardown();
    }


    /**
     * Extra attribute parser
     */

    /**
     * Expression to use to catch attributes (includes the braces)
     * @var string
     */
    protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';

    /**
     * Expression to use when parsing in a context when no capture is desired
     * @var string
     */
    protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';

    /**
     * Parse attributes caught by the $this->id_class_attr_catch_re expression
     * and return the HTML-formatted list of attributes.
     *
     * Currently supported attributes are .class, #id and key=value pairs.
     *
     * In addition, this method also supports supplying a default Id value,
     * which will be used to populate the id attribute in case it was not
     * overridden.
* @param string $tag_name Name of the tag the attributes are for (not
     *                         read by this implementation)
     * @param string|null $attr Raw attribute text; null or empty when no
     *                          attribute block was matched
     * @param mixed $defaultIdValue Id used when $attr supplies none
     * @param array $classes Class names to emit before those found in $attr
     * @return string Attribute string with a leading space, or "" when
     *                there is nothing to emit
     */
    protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
        if (empty($attr) && !$defaultIdValue && empty($classes)) {
            return "";
        }

        // Split on components. Callers pass null when the optional attribute
        // group did not take part in the match; cast it so the PCRE function
        // always receives a string (passing null is deprecated in PHP 8.1).
        preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
        $elements = $matches[0];

        // Handle classes and IDs (only first ID taken into account)
        $attributes = array();
        $id = false;
        foreach ($elements as $element) {
            if ($element[0] == '.') {
                $classes[] = substr($element, 1);
            } else if ($element[0] == '#') {
                if ($id === false) {
                    $id = substr($element, 1);
                }
            } else if (strpos($element, '=') > 0) {
                // key=value pair: the key must be non-empty, hence "> 0".
                $parts = explode('=', $element, 2);
                $attributes[] = $parts[0] . '="' . $parts[1] . '"';
            }
        }

        if (!$id) {
            $id = $defaultIdValue;
        }

        // Compose attributes as string
        $attr_str = "";
        if (!empty($id)) {
            $attr_str .= ' id="' . $this->encodeAttribute($id) . '"';
        }
        if (!empty($classes)) {
            $attr_str .= ' class="' . implode(" ", $classes) . '"';
        }
        // key=value attributes are dropped when no_markup is set.
        if (!$this->no_markup && !empty($attributes)) {
            $attr_str .= ' ' . implode(" ", $attributes);
        }
        return $attr_str;
    }

    /**
     * Strips link definitions from text, stores the URLs and titles in
     * hash references.
     * @param string $text
     * @return string
     */
    protected function stripLinkDefinitions($text) {
        $less_than_tab = $this->tab_width - 1;

        // Link defs are in the form: ^[id]: url "optional title"
        $text = preg_replace_callback('{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                              [ ]*
                              \n?               # maybe *one* newline
                              [ ]*
                            (?:
                              <(.+?)>           # url = $2
                            |
                              (\S+?)            # url = $3
                            )
                              [ ]*
                              \n?               # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)         # lookbehind for whitespace
                                ["(]
                                (.*?)           # title = $4
                                [")]
                                [ ]*
                            )?  # title is optional
                    (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
                            (?:\n+|\Z)
            }xm',
            array($this, '_stripLinkDefinitions_callback'),
            $text);
        return $text;
    }

    /**
     * Strip link definition callback: records the URL, title and extra
     * attributes of one definition under its lower-cased id.
     * @param array $matches
     * @return string
     */
    protected function _stripLinkDefinitions_callback($matches) {
        $link_id = strtolower($matches[1]);
        $url = $matches[2] == '' ? $matches[3] : $matches[2];
        $this->urls[$link_id] = $url;
        // The reference assignments below read optional groups that may be
        // missing from $matches; by reference, a missing entry yields null
        // instead of raising an undefined-offset notice.
        $this->titles[$link_id] =& $matches[4];
        $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
        return ''; // String that will replace the block
    }


    /**
     * HTML block parser
     */

    /**
     * Tags that are always treated as block tags
     * @var string
     */
    protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';

    /**
     * Tags treated as block tags only if the opening tag is alone on its line
     * @var string
     */
    protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';

    /**
     * Tags where markdown="1" default to span mode:
     * @var string
     */
    protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

    /**
     * Tags which must not have their contents modified, no matter where
     * they appear
     * @var string
     */
    protected $clean_tags_re = 'script|style|math|svg';

    /**
     * Tags that do not need to be closed.
     * @var string
     */
    protected $auto_close_tags_re = 'hr|img|param|source|track';

    /**
     * Hashify HTML Blocks and "clean tags".
     *
     * We only want to do this for block-level HTML tags, such as headers,
     * lists, and tables.
That's because we still want to wrap <p>s around
     * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
     * phrase emphasis, and spans. The list of tags we're looking for is
     * hard-coded.
     *
     * This works by calling _HashHTMLBlocks_InMarkdown, which then calls
     * _HashHTMLBlocks_InHTML when it encounters block tags. When the
     * markdown="1" attribute is found within a tag, _HashHTMLBlocks_InHTML
     * calls back _HashHTMLBlocks_InMarkdown to handle the Markdown syntax
     * within the tag. These two functions are calling each other. It's
     * recursive!
     * @param string $text
     * @return string
     */
    protected function hashHTMLBlocks($text) {
        // When the no_markup flag is set the text is returned untouched.
        if (!$this->no_markup) {
            // The HTML-in-Markdown hasher returns
            // ( processed text , remaining text ); only the first is kept.
            $result = $this->_hashHTMLBlocks_inMarkdown($text);
            $text = $result[0];
        }

        return $text;
    }

    /**
     * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
     *
     * *   $indent is the number of space to be ignored when checking for code
     *     blocks. This is important because if we don't take the indent into
     *     account, something like this (which looks right) won't work as expected:
     *
     *         <div>
     *             <div markdown="1">
     *             Hello World.  <-- Is this a Markdown code block or text?
     *             </div>  <-- Is this a Markdown code block or a real tag?
     *         <div>
     *
     *     If you don't like this, just don't indent the tag on which
     *     you apply the markdown="1" attribute.
     *
     * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
     *     tag with that name. Nested tags supported.
     *
     * *   If $span is true, text inside must be treated as span. So any double
     *     newline will be replaced by a single newline so that it does not create
     *     paragraphs.
*
     * Returns an array of that form: ( processed text , remaining text )
     *
     * @param string  $text
     * @param integer $indent
     * @param string  $enclosing_tag_re
     * @param boolean $span
     * @return array
     */
    protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                        $enclosing_tag_re = '', $span = false)
    {

        if ($text === '') return array('', '');

        // Regex to check for the presence of newlines around a block tag.
        $newline_before_re = '/(?:^\n?|\n\n)*$/';
        $newline_after_re =
            '{
                ^                       # Start of text following the tag.
                (?>[ ]*<!--.*?-->)?     # Optional comment.
                [ ]*\n                  # Must be followed by newline.
            }xs';

        // Regex to match any tag.
        $block_tag_re =
            '{
                (                   # $2: Capture whole tag.
                    </?                 # Any opening or closing tag.
                        (?>             # Tag name.
                            ' . $this->block_tags_re . '            |
                            ' . $this->context_block_tags_re . '    |
                            ' . $this->clean_tags_re . '            |
                            (?!\s)'.$enclosing_tag_re . '
                        )
                        (?:
                            (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                            (?>
                                ".*?"       |   # Double quotes (can contain `>`)
                                \'.*?\'     |   # Single quotes (can contain `>`)
                                .+?             # Anything but quotes and `>`.
                            )*?
                        )?
                    >                   # End of tag.
                |
                    <!--    .*?     --> # HTML Comment
                |
                    <\?.*?\?> | <%.*?%> # Processing instruction
                |
                    <!\[CDATA\[.*?\]\]> # CData Block
                ' . ( !$span ? ' # If not in span.
                |
                    # Indented code block
                    (?: ^[ ]*\n | ^ | \n[ ]*\n )
                    [ ]{' . ($indent + 4) . '}[^\n]* \n
                    (?>
                        (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
                    )*
                |
                    # Fenced code block marker
                    (?<= ^ | \n )
                    [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
                    [ ]*
                    (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
                    [ ]*
                    (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
                    [ ]*
                    (?= \n )
                ' : '' ) . ' # End (if not is span).
                |
                    # Code span marker
                    # Note, this regex needs to go after backtick fenced
                    # code blocks but it should also be kept outside of the
                    # "if not in span" condition adding backticks to the parser
                    `+
                )
            }xs';


        $depth = 0;     // Current depth inside the tag tree.
        $parsed = "";   // Parsed text that will be returned.

        // Loop through every tag until we find the closing tag of the parent
        // or loop until reaching the end of text if no parent tag specified.
        do {
            // Split the text using the first $tag_match pattern found.
            // Text before pattern will be first in the array, text after
            // pattern will be at the end, and between will be any catches made
            // by the pattern.
            $parts = preg_split($block_tag_re, $text, 2,
                                PREG_SPLIT_DELIM_CAPTURE);

            // If in Markdown span mode, add an empty-string span-level hash
            // after each newline to prevent triggering any block element.
            if ($span) {
                $void = $this->hashPart("", ':');
                $newline = "\n$void";
                $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
            }

            $parsed .= $parts[0]; // Text before current tag.

            // If end of $text has been reached. Stop loop.
            if (count($parts) < 3) {
                $text = "";
                break;
            }

            $tag  = $parts[1]; // Tag to handle.
            $text = $parts[2]; // Remaining text after current tag.

            // Check for: Fenced code block marker.
            // Note: need to recheck the whole tag to disambiguate backtick
            // fences from code spans
            if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
                // Fenced code block marker: find matching end marker.
                $fence_indent = strlen($capture[1]); // use captured indent in re
                $fence_re = $capture[2]; // use captured fence in re
                if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
                    $matches))
                {
                    // End marker found: pass text unchanged until marker.
                    $parsed .= $tag . $matches[0];
                    $text = substr($text, strlen($matches[0]));
                }
                else {
                    // No end marker: just skip it.
                    $parsed .= $tag;
                }
            }
            // Check for: Indented code block.
            else if ($tag[0] == "\n" || $tag[0] == " ") {
                // Indented code block: pass it unchanged, will be handled
                // later.
                $parsed .= $tag;
            }
            // Check for: Code span marker
            // Note: need to check this after backtick fenced code blocks
            else if ($tag[0] == "`") {
                // Find corresponding end marker.
                $tag_re = preg_quote($tag);
                if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
                    $text, $matches))
                {
                    // End marker found: pass text unchanged until marker.
                    $parsed .= $tag . $matches[0];
                    $text = substr($text, strlen($matches[0]));
                }
                else {
                    // Unmatched marker: just skip it.
                    $parsed .= $tag;
                }
            }
            // Check for: Opening Block level tag or
            //            Opening Context Block tag (like ins and del)
            //               used as a block tag (tag is alone on its line).
            else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
                (   preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
                    preg_match($newline_before_re, $parsed) &&
                    preg_match($newline_after_re, $text)    )
                )
            {
                // Need to parse tag and following text using the HTML parser.
                list($block_text, $text) =
                    $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

                // Make sure it stays outside of any paragraph by adding newlines.
                $parsed .= "\n\n$block_text\n\n";
            }
            // Check for: Clean tag (like script, math)
            //            HTML Comments, processing instructions.
            else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
                $tag[1] == '!' || $tag[1] == '?')
            {
                // Need to parse tag and following text using the HTML parser.
                // (don't check for markdown attribute)
                list($block_text, $text) =
                    $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

                $parsed .= $block_text;
            }
            // Check for: Tag with same name as enclosing tag.
            else if ($enclosing_tag_re !== '' &&
                // Same name as enclosing tag.
                preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
            {
                // Increase/decrease nested tag count. A self-closed tag
                // (ending in "/>") leaves the depth unchanged.
                if ($tag[1] == '/') $depth--;
                else if ($tag[strlen($tag)-2] != '/') $depth++;

                if ($depth < 0) {
                    // Going out of parent element. Clean up and break so we
                    // return to the calling function.
                    $text = $tag . $text;
                    break;
                }

                $parsed .= $tag;
            }
            else {
                $parsed .= $tag;
            }
        } while ($depth >= 0);

        return array($parsed, $text);
    }

    /**
     * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
     *
     * *   Calls $hash_method to convert any blocks.
     * *   Stops when the first opening tag closes.
     * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
     *     (it is not inside clean tags)
     *
     * Returns an array of that form: ( processed text , remaining text )
     * @param string  $text
     * @param string  $hash_method Name of the method of this object used to
     *                             hash each finished block
     * @param boolean $md_attr
     * @return array
     */
    protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
        if ($text === '') return array('', '');

        // Regex to match `markdown` attribute inside of a tag.
        $markdown_attr_re = '
            {
                \s*         # Eat whitespace before the `markdown` attribute
                markdown
                \s*=\s*
                (?>
                    (["\'])     # $1: quote delimiter
                    (.*?)       # $2: attribute value
                    \1          # matching delimiter
                |
                    ([^\s>]*)   # $3: unquoted attribute value
                )
                ()              # $4: make $3 always defined (avoid warnings)
            }xs';

        // Regex to match any tag.
        $tag_re = '{
                (                   # $2: Capture whole tag.
                    </?                 # Any opening or closing tag.
                        [\w:$]+         # Tag name.
                        (?:
                            (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                            (?>
                                ".*?"       |   # Double quotes (can contain `>`)
                                \'.*?\'     |   # Single quotes (can contain `>`)
                                .+?             # Anything but quotes and `>`.
                            )*?
                        )?
                    >                   # End of tag.
                |
                    <!--    .*?     --> # HTML Comment
                |
                    <\?.*?\?> | <%.*?%> # Processing instruction
                |
                    <!\[CDATA\[.*?\]\]> # CData Block
                )
            }xs';

        $original_text = $text;     // Save original text in case of failure.

        $depth      = 0;    // Current depth inside the tag tree.
        $block_text = "";   // Temporary text holder for current text.
        $parsed     = "";   // Parsed text that will be returned.

        // Get the name of the starting tag.
        // (This pattern makes $base_tag_name_re safe without quoting.)
        // Initialized first so it is always defined, even when the text
        // starts with a comment or processing instruction and the pattern
        // does not match.
        $base_tag_name_re = '';
        if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
            $base_tag_name_re = $matches[1];

        // Loop through every tag until we find the corresponding closing tag.
        do {
            // Split the text using the first $tag_match pattern found.
            // Text before pattern will be first in the array, text after
            // pattern will be at the end, and between will be any catches made
            // by the pattern.
            $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

            if (count($parts) < 3) {
                // End of $text reached with unbalanced tag(s).
                // In that case, we return original text unchanged and pass the
                // first character as filtered to prevent an infinite loop in the
                // parent function.
                return array($original_text[0], substr($original_text, 1));
            }

            $block_text .= $parts[0]; // Text before current tag.
            $tag         = $parts[1]; // Tag to handle.
            $text        = $parts[2]; // Remaining text after current tag.

            // Check for: Auto-close tag (like <hr/>)
            //           Comments and Processing Instructions.
            if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
                $tag[1] == '!' || $tag[1] == '?')
            {
                // Just add the tag to the block as if it was text.
                $block_text .= $tag;
            }
            else {
                // Increase/decrease nested tag count. Only do so if
                // the tag's name match base tag's.
                if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
                    if ($tag[1] == '/') $depth--;
                    else if ($tag[strlen($tag)-2] != '/') $depth++;
                }

                // Check for `markdown="1"` attribute and handle it.
                // The alternatives are grouped so that both anchors apply to
                // each of them; ungrouped, `^1|block|span$` would also accept
                // values such as "10", "blockquote" or "myspan".
                if ($md_attr &&
                    preg_match($markdown_attr_re, $tag, $attr_m) &&
                    preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
                {
                    // Remove `markdown` attribute from opening tag.
                    $tag = preg_replace($markdown_attr_re, '', $tag);

                    // Check if text inside this tag must be parsed in span mode.
                    // NOTE(review): $this->mode is not declared in the part of
                    // the class visible here -- confirm it is declared
                    // elsewhere (dynamic properties are deprecated in PHP 8.2).
                    $this->mode = $attr_m[2] . $attr_m[3];
                    $span_mode = $this->mode == 'span' || ($this->mode != 'block' &&
                        preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

                    // Calculate indent before tag.
                    if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                        $strlen = $this->utf8_strlen;
                        $indent = $strlen($matches[1], 'UTF-8');
                    } else {
                        $indent = 0;
                    }

                    // End preceding block with this tag.
                    $block_text .= $tag;
                    $parsed .= $this->$hash_method($block_text);

                    // Get enclosing tag name for the ParseMarkdown function.
                    // (This pattern makes $tag_name_re safe without quoting.)
                    preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                    $tag_name_re = $matches[1];

                    // Parse the content using the HTML-in-Markdown parser.
                    list ($block_text, $text)
                        = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                            $tag_name_re, $span_mode);

                    // Outdent markdown text.
                    if ($indent > 0) {
                        $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                    $block_text);
                    }

                    // Append tag content to parsed text.
                    if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                    else                $parsed .= "$block_text";

                    // Start over with a new block.
                    $block_text = "";
                }
                else $block_text .= $tag;
            }

        } while ($depth > 0);

        // Hash last block text that wasn't processed inside the loop.
        $parsed .= $this->$hash_method($block_text);

        return array($parsed, $text);
    }

    /**
     * Called whenever a tag must be hashed when a function inserts a "clean" tag
     * in $text, it passes through this function and is automatically escaped,
     * blocking invalid nested overlap.
     * @param string $text
     * @return string
     */
    protected function hashClean($text) {
        return $this->hashPart($text, 'C');
    }

    /**
     * Turn Markdown link shortcuts into XHTML <a> tags.
     * @param string $text
     * @return string
     */
    protected function doAnchors($text) {
        // Links are not processed inside the text of another link.
        if ($this->in_anchor) {
            return $text;
        }
        $this->in_anchor = true;

        // First, handle reference-style links: [link text] [id]
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                (' . $this->nested_brackets_re . ') # link text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        // Next, inline-style links: [link text](url "optional title")
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              \[
                (' . $this->nested_brackets_re . ') # link text = $2
              \]
              \(            # literal paren
                [ \n]*
                (?:
                    <(.+?)> # href = $3
                |
                    (' . $this->nested_url_parenthesis_re . ')  # href = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # Title = $7
                  \6        # matching quote
                  [ \n]*    # ignore any spaces/tabs between closing quote and )
                )?          # title is optional
              \)
              (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?   # $8 = id/class attributes
            )
            }xs',
            array($this, '_doAnchors_inline_callback'), $text);

        // Last, handle reference-style shortcuts: [link text]
        // These must come last in case you've also got [link text][1]
        // or [link text](/foo)
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }

    /**
     * Callback for reference anchors
     * @param array $matches
     * @return string
     */
    protected function _doAnchors_reference_callback($matches) {
        $whole_match =  $matches[1];
        $link_text   =  $matches[2];
        // By reference: the shortcut pattern has no third group, and this
        // yields null instead of an undefined-offset notice.
        $link_id     =& $matches[3];

        if ($link_id == "") {
            // for shortcut links like [this][] or [this].
            $link_id = $link_text;
        }

        // lower-case and turn embedded newlines into spaces
        $link_id = strtolower($link_id);
        $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

        if (isset($this->urls[$link_id])) {
            $url = $this->urls[$link_id];
            $url = $this->encodeURLAttribute($url);

            $result = "<a href=\"$url\"";
            if ( isset( $this->titles[$link_id] ) ) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }
            if (isset($this->ref_attr[$link_id]))
                $result .= $this->ref_attr[$link_id];

            $link_text = $this->runSpanGamut($link_text);
            $result .= ">$link_text</a>";
            $result = $this->hashPart($result);
        }
        else {
            // No definition for this id: leave the text as it was.
            $result = $whole_match;
        }
        return $result;
    }

    /**
     * Callback for inline anchors
     * @param array $matches
     * @return string
     */
    protected function _doAnchors_inline_callback($matches) {
        $whole_match    =  $matches[1];
        $link_text      =  $this->runSpanGamut($matches[2]);
        $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
        // Optional groups are read by reference so that a missing entry
        // yields null instead of an undefined-offset notice.
        $title          =& $matches[7];
        $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);

        // if the URL was of the form <s p a c e s> it got caught by the HTML
        // tag parser and hashed. Need to reverse the process before using the URL.
        $unhashed = $this->unhash($url);
        if ($unhashed != $url)
            $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

        $url = $this->encodeURLAttribute($url);

        $result = "<a href=\"$url\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\"";
        }
        $result .= $attr;

        // NOTE(review): the link text already went through runSpanGamut at
        // the top of this method; the second pass is kept as-is -- confirm
        // whether it is intentional.
        $link_text = $this->runSpanGamut($link_text);
        $result .= ">$link_text</a>";

        return $this->hashPart($result);
    }

    /**
     * Turn Markdown image shortcuts into <img> tags.
907 * @param string $text 908 * @return string 909 */ 910 protected function doImages($text) { 911 // First, handle reference-style labeled images: ![alt text][id] 912 $text = preg_replace_callback('{ 913 ( # wrap whole match in $1 914 !\[ 915 (' . $this->nested_brackets_re . ') # alt text = $2 916 \] 917 918 [ ]? # one optional space 919 (?:\n[ ]*)? # one optional newline followed by spaces 920 921 \[ 922 (.*?) # id = $3 923 \] 924 925 ) 926 }xs', 927 array($this, '_doImages_reference_callback'), $text); 928 929 // Next, handle inline images: ![alt text](url "optional title") 930 // Don't forget: encode * and _ 931 $text = preg_replace_callback('{ 932 ( # wrap whole match in $1 933 !\[ 934 (' . $this->nested_brackets_re . ') # alt text = $2 935 \] 936 \s? # One optional whitespace character 937 \( # literal paren 938 [ \n]* 939 (?: 940 <(\S*)> # src url = $3 941 | 942 (' . $this->nested_url_parenthesis_re . ') # src url = $4 943 ) 944 [ \n]* 945 ( # $5 946 ([\'"]) # quote char = $6 947 (.*?) # title = $7 948 \6 # matching quote 949 [ \n]* 950 )? # title is optional 951 \) 952 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 953 ) 954 }xs', 955 array($this, '_doImages_inline_callback'), $text); 956 957 return $text; 958 } 959 960 /** 961 * Callback for referenced images 962 * @param array $matches 963 * @return string 964 */ 965 protected function _doImages_reference_callback($matches) { 966 $whole_match = $matches[1]; 967 $alt_text = $matches[2]; 968 $link_id = strtolower($matches[3]); 969 970 if ($link_id == "") { 971 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 
}

        $alt_text = $this->encodeAttribute($alt_text);
        if (isset($this->urls[$link_id])) {
            $url = $this->encodeURLAttribute($this->urls[$link_id]);
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (isset($this->titles[$link_id])) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .= " title=\"$title\"";
            }
            if (isset($this->ref_attr[$link_id]))
                $result .= $this->ref_attr[$link_id];
            $result .= $this->empty_element_suffix;
            $result = $this->hashPart($result);
        }
        else {
            // If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        return $result;
    }

    /**
     * Callback for inline images.
     *
     * Builds the <img> tag from the regex captures: $2 is the alt text,
     * $3/$4 are the two alternative URL captures, $7 the optional title and
     * $8 the optional extra-attribute block.
     *
     * @param  array $matches
     * @return string Hashed <img> element
     */
    protected function _doImages_inline_callback($matches) {
        $alt_text = $matches[2];
        $url      = $matches[3] == '' ? $matches[4] : $matches[3];
        // Bound by reference so that a missing capture group reads as null
        // instead of raising an undefined-index notice.
        $title    =& $matches[7];
        $attr     = $this->doExtraAttributes("img", $dummy =& $matches[8]);

        $alt_text = $this->encodeAttribute($alt_text);
        $url = $this->encodeURLAttribute($url);
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .= " title=\"$title\"";
        }
        $result .= $attr;
        $result .= $this->empty_element_suffix;

        return $this->hashPart($result);
    }

    /**
     * Process markdown headers. Redefined to add ID and class attribute support.
     * @param  string $text
     * @return string
     */
    protected function doHeaders($text) {
        // Setext-style headers:
        //  Header 1  {#header1}
        //  ========
        //
        //  Header 2  {#header2 .class1 .class2}
        //  --------
        //
        $text = preg_replace_callback(
            '{
                (^.+?)                              # $1: Header text
                (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?   # $2 = id/class attributes
                [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
            }mx',
            array($this, '_doHeaders_callback_setext'), $text);

        // atx-style headers:
        //  # Header 1        {#header1}
        //  ## Header 2       {#header2}
        //  ## Header 2 with closing hashes ##  {#header3.class1.class2}
        //  ...
        //  ###### Header 6   {.class2}
        //
        $text = preg_replace_callback('{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]'.($this->hashtag_protection ? '+' : '*').'
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?   # $3 = id/class attributes
                [ ]*
                \n+
            }xm',
            array($this, '_doHeaders_callback_atx'), $text);

        return $text;
    }

    /**
     * Callback for setext headers
     * @param  array $matches $1 = header text, $2 = attributes, $3 = underline
     * @return string
     */
    protected function _doHeaders_callback_setext($matches) {
        // A single dash under a line that starts with "- " is left untouched
        // rather than turned into a header.
        if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
            return $matches[0];
        }

        $level = $matches[3][0] == '=' ? 1 : 2;

        $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;

        $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
        $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }

    /**
     * Callback for atx headers
     * @param  array $matches $1 = leading hashes, $2 = header text, $3 = attributes
     * @return string
     */
    protected function _doHeaders_callback_atx($matches) {
        $level = strlen($matches[1]);

        $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
        $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
        $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }

    /**
     * Form HTML tables.
     * @param  string $text
     * @return string
     */
    protected function doTables($text) {
        $less_than_tab = $this->tab_width - 1;
        // Find tables with leading pipe.
        //
        //  | Header 1 | Header 2
        //  | -------- | --------
        //  | Cell 1   | Cell 2
        //  | Cell 3   | Cell 4
        $text = preg_replace_callback('{
                ^                           # Start of a line
                [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                [|]                         # Optional leading pipe (present)
                (.+) \n                     # $1: Header row (at least one pipe)

                [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

                (                           # $3: Cells
                    (?>
                        [ ]*                # Allowed whitespace.
                        [|] .* \n           # Row content.
                    )*
                )
                (?=\n|\Z)                   # Stop at final double newline.
            }xm',
            array($this, '_doTable_leadingPipe_callback'), $text);

        // Find tables without leading pipe.
        //
        //  Header 1 | Header 2
        //  -------- | --------
        //  Cell 1   | Cell 2
        //  Cell 3   | Cell 4
        $text = preg_replace_callback('{
                ^                           # Start of a line
                [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                (\S.*[|].*) \n              # $1: Header row (at least one pipe)

                [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
                ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

                (                           # $3: Cells
                    (?>
                        .* [|] .* \n        # Row content
                    )*
                )
                (?=\n|\Z)                   # Stop at final double newline.
            }xm',
            array($this, '_doTable_callback'), $text);

        return $text;
    }

    /**
     * Callback for removing the leading pipe for each row
     * @param  array $matches
     * @return string
     */
    protected function _doTable_leadingPipe_callback($matches) {
        $head      = $matches[1];
        $underline = $matches[2];
        $content   = $matches[3];

        $content = preg_replace('/^ *[|]/m', '', $content);

        return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
    }

    /**
     * Make the align attribute in a table
     * @param  string $alignname 'left', 'center' or 'right'
     * @return string An align="…" attribute, or a class="…" attribute when
     *                $table_align_class_tmpl is set
     */
    protected function _doTable_makeAlignAttr($alignname)
    {
        if (empty($this->table_align_class_tmpl)) {
            return " align=\"$alignname\"";
        }

        $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return " class=\"$classname\"";
    }

    /**
     * Callback for processing tables
     * @param  array $matches $1 = header row, $2 = underline, $3 = body rows
     * @return string
     */
    protected function _doTable_callback($matches) {
        $head      = $matches[1];
        $underline = $matches[2];
        $content   = $matches[3];

        // Remove any trailing pipes for each line.
        $head      = preg_replace('/[|] *$/m', '', $head);
        $underline = preg_replace('/[|] *$/m', '', $underline);
        $content   = preg_replace('/[|] *$/m', '', $content);

        // Reading alignment from header underline.
        $attr = array();
        $separators = preg_split('/ *[|] */', $underline);
        foreach ($separators as $n => $s) {
            if (preg_match('/^ *-+: *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('right');
            else if (preg_match('/^ *:-+: *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('center');
            else if (preg_match('/^ *:-+ *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('left');
            else
                $attr[$n] = '';
        }

        // Parsing span elements, including code spans, character escapes,
        // and inline HTML tags, so that pipes inside those get ignored.
        $head      = $this->parseSpan($head);
        $headers   = preg_split('/ *[|] */', $head);
        $col_count = count($headers);
        // Columns without an underline segment get no alignment attribute.
        $attr      = array_pad($attr, $col_count, '');

        // Write column headers.
        $text = "<table>\n";
        $text .= "<thead>\n";
        $text .= "<tr>\n";
        foreach ($headers as $n => $header)
            $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
        $text .= "</tr>\n";
        $text .= "</thead>\n";

        // Split content by row.
        $rows = explode("\n", trim($content, "\n"));

        $text .= "<tbody>\n";
        foreach ($rows as $row) {
            // Parsing span elements, including code spans, character escapes,
            // and inline HTML tags, so that pipes inside those get ignored.
            $row = $this->parseSpan($row);

            // Split row by cell; surplus cells are folded into the last
            // column and missing cells are padded with empty strings.
            $row_cells = preg_split('/ *[|] */', $row, $col_count);
            $row_cells = array_pad($row_cells, $col_count, '');

            $text .= "<tr>\n";
            foreach ($row_cells as $n => $cell)
                $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
            $text .= "</tr>\n";
        }
        $text .= "</tbody>\n";
        $text .= "</table>";

        return $this->hashBlock($text) . "\n";
    }

    /**
     * Form HTML definition lists.
* @param  string $text
     * @return string
     */
    protected function doDefLists($text) {
        $less_than_tab = $this->tab_width - 1;

        // Re-usable pattern to match any entire dl list:
        $whole_list_re = '(?>
            (                               # $1 = whole list
              (                             # $2
                [ ]{0,' . $less_than_tab . '}
                ((?>.*\S.*\n)+)             # $3 = defined term
                \n?
                [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another term
                    [ ]{0,' . $less_than_tab . '}
                    (?: \S.*\n )+?          # defined term
                    \n?
                    [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
                  )
                  (?!                       # Negative lookahead for another definition
                    [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
                  )
              )
            )
        )'; // mx

        $text = preg_replace_callback('{
                (?>\A\n?|(?<=\n\n))
                ' . $whole_list_re . '
            }mx',
            array($this, '_doDefLists_callback'), $text);

        return $text;
    }

    /**
     * Callback for processing definition lists
     * @param  array $matches $1 = the whole definition list source
     * @return string Hashed <dl> block followed by a blank line
     */
    protected function _doDefLists_callback($matches) {
        $list = $matches[1];

        // Convert the terms and definitions to <dt>/<dd> markup, then wrap
        // the trimmed result in a <dl> element.
        $result = trim($this->processDefListItems($list));
        $result = "<dl>\n" . $result . "\n</dl>";
        return $this->hashBlock($result) . "\n\n";
    }

    /**
     * Process the contents of a single definition list, splitting it
     * into individual term and definition list items.
* @param  string $list_str
     * @return string
     */
    protected function processDefListItems($list_str) {

        $less_than_tab = $this->tab_width - 1;

        // Trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        // Process definition terms.
        $list_str = preg_replace_callback('{
            (?>\A\n?|\n\n+)                     # leading line
            (                                   # definition terms = $1
                [ ]{0,' . $less_than_tab . '}   # leading whitespace
                (?!\:[ ]|[ ])                   # negative lookahead for a definition
                                                #   mark (colon) or more whitespace.
                (?> \S.* \n)+?                  # actual term (not whitespace).
            )
            (?=\n?[ ]{0,' . $less_than_tab . '}:[ ])    # lookahead for following line feed
                                                #   with a definition mark.
            }xm',
            array($this, '_processDefListItems_callback_dt'), $list_str);

        // Process actual definitions.
        $list_str = preg_replace_callback('{
            \n(\n+)?                            # leading line = $1
            (                                   # marker space = $2
                [ ]{0,' . $less_than_tab . '}   # whitespace before colon
                \:[ ]+                          # definition mark (colon)
            )
            ((?s:.+?))                          # definition text = $3
            (?= \n+                             # stop at next definition mark,
                (?:                             # next term or end of text
                    [ ]{0,' . $less_than_tab . '} \:[ ] |
                    <dt> | \z
                )
            )
            }xm',
            array($this, '_processDefListItems_callback_dd'), $list_str);

        return $list_str;
    }

    /**
     * Callback for <dt> elements in definition lists
     * @param  array $matches $1 = one or more term lines
     * @return string One <dt> element per term line
     */
    protected function _processDefListItems_callback_dt($matches) {
        $terms = explode("\n", trim($matches[1]));
        $text = '';
        foreach ($terms as $term) {
            $term = $this->runSpanGamut(trim($term));
            $text .= "\n<dt>" . $term . "</dt>";
        }
        return $text . "\n";
    }

    /**
     * Callback for <dd> elements in definition lists
     * @param  array $matches $1 = leading blank line(s), $2 = marker, $3 = text
     * @return string
     */
    protected function _processDefListItems_callback_dd($matches) {
        $leading_line = $matches[1];
        $marker_space = $matches[2];
        $def          = $matches[3];

        // A definition preceded by a blank line, or containing one, is
        // processed as block content; otherwise it is a single span.
        if ($leading_line || preg_match('/\n{2,}/', $def)) {
            // Replace marker with the appropriate whitespace indentation
            $def = str_repeat(' ', strlen($marker_space)) . $def;
            $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
            $def = "\n". $def ."\n";
        }
        else {
            $def = rtrim($def);
            $def = $this->runSpanGamut($this->outdent($def));
        }

        return "\n<dd>" . $def . "</dd>\n";
    }

    /**
     * Adding the fenced code block syntax to regular Markdown:
     *
     * ~~~
     * Code block
     * ~~~
     *
     * @param  string $text
     * @return string
     */
    protected function doFencedCodeBlocks($text) {

        $text = preg_replace_callback('{
                (?:\n|\A)
                # 1: Opening marker
                (
                    (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
                )
                [ ]*
                (?:
                    \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
                )?
                [ ]*
                (?:
                    ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
                )?
                [ ]* \n # Whitespace and newline following marker.

                # 4: Content
                (
                    (?>
                        (?!\1 [ ]* \n)  # Not a closing marker.
                        .*\n+
                    )+
                )

                # Closing marker.
                \1 [ ]* (?= \n )
            }xm',
            array($this, '_doFencedCodeBlocks_callback'), $text);

        return $text;
    }

    /**
     * Callback to process fenced code blocks
     * @param  array $matches $2 = class name, $3 = extra attributes, $4 = code
     * @return string
     */
    protected function _doFencedCodeBlocks_callback($matches) {
        // Bound by reference so that missing optional groups read as null.
        $classname =& $matches[2];
        $attrs     =& $matches[3];
        $codeblock = $matches[4];

        if ($this->code_block_content_func) {
            $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
        } else {
            $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
        }

        // Leading blank lines are turned into <br> tags.
        $codeblock = preg_replace_callback('/^\n+/',
            array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

        $classes = array();
        if ($classname != "") {
            if ($classname[0] == '.')
                $classname = substr($classname, 1);
            $classes[] = $this->code_class_prefix . $classname;
        }
        $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
        $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
        $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
        $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

        return "\n\n".$this->hashBlock($codeblock)."\n\n";
    }

    /**
     * Replace new lines in fenced code blocks
     * @param  array $matches $0 = run of newlines
     * @return string One <br> tag per matched newline
     */
    protected function _doFencedCodeBlocks_newlines($matches) {
        return str_repeat("<br$this->empty_element_suffix",
            strlen($matches[0]));
    }

    /**
     * Redefining emphasis markers so that emphasis by underscore does not
     * work in the middle of a word.
* @var array
     */
    protected $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
    );
    protected $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
    );
    protected $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
    );

    /**
     * Parse text into paragraphs
     * @param  string $text String to process in paragraphs
     * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
     * @return string HTML output
     */
    protected function formParagraphs($text, $wrap_in_p = true) {
        // Strip leading and trailing lines:
        $text = preg_replace('/\A\n+|\n+\z/', '', $text);

        $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // Wrap <p> tags and unhashify HTML blocks
        foreach ($grafs as $key => $value) {
            $value = trim($this->runSpanGamut($value));

            // Check if this should be enclosed in a paragraph.
            // Clean tag hashes & block tag hashes are left alone.
            // NOTE(review): the "B" alternative is anchored only at the
            // start while the "C" alternative is anchored at both ends —
            // confirm this asymmetry is intended before changing it.
            $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

            if ($is_p) {
                $value = "<p>$value</p>";
            }
            $grafs[$key] = $value;
        }

        // Join grafs in one text, then unhash HTML tags.
        $text = implode("\n\n", $grafs);

        // Finish by removing any tag hashes still present in $text.
        $text = $this->unhash($text);

        return $text;
    }


    /**
     * Footnotes - Strips footnote definitions from text and stores their
     * content, keyed by note id, for later use.
* @param  string $text
     * @return string
     */
    protected function stripFootnotes($text) {
        $less_than_tab = $this->tab_width - 1;

        // Footnote definitions are in the form: [^id]: footnote text
        // The text may continue on following lines.
        $text = preg_replace_callback('{
            ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?:  # note_id = $1
              [ ]*
              \n?                   # maybe *one* newline
            (                       # text = $2 (no blank lines allowed)
                (?:
                    .+              # actual text
                |
                    \n              # newlines but
                    (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                    (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                    # by non-indented content
                )*
            )
            }xm',
            array($this, '_stripFootnotes_callback'),
            $text);
        return $text;
    }

    /**
     * Callback for stripping footnotes
     * @param  array $matches $1 = note id, $2 = footnote text
     * @return string Empty string, removing the definition from the text
     */
    protected function _stripFootnotes_callback($matches) {
        $note_id = $this->fn_id_prefix . $matches[1];
        $this->footnotes[$note_id] = $this->outdent($matches[2]);
        return ''; // String that will replace the block
    }

    /**
     * Replace footnote references in $text [^id] with a special text-token
     * which will be replaced by the actual footnote marker in appendFootnotes.
     * @param  string $text
     * @return string
     */
    protected function doFootnotes($text) {
        if (!$this->in_anchor) {
            $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
        }
        return $text;
    }

    /**
     * Append footnote list to text
     *
     * Replaces the footnote tokens left by doFootnotes with their markers,
     * then appends one list item per referenced footnote, each ending with
     * a backlink for every reference made to it.
     *
     * @param  string $text
     * @return string
     */
    protected function appendFootnotes($text) {
        $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
            array($this, '_appendFootnotes_callback'), $text);

        if (!empty($this->footnotes_ordered)) {
            $text .= "\n\n";
            $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n";
            $text .= "<hr" . $this->empty_element_suffix . "\n";
            $text .= "<ol>\n\n";

            // Attribute template shared by all backlinks; it may contain the
            // "%%" placeholder, which stands for the footnote number.
            $attr = "";
            if ($this->fn_backlink_class != "") {
                $class = $this->fn_backlink_class;
                $class = $this->encodeAttribute($class);
                $attr .= " class=\"$class\"";
            }
            if ($this->fn_backlink_title != "") {
                $title = $this->fn_backlink_title;
                $title = $this->encodeAttribute($title);
                $attr .= " title=\"$title\"";
                $attr .= " aria-label=\"$title\"";
            }
            $attr .= " role=\"doc-backlink\"";
            $backlink_text = $this->fn_backlink_html;
            $num = 0;

            // Footnotes referenced from within other footnotes are added to
            // footnotes_ordered while looping, hence the while loop.
            while (!empty($this->footnotes_ordered)) {
                $footnote = reset($this->footnotes_ordered);
                $note_id = key($this->footnotes_ordered);
                unset($this->footnotes_ordered[$note_id]);
                $ref_count = $this->footnotes_ref_count[$note_id];
                unset($this->footnotes_ref_count[$note_id]);
                unset($this->footnotes[$note_id]);

                $footnote .= "\n"; // Need to append newline before parsing.
                $footnote = $this->runBlockGamut("$footnote\n");
                $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                    array($this, '_appendFootnotes_callback'), $footnote);

                // Substitute the number into a per-note copy so the "%%"
                // placeholder survives in the template for later footnotes.
                $num++;
                $note_attr = str_replace("%%", (string) $num, $attr);
                $note_id = $this->encodeAttribute($note_id);

                // Prepare backlink, multiple backlinks if multiple references
                $backlink = "<a href=\"#fnref:$note_id\"$note_attr>$backlink_text</a>";
                for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                    $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>$backlink_text</a>";
                }
                // Add backlink to last paragraph; create new paragraph if needed.
                // The non-breaking space keeps the backlink on the same line
                // as the last word of the footnote.
                if (preg_match('{</p>$}', $footnote)) {
                    $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
                } else {
                    $footnote .= "\n\n<p>$backlink</p>";
                }

                $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n";
                $text .= $footnote . "\n";
                $text .= "</li>\n\n";
            }

            $text .= "</ol>\n";
            $text .= "</div>";
        }
        return $text;
    }

    /**
     * Callback for appending footnotes
     * @param  array $matches $1 = note id without the configured prefix
     * @return string Footnote marker, or the original "[^id]" text when no
     *                footnote with that id is defined
     */
    protected function _appendFootnotes_callback($matches) {
        $node_id = $this->fn_id_prefix . $matches[1];

        // Create footnote marker only if it has a corresponding footnote *and*
        // the footnote hasn't been used by another marker.
        if (isset($this->footnotes[$node_id])) {
            $num =& $this->footnotes_numbers[$node_id];
            if (!isset($num)) {
                // Transfer footnote content to the ordered list and give it its
                // number
                $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
                $this->footnotes_ref_count[$node_id] = 1;
                $num = $this->footnote_counter++;
                $ref_count_mark = '';
            } else {
                $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
            }

            $attr = "";
            if ($this->fn_link_class != "") {
                $class = $this->fn_link_class;
                $class = $this->encodeAttribute($class);
                $attr .= " class=\"$class\"";
            }
            if ($this->fn_link_title != "") {
                $title = $this->fn_link_title;
                $title = $this->encodeAttribute($title);
                $attr .= " title=\"$title\"";
            }
            $attr .= " role=\"doc-noteref\"";

            $attr = str_replace("%%", $num, $attr);
            $node_id = $this->encodeAttribute($node_id);

            return
                "<sup id=\"fnref$ref_count_mark:$node_id\">".
                "<a href=\"#fn:$node_id\"$attr>$num</a>".
                "</sup>";
        }

        return "[^" . $matches[1] . "]";
    }


    /**
     * Abbreviations - strips abbreviations from text, stores titles in hash
     * references.
* @param  string $text
     * @return string
     */
    protected function stripAbbreviations($text) {
        $less_than_tab = $this->tab_width - 1;

        // Abbreviation definitions are in the form: *[abbr]: description
        $text = preg_replace_callback('{
            ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?:  # abbr_id = $1
            (.*)                    # text = $2 (no blank lines allowed)
            }xm',
            array($this, '_stripAbbreviations_callback'),
            $text);
        return $text;
    }

    /**
     * Callback for stripping abbreviations
     *
     * Adds the abbreviation to the alternation in $abbr_word_re and records
     * its trimmed description.
     *
     * @param  array $matches $1 = abbreviation, $2 = description
     * @return string Empty string, removing the definition from the text
     */
    protected function _stripAbbreviations_callback($matches) {
        $abbr_word = $matches[1];
        $abbr_desc = $matches[2];
        if ($this->abbr_word_re) {
            $this->abbr_word_re .= '|';
        }
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
        return ''; // String that will replace the block
    }

    /**
     * Find defined abbreviations in text and wrap them in <abbr> elements.
     * @param  string $text
     * @return string
     */
    protected function doAbbreviations($text) {
        if ($this->abbr_word_re) {
            // cannot use the /x modifier because abbr_word_re may
            // contain significant spaces:
            $text = preg_replace_callback('{' .
                '(?<![\w\x1A])' .
                '(?:' . $this->abbr_word_re . ')' .
                '(?![\w\x1A])' .
                '}',
                array($this, '_doAbbreviations_callback'), $text);
        }
        return $text;
    }

    /**
     * Callback for processing abbreviations
     * @param  array $matches $0 = matched abbreviation
     * @return string Hashed <abbr> element, or the unchanged match when no
     *                description is recorded for it
     */
    protected function _doAbbreviations_callback($matches) {
        $abbr = $matches[0];
        if (isset($this->abbr_desciptions[$abbr])) {
            $desc = $this->abbr_desciptions[$abbr];
            if (empty($desc)) {
                return $this->hashPart("<abbr>$abbr</abbr>");
            } else {
                $desc = $this->encodeAttribute($desc);
                return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
            }
        } else {
            return $matches[0];
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body