Differences Between: [Versions 310 and 311] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]
1 <?php 2 /** 3 * Markdown Extra - A text-to-HTML conversion tool for web writers 4 * 5 * @package php-markdown 6 * @author Michel Fortin <michel.fortin@michelf.com> 7 * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/> 8 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/> 9 */ 10 11 namespace Michelf; 12 13 /** 14 * Markdown Extra Parser Class 15 */ 16 class MarkdownExtra extends \Michelf\Markdown { 17 /** 18 * Configuration variables 19 */ 20 21 /** 22 * Prefix for footnote ids. 23 * @var string 24 */ 25 public $fn_id_prefix = ""; 26 27 /** 28 * Optional title attribute for footnote links. 29 * @var string 30 */ 31 public $fn_link_title = ""; 32 33 /** 34 * Optional class attribute for footnote links and backlinks. 35 * @var string 36 */ 37 public $fn_link_class = "footnote-ref"; 38 public $fn_backlink_class = "footnote-backref"; 39 40 /** 41 * Content to be displayed within footnote backlinks. The default is '↩'; 42 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS 43 * from displaying the arrow character as an emoji. 44 * Optionally use '^^' and '%%' to refer to the footnote number and 45 * reference number respectively. {@see parseFootnotePlaceholders()} 46 * @var string 47 */ 48 public $fn_backlink_html = '↩︎'; 49 50 /** 51 * Optional title and aria-label attributes for footnote backlinks for 52 * added accessibility (to ensure backlink uniqueness). 53 * Use '^^' and '%%' to refer to the footnote number and reference number 54 * respectively. {@see parseFootnotePlaceholders()} 55 * @var string 56 */ 57 public $fn_backlink_title = ""; 58 public $fn_backlink_label = ""; 59 60 /** 61 * Class name for table cell alignment (%% replaced left/center/right) 62 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 63 * If empty, the align attribute is used instead of a class name. 64 * @var string 65 */ 66 public $table_align_class_tmpl = ''; 67 68 /** 69 * Optional class prefix for fenced code block. 70 * @var string 71 */ 72 public $code_class_prefix = ""; 73 74 /** 75 * Class attribute for code blocks goes on the `code` tag; 76 * setting this to true will put attributes on the `pre` tag instead. 77 * @var boolean 78 */ 79 public $code_attr_on_pre = false; 80 81 /** 82 * Predefined abbreviations. 83 * @var array 84 */ 85 public $predef_abbr = array(); 86 87 /** 88 * Only convert atx-style headers if there's a space between the header and # 89 * @var boolean 90 */ 91 public $hashtag_protection = false; 92 93 /** 94 * Determines whether footnotes should be appended to the end of the document. 95 * If true, footnote html can be retrieved from $this->footnotes_assembled. 96 * @var boolean 97 */ 98 public $omit_footnotes = false; 99 100 101 /** 102 * After parsing, the HTML for the list of footnotes appears here. 103 * This is available only if $omit_footnotes == true. 104 * 105 * Note: when placing the content of `footnotes_assembled` on the page, 106 * consider adding the attribute `role="doc-endnotes"` to the `div` or 107 * `section` that will enclose the list of footnotes so they are 108 * reachable to accessibility tools the same way they would be with the 109 * default HTML output. 110 * @var null|string 111 */ 112 public $footnotes_assembled = null; 113 114 /** 115 * Parser implementation 116 */ 117 118 /** 119 * Constructor function. Initialize the parser object. 120 * @return void 121 */ 122 public function __construct() { 123 // Add extra escapable characters before parent constructor 124 // initialize the table. 125 $this->escape_chars .= ':|'; 126 127 // Insert extra document, block, and span transformations. 128 // Parent constructor will do the sorting. 129 $this->document_gamut += array( 130 "doFencedCodeBlocks" => 5, 131 "stripFootnotes" => 15, 132 "stripAbbreviations" => 25, 133 "appendFootnotes" => 50, 134 ); 135 $this->block_gamut += array( 136 "doFencedCodeBlocks" => 5, 137 "doTables" => 15, 138 "doDefLists" => 45, 139 ); 140 $this->span_gamut += array( 141 "doFootnotes" => 5, 142 "doAbbreviations" => 70, 143 ); 144 145 $this->enhanced_ordered_list = true; 146 parent::__construct(); 147 } 148 149 150 /** 151 * Extra variables used during extra transformations. 152 * @var array 153 */ 154 protected $footnotes = array(); 155 protected $footnotes_ordered = array(); 156 protected $footnotes_ref_count = array(); 157 protected $footnotes_numbers = array(); 158 protected $abbr_desciptions = array(); 159 /** @var string */ 160 protected $abbr_word_re = ''; 161 162 /** 163 * Give the current footnote number. 164 * @var integer 165 */ 166 protected $footnote_counter = 1; 167 168 /** 169 * Ref attribute for links 170 * @var array 171 */ 172 protected $ref_attr = array(); 173 174 /** 175 * Setting up Extra-specific variables. 176 */ 177 protected function setup() { 178 parent::setup(); 179 180 $this->footnotes = array(); 181 $this->footnotes_ordered = array(); 182 $this->footnotes_ref_count = array(); 183 $this->footnotes_numbers = array(); 184 $this->abbr_desciptions = array(); 185 $this->abbr_word_re = ''; 186 $this->footnote_counter = 1; 187 $this->footnotes_assembled = null; 188 189 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 190 if ($this->abbr_word_re) 191 $this->abbr_word_re .= '|'; 192 $this->abbr_word_re .= preg_quote($abbr_word); 193 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 194 } 195 } 196 197 /** 198 * Clearing Extra-specific variables. 199 */ 200 protected function teardown() { 201 $this->footnotes = array(); 202 $this->footnotes_ordered = array(); 203 $this->footnotes_ref_count = array(); 204 $this->footnotes_numbers = array(); 205 $this->abbr_desciptions = array(); 206 $this->abbr_word_re = ''; 207 208 if ( ! $this->omit_footnotes ) 209 $this->footnotes_assembled = null; 210 211 parent::teardown(); 212 } 213 214 215 /** 216 * Extra attribute parser 217 */ 218 219 /** 220 * Expression to use to catch attributes (includes the braces) 221 * @var string 222 */ 223 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}'; 224 225 /** 226 * Expression to use when parsing in a context when no capture is desired 227 * @var string 228 */ 229 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}'; 230 231 /** 232 * Parse attributes caught by the $this->id_class_attr_catch_re expression 233 * and return the HTML-formatted list of attributes. 234 * 235 * Currently supported attributes are .class and #id. 236 * 237 * In addition, this method also supports supplying a default Id value, 238 * which will be used to populate the id attribute in case it was not 239 * overridden. 240 * @param string $tag_name 241 * @param string $attr 242 * @param mixed $defaultIdValue 243 * @param array $classes 244 * @return string 245 */ 246 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) { 247 if (empty($attr) && !$defaultIdValue && empty($classes)) { 248 return ""; 249 } 250 251 // Split on components 252 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches); 253 $elements = $matches[0]; 254 255 // Handle classes and IDs (only first ID taken into account) 256 $attributes = array(); 257 $id = false; 258 foreach ($elements as $element) { 259 if ($element[0] === '.') { 260 $classes[] = substr($element, 1); 261 } else if ($element[0] === '#') { 262 if ($id === false) $id = substr($element, 1); 263 } else if (strpos($element, '=') > 0) { 264 $parts = explode('=', $element, 2); 265 $attributes[] = $parts[0] . '="' . $parts[1] . '"'; 266 } 267 } 268 269 if ($id === false || $id === '') { 270 $id = $defaultIdValue; 271 } 272 273 // Compose attributes as string 274 $attr_str = ""; 275 if (!empty($id)) { 276 $attr_str .= ' id="'.$this->encodeAttribute($id) .'"'; 277 } 278 if (!empty($classes)) { 279 $attr_str .= ' class="'. implode(" ", $classes) . '"'; 280 } 281 if (!$this->no_markup && !empty($attributes)) { 282 $attr_str .= ' '.implode(" ", $attributes); 283 } 284 return $attr_str; 285 } 286 287 /** 288 * Strips link definitions from text, stores the URLs and titles in 289 * hash references. 290 * @param string $text 291 * @return string 292 */ 293 protected function stripLinkDefinitions($text) { 294 $less_than_tab = $this->tab_width - 1; 295 296 // Link defs are in the form: ^[id]: url "optional title" 297 $text = preg_replace_callback('{ 298 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 299 [ ]* 300 \n? # maybe *one* newline 301 [ ]* 302 (?: 303 <(.+?)> # url = $2 304 | 305 (\S+?) # url = $3 306 ) 307 [ ]* 308 \n? # maybe one newline 309 [ ]* 310 (?: 311 (?<=\s) # lookbehind for whitespace 312 ["(] 313 (.*?) # title = $4 314 [")] 315 [ ]* 316 )? # title is optional 317 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 318 (?:\n+|\Z) 319 }xm', 320 array($this, '_stripLinkDefinitions_callback'), 321 $text); 322 return $text; 323 } 324 325 /** 326 * Strip link definition callback 327 * @param array $matches 328 * @return string 329 */ 330 protected function _stripLinkDefinitions_callback($matches) { 331 $link_id = strtolower($matches[1]); 332 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 333 $this->urls[$link_id] = $url; 334 $this->titles[$link_id] =& $matches[4]; 335 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 336 return ''; // String that will replace the block 337 } 338 339 340 /** 341 * HTML block parser 342 */ 343 344 /** 345 * Tags that are always treated as block tags 346 * @var string 347 */ 348 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure'; 349 350 /** 351 * Tags treated as block tags only if the opening tag is alone on its line 352 * @var string 353 */ 354 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 355 356 /** 357 * Tags where markdown="1" default to span mode: 358 * @var string 359 */ 360 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 361 362 /** 363 * Tags which must not have their contents modified, no matter where 364 * they appear 365 * @var string 366 */ 367 protected $clean_tags_re = 'script|style|math|svg'; 368 369 /** 370 * Tags that do not need to be closed. 371 * @var string 372 */ 373 protected $auto_close_tags_re = 'hr|img|param|source|track'; 374 375 /** 376 * Hashify HTML Blocks and "clean tags". 377 * 378 * We only want to do this for block-level HTML tags, such as headers, 379 * lists, and tables. That's because we still want to wrap <p>s around 380 * "paragraphs" that are wrapped in non-block-level tags, such as anchors, 381 * phrase emphasis, and spans. The list of tags we're looking for is 382 * hard-coded. 383 * 384 * This works by calling _HashHTMLBlocks_InMarkdown, which then calls 385 * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 386 * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 387 * _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 388 * These two functions are calling each other. It's recursive! 389 * @param string $text 390 * @return string 391 */ 392 protected function hashHTMLBlocks($text) { 393 if ($this->no_markup) { 394 return $text; 395 } 396 397 // Call the HTML-in-Markdown hasher. 398 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 399 400 return $text; 401 } 402 403 /** 404 * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 405 * 406 * * $indent is the number of space to be ignored when checking for code 407 * blocks. This is important because if we don't take the indent into 408 * account, something like this (which looks right) won't work as expected: 409 * 410 * <div> 411 * <div markdown="1"> 412 * Hello World. <-- Is this a Markdown code block or text? 413 * </div> <-- Is this a Markdown code block or a real tag? 414 * <div> 415 * 416 * If you don't like this, just don't indent the tag on which 417 * you apply the markdown="1" attribute. 418 * 419 * * If $enclosing_tag_re is not empty, stops at the first unmatched closing 420 * tag with that name. Nested tags supported. 421 * 422 * * If $span is true, text inside must treated as span. So any double 423 * newline will be replaced by a single newline so that it does not create 424 * paragraphs. 425 * 426 * Returns an array of that form: ( processed text , remaining text ) 427 * 428 * @param string $text 429 * @param integer $indent 430 * @param string $enclosing_tag_re 431 * @param boolean $span 432 * @return array 433 */ 434 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 435 $enclosing_tag_re = '', $span = false) 436 { 437 438 if ($text === '') return array('', ''); 439 440 // Regex to check for the presense of newlines around a block tag. 441 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 442 $newline_after_re = 443 '{ 444 ^ # Start of text following the tag. 445 (?>[ ]*<!--.*?-->)? # Optional comment. 446 [ ]*\n # Must be followed by newline. 447 }xs'; 448 449 // Regex to match any tag. 450 $block_tag_re = 451 '{ 452 ( # $2: Capture whole tag. 453 </? # Any opening or closing tag. 454 (?> # Tag name. 455 ' . $this->block_tags_re . ' | 456 ' . $this->context_block_tags_re . ' | 457 ' . $this->clean_tags_re . ' | 458 (?!\s)'.$enclosing_tag_re . ' 459 ) 460 (?: 461 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 462 (?> 463 ".*?" | # Double quotes (can contain `>`) 464 \'.*?\' | # Single quotes (can contain `>`) 465 .+? # Anything but quotes and `>`. 466 )*? 467 )? 468 > # End of tag. 469 | 470 <!-- .*? --> # HTML Comment 471 | 472 <\?.*?\?> | <%.*?%> # Processing instruction 473 | 474 <!\[CDATA\[.*?\]\]> # CData Block 475 ' . ( !$span ? ' # If not in span. 476 | 477 # Indented code block 478 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 479 [ ]{' . ($indent + 4) . '}[^\n]* \n 480 (?> 481 (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n 482 )* 483 | 484 # Fenced code block marker 485 (?<= ^ | \n ) 486 [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,}) 487 [ ]* 488 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name 489 [ ]* 490 (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes 491 [ ]* 492 (?= \n ) 493 ' : '' ) . ' # End (if not is span). 494 | 495 # Code span marker 496 # Note, this regex needs to go after backtick fenced 497 # code blocks but it should also be kept outside of the 498 # "if not in span" condition adding backticks to the parser 499 `+ 500 ) 501 }xs'; 502 503 504 $depth = 0; // Current depth inside the tag tree. 505 $parsed = ""; // Parsed text that will be returned. 506 507 // Loop through every tag until we find the closing tag of the parent 508 // or loop until reaching the end of text if no parent tag specified. 509 do { 510 // Split the text using the first $tag_match pattern found. 511 // Text before pattern will be first in the array, text after 512 // pattern will be at the end, and between will be any catches made 513 // by the pattern. 514 $parts = preg_split($block_tag_re, $text, 2, 515 PREG_SPLIT_DELIM_CAPTURE); 516 517 // If in Markdown span mode, add a empty-string span-level hash 518 // after each newline to prevent triggering any block element. 519 if ($span) { 520 $void = $this->hashPart("", ':'); 521 $newline = "\n$void"; 522 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 523 } 524 525 $parsed .= $parts[0]; // Text before current tag. 526 527 // If end of $text has been reached. Stop loop. 528 if (count($parts) < 3) { 529 $text = ""; 530 break; 531 } 532 533 $tag = $parts[1]; // Tag to handle. 534 $text = $parts[2]; // Remaining text after current tag. 535 536 // Check for: Fenced code block marker. 537 // Note: need to recheck the whole tag to disambiguate backtick 538 // fences from code spans 539 if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) { 540 // Fenced code block marker: find matching end marker. 541 $fence_indent = strlen($capture[1]); // use captured indent in re 542 $fence_re = $capture[2]; // use captured fence in re 543 if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text, 544 $matches)) 545 { 546 // End marker found: pass text unchanged until marker. 547 $parsed .= $tag . $matches[0]; 548 $text = substr($text, strlen($matches[0])); 549 } 550 else { 551 // No end marker: just skip it. 552 $parsed .= $tag; 553 } 554 } 555 // Check for: Indented code block. 556 else if ($tag[0] === "\n" || $tag[0] === " ") { 557 // Indented code block: pass it unchanged, will be handled 558 // later. 559 $parsed .= $tag; 560 } 561 // Check for: Code span marker 562 // Note: need to check this after backtick fenced code blocks 563 else if ($tag[0] === "`") { 564 // Find corresponding end marker. 565 $tag_re = preg_quote($tag); 566 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}', 567 $text, $matches)) 568 { 569 // End marker found: pass text unchanged until marker. 570 $parsed .= $tag . $matches[0]; 571 $text = substr($text, strlen($matches[0])); 572 } 573 else { 574 // Unmatched marker: just skip it. 575 $parsed .= $tag; 576 } 577 } 578 // Check for: Opening Block level tag or 579 // Opening Context Block tag (like ins and del) 580 // used as a block tag (tag is alone on it's line). 581 else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) || 582 ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) && 583 preg_match($newline_before_re, $parsed) && 584 preg_match($newline_after_re, $text) ) 585 ) 586 { 587 // Need to parse tag and following text using the HTML parser. 588 list($block_text, $text) = 589 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 590 591 // Make sure it stays outside of any paragraph by adding newlines. 592 $parsed .= "\n\n$block_text\n\n"; 593 } 594 // Check for: Clean tag (like script, math) 595 // HTML Comments, processing instructions. 596 else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) || 597 $tag[1] === '!' || $tag[1] === '?') 598 { 599 // Need to parse tag and following text using the HTML parser. 600 // (don't check for markdown attribute) 601 list($block_text, $text) = 602 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 603 604 $parsed .= $block_text; 605 } 606 // Check for: Tag with same name as enclosing tag. 607 else if ($enclosing_tag_re !== '' && 608 // Same name as enclosing tag. 609 preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag)) 610 { 611 // Increase/decrease nested tag count. 612 if ($tag[1] === '/') { 613 $depth--; 614 } else if ($tag[strlen($tag)-2] !== '/') { 615 $depth++; 616 } 617 618 if ($depth < 0) { 619 // Going out of parent element. Clean up and break so we 620 // return to the calling function. 621 $text = $tag . $text; 622 break; 623 } 624 625 $parsed .= $tag; 626 } 627 else { 628 $parsed .= $tag; 629 } 630 } while ($depth >= 0); 631 632 return array($parsed, $text); 633 } 634 635 /** 636 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 637 * 638 * * Calls $hash_method to convert any blocks. 639 * * Stops when the first opening tag closes. 640 * * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 641 * (it is not inside clean tags) 642 * 643 * Returns an array of that form: ( processed text , remaining text ) 644 * @param string $text 645 * @param string $hash_method 646 * @param bool $md_attr Handle `markdown="1"` attribute 647 * @return array 648 */ 649 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 650 if ($text === '') return array('', ''); 651 652 // Regex to match `markdown` attribute inside of a tag. 653 $markdown_attr_re = ' 654 { 655 \s* # Eat whitespace before the `markdown` attribute 656 markdown 657 \s*=\s* 658 (?> 659 (["\']) # $1: quote delimiter 660 (.*?) # $2: attribute value 661 \1 # matching delimiter 662 | 663 ([^\s>]*) # $3: unquoted attribute value 664 ) 665 () # $4: make $3 always defined (avoid warnings) 666 }xs'; 667 668 // Regex to match any tag. 669 $tag_re = '{ 670 ( # $2: Capture whole tag. 671 </? # Any opening or closing tag. 672 [\w:$]+ # Tag name. 673 (?: 674 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 675 (?> 676 ".*?" | # Double quotes (can contain `>`) 677 \'.*?\' | # Single quotes (can contain `>`) 678 .+? # Anything but quotes and `>`. 679 )*? 680 )? 681 > # End of tag. 682 | 683 <!-- .*? --> # HTML Comment 684 | 685 <\?.*?\?> | <%.*?%> # Processing instruction 686 | 687 <!\[CDATA\[.*?\]\]> # CData Block 688 ) 689 }xs'; 690 691 $original_text = $text; // Save original text in case of faliure. 692 693 $depth = 0; // Current depth inside the tag tree. 694 $block_text = ""; // Temporary text holder for current text. 695 $parsed = ""; // Parsed text that will be returned. 696 $base_tag_name_re = ''; 697 698 // Get the name of the starting tag. 699 // (This pattern makes $base_tag_name_re safe without quoting.) 700 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 701 $base_tag_name_re = $matches[1]; 702 703 // Loop through every tag until we find the corresponding closing tag. 704 do { 705 // Split the text using the first $tag_match pattern found. 706 // Text before pattern will be first in the array, text after 707 // pattern will be at the end, and between will be any catches made 708 // by the pattern. 709 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 710 711 if (count($parts) < 3) { 712 // End of $text reached with unbalenced tag(s). 713 // In that case, we return original text unchanged and pass the 714 // first character as filtered to prevent an infinite loop in the 715 // parent function. 716 return array($original_text[0], substr($original_text, 1)); 717 } 718 719 $block_text .= $parts[0]; // Text before current tag. 720 $tag = $parts[1]; // Tag to handle. 721 $text = $parts[2]; // Remaining text after current tag. 722 723 // Check for: Auto-close tag (like <hr/>) 724 // Comments and Processing Instructions. 725 if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) || 726 $tag[1] === '!' || $tag[1] === '?') 727 { 728 // Just add the tag to the block as if it was text. 729 $block_text .= $tag; 730 } 731 else { 732 // Increase/decrease nested tag count. Only do so if 733 // the tag's name match base tag's. 734 if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) { 735 if ($tag[1] === '/') { 736 $depth--; 737 } else if ($tag[strlen($tag)-2] !== '/') { 738 $depth++; 739 } 740 } 741 742 // Check for `markdown="1"` attribute and handle it. 743 if ($md_attr && 744 preg_match($markdown_attr_re, $tag, $attr_m) && 745 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 746 { 747 // Remove `markdown` attribute from opening tag. 748 $tag = preg_replace($markdown_attr_re, '', $tag); 749 750 // Check if text inside this tag must be parsed in span mode. 751 $mode = $attr_m[2] . $attr_m[3]; 752 $span_mode = $mode === 'span' || ($mode !== 'block' && 753 preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag)); 754 755 // Calculate indent before tag. 756 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 757 $strlen = $this->utf8_strlen; 758 $indent = $strlen($matches[1], 'UTF-8'); 759 } else { 760 $indent = 0; 761 } 762 763 // End preceding block with this tag. 764 $block_text .= $tag; 765 $parsed .= $this->$hash_method($block_text); 766 767 // Get enclosing tag name for the ParseMarkdown function. 768 // (This pattern makes $tag_name_re safe without quoting.) 769 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 770 $tag_name_re = $matches[1]; 771 772 // Parse the content using the HTML-in-Markdown parser. 773 list ($block_text, $text) 774 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 775 $tag_name_re, $span_mode); 776 777 // Outdent markdown text. 778 if ($indent > 0) { 779 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 780 $block_text); 781 } 782 783 // Append tag content to parsed text. 784 if (!$span_mode) { 785 $parsed .= "\n\n$block_text\n\n"; 786 } else { 787 $parsed .= (string) $block_text; 788 } 789 790 // Start over with a new block. 791 $block_text = ""; 792 } 793 else $block_text .= $tag; 794 } 795 796 } while ($depth > 0); 797 798 // Hash last block text that wasn't processed inside the loop. 799 $parsed .= $this->$hash_method($block_text); 800 801 return array($parsed, $text); 802 } 803 804 /** 805 * Called whenever a tag must be hashed when a function inserts a "clean" tag 806 * in $text, it passes through this function and is automaticaly escaped, 807 * blocking invalid nested overlap. 808 * @param string $text 809 * @return string 810 */ 811 protected function hashClean($text) { 812 return $this->hashPart($text, 'C'); 813 } 814 815 /** 816 * Turn Markdown link shortcuts into XHTML <a> tags. 817 * @param string $text 818 * @return string 819 */ 820 protected function doAnchors($text) { 821 if ($this->in_anchor) { 822 return $text; 823 } 824 $this->in_anchor = true; 825 826 // First, handle reference-style links: [link text] [id] 827 $text = preg_replace_callback('{ 828 ( # wrap whole match in $1 829 \[ 830 (' . $this->nested_brackets_re . ') # link text = $2 831 \] 832 833 [ ]? # one optional space 834 (?:\n[ ]*)? # one optional newline followed by spaces 835 836 \[ 837 (.*?) # id = $3 838 \] 839 ) 840 }xs', 841 array($this, '_doAnchors_reference_callback'), $text); 842 843 // Next, inline-style links: [link text](url "optional title") 844 $text = preg_replace_callback('{ 845 ( # wrap whole match in $1 846 \[ 847 (' . $this->nested_brackets_re . ') # link text = $2 848 \] 849 \( # literal paren 850 [ \n]* 851 (?: 852 <(.+?)> # href = $3 853 | 854 (' . $this->nested_url_parenthesis_re . ') # href = $4 855 ) 856 [ \n]* 857 ( # $5 858 ([\'"]) # quote char = $6 859 (.*?) # Title = $7 860 \6 # matching quote 861 [ \n]* # ignore any spaces/tabs between closing quote and ) 862 )? # title is optional 863 \) 864 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 865 ) 866 }xs', 867 array($this, '_doAnchors_inline_callback'), $text); 868 869 // Last, handle reference-style shortcuts: [link text] 870 // These must come last in case you've also got [link text][1] 871 // or [link text](/foo) 872 $text = preg_replace_callback('{ 873 ( # wrap whole match in $1 874 \[ 875 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 876 \] 877 ) 878 }xs', 879 array($this, '_doAnchors_reference_callback'), $text); 880 881 $this->in_anchor = false; 882 return $text; 883 } 884 885 /** 886 * Callback for reference anchors 887 * @param array $matches 888 * @return string 889 */ 890 protected function _doAnchors_reference_callback($matches) { 891 $whole_match = $matches[1]; 892 $link_text = $matches[2]; 893 $link_id =& $matches[3]; 894 895 if ($link_id == "") { 896 // for shortcut links like [this][] or [this]. 897 $link_id = $link_text; 898 } 899 900 // lower-case and turn embedded newlines into spaces 901 $link_id = strtolower($link_id); 902 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 903 904 if (isset($this->urls[$link_id])) { 905 $url = $this->urls[$link_id]; 906 $url = $this->encodeURLAttribute($url); 907 908 $result = "<a href=\"$url\""; 909 if ( isset( $this->titles[$link_id] ) ) { 910 $title = $this->titles[$link_id]; 911 $title = $this->encodeAttribute($title); 912 $result .= " title=\"$title\""; 913 } 914 if (isset($this->ref_attr[$link_id])) 915 $result .= $this->ref_attr[$link_id]; 916 917 $link_text = $this->runSpanGamut($link_text); 918 $result .= ">$link_text</a>"; 919 $result = $this->hashPart($result); 920 } 921 else { 922 $result = $whole_match; 923 } 924 return $result; 925 } 926 927 /** 928 * Callback for inline anchors 929 * @param array $matches 930 * @return string 931 */ 932 protected function _doAnchors_inline_callback($matches) { 933 $link_text = $this->runSpanGamut($matches[2]); 934 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 935 $title =& $matches[7]; 936 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 937 938 // if the URL was of the form <s p a c e s> it got caught by the HTML 939 // tag parser and hashed. Need to reverse the process before using the URL. 940 $unhashed = $this->unhash($url); 941 if ($unhashed !== $url) 942 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 943 944 $url = $this->encodeURLAttribute($url); 945 946 $result = "<a href=\"$url\""; 947 if (isset($title)) { 948 $title = $this->encodeAttribute($title); 949 $result .= " title=\"$title\""; 950 } 951 $result .= $attr; 952 953 $link_text = $this->runSpanGamut($link_text); 954 $result .= ">$link_text</a>"; 955 956 return $this->hashPart($result); 957 } 958 959 /** 960 * Turn Markdown image shortcuts into <img> tags. 961 * @param string $text 962 * @return string 963 */ 964 protected function doImages($text) { 965 // First, handle reference-style labeled images: ![alt text][id] 966 $text = preg_replace_callback('{ 967 ( # wrap whole match in $1 968 !\[ 969 (' . $this->nested_brackets_re . ') # alt text = $2 970 \] 971 972 [ ]? # one optional space 973 (?:\n[ ]*)? # one optional newline followed by spaces 974 975 \[ 976 (.*?) # id = $3 977 \] 978 979 ) 980 }xs', 981 array($this, '_doImages_reference_callback'), $text); 982 983 // Next, handle inline images: ![alt text](url "optional title") 984 // Don't forget: encode * and _ 985 $text = preg_replace_callback('{ 986 ( # wrap whole match in $1 987 !\[ 988 (' . $this->nested_brackets_re . ') # alt text = $2 989 \] 990 \s? # One optional whitespace character 991 \( # literal paren 992 [ \n]* 993 (?: 994 <(\S*)> # src url = $3 995 | 996 (' . $this->nested_url_parenthesis_re . ') # src url = $4 997 ) 998 [ \n]* 999 ( # $5 1000 ([\'"]) # quote char = $6 1001 (.*?) # title = $7 1002 \6 # matching quote 1003 [ \n]* 1004 )? # title is optional 1005 \) 1006 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 1007 ) 1008 }xs', 1009 array($this, '_doImages_inline_callback'), $text); 1010 1011 return $text; 1012 } 1013 1014 /** 1015 * Callback for referenced images 1016 * @param array $matches 1017 * @return string 1018 */ 1019 protected function _doImages_reference_callback($matches) { 1020 $whole_match = $matches[1]; 1021 $alt_text = $matches[2]; 1022 $link_id = strtolower($matches[3]); 1023 1024 if ($link_id === "") { 1025 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 1026 } 1027 1028 $alt_text = $this->encodeAttribute($alt_text); 1029 if (isset($this->urls[$link_id])) { 1030 $url = $this->encodeURLAttribute($this->urls[$link_id]); 1031 $result = "<img src=\"$url\" alt=\"$alt_text\""; 1032 if (isset($this->titles[$link_id])) { 1033 $title = $this->titles[$link_id]; 1034 $title = $this->encodeAttribute($title); 1035 $result .= " title=\"$title\""; 1036 } 1037 if (isset($this->ref_attr[$link_id])) { 1038 $result .= $this->ref_attr[$link_id]; 1039 } 1040 $result .= $this->empty_element_suffix; 1041 $result = $this->hashPart($result); 1042 } 1043 else { 1044 // If there's no such link ID, leave intact: 1045 $result = $whole_match; 1046 } 1047 1048 return $result; 1049 } 1050 1051 /** 1052 * Callback for inline images 1053 * @param array $matches 1054 * @return string 1055 */ 1056 protected function _doImages_inline_callback($matches) { 1057 $alt_text = $matches[2]; 1058 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 1059 $title =& $matches[7]; 1060 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 1061 1062 $alt_text = $this->encodeAttribute($alt_text); 1063 $url = $this->encodeURLAttribute($url); 1064 $result = "<img src=\"$url\" alt=\"$alt_text\""; 1065 if (isset($title)) { 1066 $title = $this->encodeAttribute($title); 1067 $result .= " title=\"$title\""; // $title already quoted 1068 } 1069 $result .= $attr; 1070 $result .= $this->empty_element_suffix; 1071 1072 return $this->hashPart($result); 1073 } 1074 1075 /** 1076 * Process markdown headers. Redefined to add ID and class attribute support. 1077 * @param string $text 1078 * @return string 1079 */ 1080 protected function doHeaders($text) { 1081 // Setext-style headers: 1082 // Header 1 {#header1} 1083 // ======== 1084 // 1085 // Header 2 {#header2 .class1 .class2} 1086 // -------- 1087 // 1088 $text = preg_replace_callback( 1089 '{ 1090 (^.+?) # $1: Header text 1091 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 1092 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1093 }mx', 1094 array($this, '_doHeaders_callback_setext'), $text); 1095 1096 // atx-style headers: 1097 // # Header 1 {#header1} 1098 // ## Header 2 {#header2} 1099 // ## Header 2 with closing hashes ## {#header3.class1.class2} 1100 // ... 1101 // ###### Header 6 {.class2} 1102 // 1103 $text = preg_replace_callback('{ 1104 ^(\#{1,6}) # $1 = string of #\'s 1105 [ ]'.($this->hashtag_protection ? '+' : '*').' 1106 (.+?) # $2 = Header text 1107 [ ]* 1108 \#* # optional closing #\'s (not counted) 1109 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 1110 [ ]* 1111 \n+ 1112 }xm', 1113 array($this, '_doHeaders_callback_atx'), $text); 1114 1115 return $text; 1116 } 1117 1118 /** 1119 * Callback for setext headers 1120 * @param array $matches 1121 * @return string 1122 */ 1123 protected function _doHeaders_callback_setext($matches) { 1124 if ($matches[3] === '-' && preg_match('{^- }', $matches[1])) { 1125 return $matches[0]; 1126 } 1127 1128 $level = $matches[3][0] === '=' ? 1 : 2; 1129 1130 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null; 1131 1132 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId); 1133 $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>"; 1134 return "\n" . $this->hashBlock($block) . "\n\n"; 1135 } 1136 1137 /** 1138 * Callback for atx headers 1139 * @param array $matches 1140 * @return string 1141 */ 1142 protected function _doHeaders_callback_atx($matches) { 1143 $level = strlen($matches[1]); 1144 1145 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null; 1146 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId); 1147 $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>"; 1148 return "\n" . $this->hashBlock($block) . "\n\n"; 1149 } 1150 1151 /** 1152 * Form HTML tables. 1153 * @param string $text 1154 * @return string 1155 */ 1156 protected function doTables($text) { 1157 $less_than_tab = $this->tab_width - 1; 1158 // Find tables with leading pipe. 1159 // 1160 // | Header 1 | Header 2 1161 // | -------- | -------- 1162 // | Cell 1 | Cell 2 1163 // | Cell 3 | Cell 4 1164 $text = preg_replace_callback(' 1165 { 1166 ^ # Start of a line 1167 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1168 [|] # Optional leading pipe (present) 1169 (.+) \n # $1: Header row (at least one pipe) 1170 1171 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1172 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 1173 1174 ( # $3: Cells 1175 (?> 1176 [ ]* # Allowed whitespace. 1177 [|] .* \n # Row content. 1178 )* 1179 ) 1180 (?=\n|\Z) # Stop at final double newline. 1181 }xm', 1182 array($this, '_doTable_leadingPipe_callback'), $text); 1183 1184 // Find tables without leading pipe. 1185 // 1186 // Header 1 | Header 2 1187 // -------- | -------- 1188 // Cell 1 | Cell 2 1189 // Cell 3 | Cell 4 1190 $text = preg_replace_callback(' 1191 { 1192 ^ # Start of a line 1193 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1194 (\S.*[|].*) \n # $1: Header row (at least one pipe) 1195 1196 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1197 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 1198 1199 ( # $3: Cells 1200 (?> 1201 .* [|] .* \n # Row content 1202 )* 1203 ) 1204 (?=\n|\Z) # Stop at final double newline. 1205 }xm', 1206 array($this, '_DoTable_callback'), $text); 1207 1208 return $text; 1209 } 1210 1211 /** 1212 * Callback for removing the leading pipe for each row 1213 * @param array $matches 1214 * @return string 1215 */ 1216 protected function _doTable_leadingPipe_callback($matches) { 1217 $head = $matches[1]; 1218 $underline = $matches[2]; 1219 $content = $matches[3]; 1220 1221 $content = preg_replace('/^ *[|]/m', '', $content); 1222 1223 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 1224 } 1225 1226 /** 1227 * Make the align attribute in a table 1228 * @param string $alignname 1229 * @return string 1230 */ 1231 protected function _doTable_makeAlignAttr($alignname) { 1232 if (empty($this->table_align_class_tmpl)) { 1233 return " align=\"$alignname\""; 1234 } 1235 1236 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); 1237 return " class=\"$classname\""; 1238 } 1239 1240 /** 1241 * Calback for processing tables 1242 * @param array $matches 1243 * @return string 1244 */ 1245 protected function _doTable_callback($matches) { 1246 $head = $matches[1]; 1247 $underline = $matches[2]; 1248 $content = $matches[3]; 1249 1250 // Remove any tailing pipes for each line. 1251 $head = preg_replace('/[|] *$/m', '', $head); 1252 $underline = preg_replace('/[|] *$/m', '', $underline); 1253 $content = preg_replace('/[|] *$/m', '', $content); 1254 1255 // Reading alignement from header underline. 1256 $separators = preg_split('/ *[|] */', $underline); 1257 foreach ($separators as $n => $s) { 1258 if (preg_match('/^ *-+: *$/', $s)) 1259 $attr[$n] = $this->_doTable_makeAlignAttr('right'); 1260 else if (preg_match('/^ *:-+: *$/', $s)) 1261 $attr[$n] = $this->_doTable_makeAlignAttr('center'); 1262 else if (preg_match('/^ *:-+ *$/', $s)) 1263 $attr[$n] = $this->_doTable_makeAlignAttr('left'); 1264 else 1265 $attr[$n] = ''; 1266 } 1267 1268 // Parsing span elements, including code spans, character escapes, 1269 // and inline HTML tags, so that pipes inside those gets ignored. 1270 $head = $this->parseSpan($head); 1271 $headers = preg_split('/ *[|] */', $head); 1272 $col_count = count($headers); 1273 $attr = array_pad($attr, $col_count, ''); 1274 1275 // Write column headers. 1276 $text = "<table>\n"; 1277 $text .= "<thead>\n"; 1278 $text .= "<tr>\n"; 1279 foreach ($headers as $n => $header) { 1280 $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n"; 1281 } 1282 $text .= "</tr>\n"; 1283 $text .= "</thead>\n"; 1284 1285 // Split content by row. 1286 $rows = explode("\n", trim($content, "\n")); 1287 1288 $text .= "<tbody>\n"; 1289 foreach ($rows as $row) { 1290 // Parsing span elements, including code spans, character escapes, 1291 // and inline HTML tags, so that pipes inside those gets ignored. 1292 $row = $this->parseSpan($row); 1293 1294 // Split row by cell. 1295 $row_cells = preg_split('/ *[|] */', $row, $col_count); 1296 $row_cells = array_pad($row_cells, $col_count, ''); 1297 1298 $text .= "<tr>\n"; 1299 foreach ($row_cells as $n => $cell) { 1300 $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n"; 1301 } 1302 $text .= "</tr>\n"; 1303 } 1304 $text .= "</tbody>\n"; 1305 $text .= "</table>"; 1306 1307 return $this->hashBlock($text) . "\n"; 1308 } 1309 1310 /** 1311 * Form HTML definition lists. 1312 * @param string $text 1313 * @return string 1314 */ 1315 protected function doDefLists($text) { 1316 $less_than_tab = $this->tab_width - 1; 1317 1318 // Re-usable pattern to match any entire dl list: 1319 $whole_list_re = '(?> 1320 ( # $1 = whole list 1321 ( # $2 1322 [ ]{0,' . $less_than_tab . '} 1323 ((?>.*\S.*\n)+) # $3 = defined term 1324 \n? 1325 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1326 ) 1327 (?s:.+?) 1328 ( # $4 1329 \z 1330 | 1331 \n{2,} 1332 (?=\S) 1333 (?! # Negative lookahead for another term 1334 [ ]{0,' . $less_than_tab . '} 1335 (?: \S.*\n )+? # defined term 1336 \n? 1337 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1338 ) 1339 (?! # Negative lookahead for another definition 1340 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1341 ) 1342 ) 1343 ) 1344 )'; // mx 1345 1346 $text = preg_replace_callback('{ 1347 (?>\A\n?|(?<=\n\n)) 1348 ' . $whole_list_re . ' 1349 }mx', 1350 array($this, '_doDefLists_callback'), $text); 1351 1352 return $text; 1353 } 1354 1355 /** 1356 * Callback for processing definition lists 1357 * @param array $matches 1358 * @return string 1359 */ 1360 protected function _doDefLists_callback($matches) { 1361 // Re-usable patterns to match list item bullets and number markers: 1362 $list = $matches[1]; 1363 1364 // Turn double returns into triple returns, so that we can make a 1365 // paragraph for the last item in a list, if necessary: 1366 $result = trim($this->processDefListItems($list)); 1367 $result = "<dl>\n" . $result . "\n</dl>"; 1368 return $this->hashBlock($result) . "\n\n"; 1369 } 1370 1371 /** 1372 * Process the contents of a single definition list, splitting it 1373 * into individual term and definition list items. 1374 * @param string $list_str 1375 * @return string 1376 */ 1377 protected function processDefListItems($list_str) { 1378 1379 $less_than_tab = $this->tab_width - 1; 1380 1381 // Trim trailing blank lines: 1382 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1383 1384 // Process definition terms. 1385 $list_str = preg_replace_callback('{ 1386 (?>\A\n?|\n\n+) # leading line 1387 ( # definition terms = $1 1388 [ ]{0,' . $less_than_tab . '} # leading whitespace 1389 (?!\:[ ]|[ ]) # negative lookahead for a definition 1390 # mark (colon) or more whitespace. 1391 (?> \S.* \n)+? # actual term (not whitespace). 1392 ) 1393 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 1394 # with a definition mark. 1395 }xm', 1396 array($this, '_processDefListItems_callback_dt'), $list_str); 1397 1398 // Process actual definitions. 1399 $list_str = preg_replace_callback('{ 1400 \n(\n+)? # leading line = $1 1401 ( # marker space = $2 1402 [ ]{0,' . $less_than_tab . '} # whitespace before colon 1403 \:[ ]+ # definition mark (colon) 1404 ) 1405 ((?s:.+?)) # definition text = $3 1406 (?= \n+ # stop at next definition mark, 1407 (?: # next term or end of text 1408 [ ]{0,' . $less_than_tab . '} \:[ ] | 1409 <dt> | \z 1410 ) 1411 ) 1412 }xm', 1413 array($this, '_processDefListItems_callback_dd'), $list_str); 1414 1415 return $list_str; 1416 } 1417 1418 /** 1419 * Callback for <dt> elements in definition lists 1420 * @param array $matches 1421 * @return string 1422 */ 1423 protected function _processDefListItems_callback_dt($matches) { 1424 $terms = explode("\n", trim($matches[1])); 1425 $text = ''; 1426 foreach ($terms as $term) { 1427 $term = $this->runSpanGamut(trim($term)); 1428 $text .= "\n<dt>" . $term . "</dt>"; 1429 } 1430 return $text . "\n"; 1431 } 1432 1433 /** 1434 * Callback for <dd> elements in definition lists 1435 * @param array $matches 1436 * @return string 1437 */ 1438 protected function _processDefListItems_callback_dd($matches) { 1439 $leading_line = $matches[1]; 1440 $marker_space = $matches[2]; 1441 $def = $matches[3]; 1442 1443 if ($leading_line || preg_match('/\n{2,}/', $def)) { 1444 // Replace marker with the appropriate whitespace indentation 1445 $def = str_repeat(' ', strlen($marker_space)) . $def; 1446 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 1447 $def = "\n". $def ."\n"; 1448 } 1449 else { 1450 $def = rtrim($def); 1451 $def = $this->runSpanGamut($this->outdent($def)); 1452 } 1453 1454 return "\n<dd>" . $def . "</dd>\n"; 1455 } 1456 1457 /** 1458 * Adding the fenced code block syntax to regular Markdown: 1459 * 1460 * ~~~ 1461 * Code block 1462 * ~~~ 1463 * 1464 * @param string $text 1465 * @return string 1466 */ 1467 protected function doFencedCodeBlocks($text) { 1468 1469 $text = preg_replace_callback('{ 1470 (?:\n|\A) 1471 # 1: Opening marker 1472 ( 1473 (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 1474 ) 1475 [ ]* 1476 (?: 1477 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 1478 )? 1479 [ ]* 1480 (?: 1481 ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes 1482 )? 1483 [ ]* \n # Whitespace and newline following marker. 1484 1485 # 4: Content 1486 ( 1487 (?> 1488 (?!\1 [ ]* \n) # Not a closing marker. 1489 .*\n+ 1490 )+ 1491 ) 1492 1493 # Closing marker. 1494 \1 [ ]* (?= \n ) 1495 }xm', 1496 array($this, '_doFencedCodeBlocks_callback'), $text); 1497 1498 return $text; 1499 } 1500 1501 /** 1502 * Callback to process fenced code blocks 1503 * @param array $matches 1504 * @return string 1505 */ 1506 protected function _doFencedCodeBlocks_callback($matches) { 1507 $classname =& $matches[2]; 1508 $attrs =& $matches[3]; 1509 $codeblock = $matches[4]; 1510 1511 if ($this->code_block_content_func) { 1512 $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname); 1513 } else { 1514 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1515 } 1516 1517 $codeblock = preg_replace_callback('/^\n+/', 1518 array($this, '_doFencedCodeBlocks_newlines'), $codeblock); 1519 1520 $classes = array(); 1521 if ($classname !== "") { 1522 if ($classname[0] === '.') { 1523 $classname = substr($classname, 1); 1524 } 1525 $classes[] = $this->code_class_prefix . $classname; 1526 } 1527 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes); 1528 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 1529 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 1530 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>"; 1531 1532 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1533 } 1534 1535 /** 1536 * Replace new lines in fenced code blocks 1537 * @param array $matches 1538 * @return string 1539 */ 1540 protected function _doFencedCodeBlocks_newlines($matches) { 1541 return str_repeat("<br$this->empty_element_suffix", 1542 strlen($matches[0])); 1543 } 1544 1545 /** 1546 * Redefining emphasis markers so that emphasis by underscore does not 1547 * work in the middle of a word. 1548 * @var array 1549 */ 1550 protected $em_relist = array( 1551 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)', 1552 '*' => '(?<![\s*])\*(?!\*)', 1553 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])', 1554 ); 1555 protected $strong_relist = array( 1556 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)', 1557 '**' => '(?<![\s*])\*\*(?!\*)', 1558 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])', 1559 ); 1560 protected $em_strong_relist = array( 1561 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)', 1562 '***' => '(?<![\s*])\*\*\*(?!\*)', 1563 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])', 1564 ); 1565 1566 /** 1567 * Parse text into paragraphs 1568 * @param string $text String to process in paragraphs 1569 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags 1570 * @return string HTML output 1571 */ 1572 protected function formParagraphs($text, $wrap_in_p = true) { 1573 // Strip leading and trailing lines: 1574 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1575 1576 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1577 1578 // Wrap <p> tags and unhashify HTML blocks 1579 foreach ($grafs as $key => $value) { 1580 $value = trim($this->runSpanGamut($value)); 1581 1582 // Check if this should be enclosed in a paragraph. 1583 // Clean tag hashes & block tag hashes are left alone. 1584 $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 1585 1586 if ($is_p) { 1587 $value = "<p>$value</p>"; 1588 } 1589 $grafs[$key] = $value; 1590 } 1591 1592 // Join grafs in one text, then unhash HTML tags. 1593 $text = implode("\n\n", $grafs); 1594 1595 // Finish by removing any tag hashes still present in $text. 1596 $text = $this->unhash($text); 1597 1598 return $text; 1599 } 1600 1601 1602 /** 1603 * Footnotes - Strips link definitions from text, stores the URLs and 1604 * titles in hash references. 1605 * @param string $text 1606 * @return string 1607 */ 1608 protected function stripFootnotes($text) { 1609 $less_than_tab = $this->tab_width - 1; 1610 1611 // Link defs are in the form: [^id]: url "optional title" 1612 $text = preg_replace_callback('{ 1613 ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?: # note_id = $1 1614 [ ]* 1615 \n? # maybe *one* newline 1616 ( # text = $2 (no blank lines allowed) 1617 (?: 1618 .+ # actual text 1619 | 1620 \n # newlines but 1621 (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker. 1622 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 1623 # by non-indented content 1624 )* 1625 ) 1626 }xm', 1627 array($this, '_stripFootnotes_callback'), 1628 $text); 1629 return $text; 1630 } 1631 1632 /** 1633 * Callback for stripping footnotes 1634 * @param array $matches 1635 * @return string 1636 */ 1637 protected function _stripFootnotes_callback($matches) { 1638 $note_id = $this->fn_id_prefix . $matches[1]; 1639 $this->footnotes[$note_id] = $this->outdent($matches[2]); 1640 return ''; // String that will replace the block 1641 } 1642 1643 /** 1644 * Replace footnote references in $text [^id] with a special text-token 1645 * which will be replaced by the actual footnote marker in appendFootnotes. 1646 * @param string $text 1647 * @return string 1648 */ 1649 protected function doFootnotes($text) { 1650 if (!$this->in_anchor) { 1651 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 1652 } 1653 return $text; 1654 } 1655 1656 /** 1657 * Append footnote list to text 1658 * @param string $text 1659 * @return string 1660 */ 1661 protected function appendFootnotes($text) { 1662 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1663 array($this, '_appendFootnotes_callback'), $text); 1664 1665 if ( ! empty( $this->footnotes_ordered ) ) { 1666 $this->_doFootnotes(); 1667 if ( ! $this->omit_footnotes ) { 1668 $text .= "\n\n"; 1669 $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n"; 1670 $text .= "<hr" . $this->empty_element_suffix . "\n"; 1671 $text .= $this->footnotes_assembled; 1672 $text .= "</div>"; 1673 } 1674 } 1675 return $text; 1676 } 1677 1678 1679 /** 1680 * Generates the HTML for footnotes. Called by appendFootnotes, even if 1681 * footnotes are not being appended. 1682 * @return void 1683 */ 1684 protected function _doFootnotes() { 1685 $attr = array(); 1686 if ($this->fn_backlink_class !== "") { 1687 $class = $this->fn_backlink_class; 1688 $class = $this->encodeAttribute($class); 1689 $attr['class'] = " class=\"$class\""; 1690 } 1691 $attr['role'] = " role=\"doc-backlink\""; 1692 $num = 0; 1693 1694 $text = "<ol>\n\n"; 1695 while (!empty($this->footnotes_ordered)) { 1696 $footnote = reset($this->footnotes_ordered); 1697 $note_id = key($this->footnotes_ordered); 1698 unset($this->footnotes_ordered[$note_id]); 1699 $ref_count = $this->footnotes_ref_count[$note_id]; 1700 unset($this->footnotes_ref_count[$note_id]); 1701 unset($this->footnotes[$note_id]); 1702 1703 $footnote .= "\n"; // Need to append newline before parsing. 1704 $footnote = $this->runBlockGamut("$footnote\n"); 1705 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1706 array($this, '_appendFootnotes_callback'), $footnote); 1707 1708 $num++; 1709 $note_id = $this->encodeAttribute($note_id); 1710 1711 // Prepare backlink, multiple backlinks if multiple references 1712 // Do not create empty backlinks if the html is blank 1713 $backlink = ""; 1714 if (!empty($this->fn_backlink_html)) { 1715 for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) { 1716 if (!empty($this->fn_backlink_title)) { 1717 $attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"'; 1718 } 1719 if (!empty($this->fn_backlink_label)) { 1720 $attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"'; 1721 } 1722 $parsed_attr = $this->parseFootnotePlaceholders( 1723 implode('', $attr), 1724 $num, 1725 $ref_num 1726 ); 1727 $backlink_text = $this->parseFootnotePlaceholders( 1728 $this->fn_backlink_html, 1729 $num, 1730 $ref_num 1731 ); 1732 $ref_count_mark = $ref_num > 1 ? $ref_num : ''; 1733 $backlink .= " <a href=\"#fnref$ref_count_mark:$note_id\"$parsed_attr>$backlink_text</a>"; 1734 } 1735 $backlink = trim($backlink); 1736 } 1737 1738 // Add backlink to last paragraph; create new paragraph if needed. 1739 if (!empty($backlink)) { 1740 if (preg_match('{</p>$}', $footnote)) { 1741 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 1742 } else { 1743 $footnote .= "\n\n<p>$backlink</p>"; 1744 } 1745 } 1746 1747 $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n"; 1748 $text .= $footnote . "\n"; 1749 $text .= "</li>\n\n"; 1750 } 1751 $text .= "</ol>\n"; 1752 1753 $this->footnotes_assembled = $text; 1754 } 1755 1756 /** 1757 * Callback for appending footnotes 1758 * @param array $matches 1759 * @return string 1760 */ 1761 protected function _appendFootnotes_callback($matches) { 1762 $node_id = $this->fn_id_prefix . $matches[1]; 1763 1764 // Create footnote marker only if it has a corresponding footnote *and* 1765 // the footnote hasn't been used by another marker. 1766 if (isset($this->footnotes[$node_id])) { 1767 $num =& $this->footnotes_numbers[$node_id]; 1768 if (!isset($num)) { 1769 // Transfer footnote content to the ordered list and give it its 1770 // number 1771 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 1772 $this->footnotes_ref_count[$node_id] = 1; 1773 $num = $this->footnote_counter++; 1774 $ref_count_mark = ''; 1775 } else { 1776 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; 1777 } 1778 1779 $attr = ""; 1780 if ($this->fn_link_class !== "") { 1781 $class = $this->fn_link_class; 1782 $class = $this->encodeAttribute($class); 1783 $attr .= " class=\"$class\""; 1784 } 1785 if ($this->fn_link_title !== "") { 1786 $title = $this->fn_link_title; 1787 $title = $this->encodeAttribute($title); 1788 $attr .= " title=\"$title\""; 1789 } 1790 $attr .= " role=\"doc-noteref\""; 1791 1792 $attr = str_replace("%%", $num, $attr); 1793 $node_id = $this->encodeAttribute($node_id); 1794 1795 return 1796 "<sup id=\"fnref$ref_count_mark:$node_id\">". 1797 "<a href=\"#fn:$node_id\"$attr>$num</a>". 1798 "</sup>"; 1799 } 1800 1801 return "[^" . $matches[1] . "]"; 1802 } 1803 1804 /** 1805 * Build footnote label by evaluating any placeholders. 1806 * - ^^ footnote number 1807 * - %% footnote reference number (Nth reference to footnote number) 1808 * @param string $label 1809 * @param int $footnote_number 1810 * @param int $reference_number 1811 * @return string 1812 */ 1813 protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) { 1814 return str_replace( 1815 array('^^', '%%'), 1816 array($footnote_number, $reference_number), 1817 $label 1818 ); 1819 } 1820 1821 1822 /** 1823 * Abbreviations - strips abbreviations from text, stores titles in hash 1824 * references. 1825 * @param string $text 1826 * @return string 1827 */ 1828 protected function stripAbbreviations($text) { 1829 $less_than_tab = $this->tab_width - 1; 1830 1831 // Link defs are in the form: [id]*: url "optional title" 1832 $text = preg_replace_callback('{ 1833 ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?: # abbr_id = $1 1834 (.*) # text = $2 (no blank lines allowed) 1835 }xm', 1836 array($this, '_stripAbbreviations_callback'), 1837 $text); 1838 return $text; 1839 } 1840 1841 /** 1842 * Callback for stripping abbreviations 1843 * @param array $matches 1844 * @return string 1845 */ 1846 protected function _stripAbbreviations_callback($matches) { 1847 $abbr_word = $matches[1]; 1848 $abbr_desc = $matches[2]; 1849 if ($this->abbr_word_re) { 1850 $this->abbr_word_re .= '|'; 1851 } 1852 $this->abbr_word_re .= preg_quote($abbr_word); 1853 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1854 return ''; // String that will replace the block 1855 } 1856 1857 /** 1858 * Find defined abbreviations in text and wrap them in <abbr> elements. 1859 * @param string $text 1860 * @return string 1861 */ 1862 protected function doAbbreviations($text) { 1863 if ($this->abbr_word_re) { 1864 // cannot use the /x modifier because abbr_word_re may 1865 // contain significant spaces: 1866 $text = preg_replace_callback('{' . 1867 '(?<![\w\x1A])' . 1868 '(?:' . $this->abbr_word_re . ')' . 1869 '(?![\w\x1A])' . 1870 '}', 1871 array($this, '_doAbbreviations_callback'), $text); 1872 } 1873 return $text; 1874 } 1875 1876 /** 1877 * Callback for processing abbreviations 1878 * @param array $matches 1879 * @return string 1880 */ 1881 protected function _doAbbreviations_callback($matches) { 1882 $abbr = $matches[0]; 1883 if (isset($this->abbr_desciptions[$abbr])) { 1884 $desc = $this->abbr_desciptions[$abbr]; 1885 if (empty($desc)) { 1886 return $this->hashPart("<abbr>$abbr</abbr>"); 1887 } 1888 $desc = $this->encodeAttribute($desc); 1889 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 1890 } 1891 return $matches[0]; 1892 } 1893 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body