Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402]
1 <?php 2 /** 3 * Markdown Extra - A text-to-HTML conversion tool for web writers 4 * 5 * @package php-markdown 6 * @author Michel Fortin <michel.fortin@michelf.com> 7 * @copyright 2004-2022 Michel Fortin <https://michelf.com/projects/php-markdown/> 8 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/> 9 */ 10 11 namespace Michelf; 12 13 /** 14 * Markdown Extra Parser Class 15 */ 16 class MarkdownExtra extends \Michelf\Markdown { 17 /** 18 * Configuration variables 19 */ 20 /** 21 * Prefix for footnote ids. 22 */ 23 public string $fn_id_prefix = ""; 24 25 /** 26 * Optional title attribute for footnote links. 27 */ 28 public string $fn_link_title = ""; 29 30 /** 31 * Optional class attribute for footnote links and backlinks. 32 */ 33 public string $fn_link_class = "footnote-ref"; 34 public string $fn_backlink_class = "footnote-backref"; 35 36 /** 37 * Content to be displayed within footnote backlinks. The default is '↩'; 38 * the U+FE0E on the end is a Unicode variant selector used to prevent iOS 39 * from displaying the arrow character as an emoji. 40 * Optionally use '^^' and '%%' to refer to the footnote number and 41 * reference number respectively. {@see parseFootnotePlaceholders()} 42 */ 43 public string $fn_backlink_html = '↩︎'; 44 45 /** 46 * Optional title and aria-label attributes for footnote backlinks for 47 * added accessibility (to ensure backlink uniqueness). 48 * Use '^^' and '%%' to refer to the footnote number and reference number 49 * respectively. {@see parseFootnotePlaceholders()} 50 */ 51 public string $fn_backlink_title = ""; 52 public string $fn_backlink_label = ""; 53 54 /** 55 * Class name for table cell alignment (%% replaced left/center/right) 56 * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 57 * If empty, the align attribute is used instead of a class name. 58 */ 59 public string $table_align_class_tmpl = ''; 60 61 /** 62 * Optional class prefix for fenced code block. 63 */ 64 public string $code_class_prefix = ""; 65 66 /** 67 * Class attribute for code blocks goes on the `code` tag; 68 * setting this to true will put attributes on the `pre` tag instead. 69 */ 70 public bool $code_attr_on_pre = false; 71 72 /** 73 * Predefined abbreviations. 74 */ 75 public array $predef_abbr = array(); 76 77 /** 78 * Only convert atx-style headers if there's a space between the header and # 79 */ 80 public bool $hashtag_protection = false; 81 82 /** 83 * Determines whether footnotes should be appended to the end of the document. 84 * If true, footnote html can be retrieved from $this->footnotes_assembled. 85 */ 86 public bool $omit_footnotes = false; 87 88 89 /** 90 * After parsing, the HTML for the list of footnotes appears here. 91 * This is available only if $omit_footnotes == true. 92 * 93 * Note: when placing the content of `footnotes_assembled` on the page, 94 * consider adding the attribute `role="doc-endnotes"` to the `div` or 95 * `section` that will enclose the list of footnotes so they are 96 * reachable to accessibility tools the same way they would be with the 97 * default HTML output. 98 */ 99 public ?string $footnotes_assembled = null; 100 101 /** 102 * Parser implementation 103 */ 104 105 /** 106 * Constructor function. Initialize the parser object. 107 * @return void 108 */ 109 public function __construct() { 110 // Add extra escapable characters before parent constructor 111 // initialize the table. 112 $this->escape_chars .= ':|'; 113 114 // Insert extra document, block, and span transformations. 115 // Parent constructor will do the sorting. 116 $this->document_gamut += array( 117 "doFencedCodeBlocks" => 5, 118 "stripFootnotes" => 15, 119 "stripAbbreviations" => 25, 120 "appendFootnotes" => 50, 121 ); 122 $this->block_gamut += array( 123 "doFencedCodeBlocks" => 5, 124 "doTables" => 15, 125 "doDefLists" => 45, 126 ); 127 $this->span_gamut += array( 128 "doFootnotes" => 5, 129 "doAbbreviations" => 70, 130 ); 131 132 $this->enhanced_ordered_list = true; 133 parent::__construct(); 134 } 135 136 137 /** 138 * Extra variables used during extra transformations. 139 */ 140 protected array $footnotes = array(); 141 protected array $footnotes_ordered = array(); 142 protected array $footnotes_ref_count = array(); 143 protected array $footnotes_numbers = array(); 144 protected array $abbr_desciptions = array(); 145 protected string $abbr_word_re = ''; 146 147 /** 148 * Give the current footnote number. 149 */ 150 protected int $footnote_counter = 1; 151 152 /** 153 * Ref attribute for links 154 */ 155 protected array $ref_attr = array(); 156 157 /** 158 * Setting up Extra-specific variables. 159 */ 160 protected function setup() { 161 parent::setup(); 162 163 $this->footnotes = array(); 164 $this->footnotes_ordered = array(); 165 $this->footnotes_ref_count = array(); 166 $this->footnotes_numbers = array(); 167 $this->abbr_desciptions = array(); 168 $this->abbr_word_re = ''; 169 $this->footnote_counter = 1; 170 $this->footnotes_assembled = null; 171 172 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 173 if ($this->abbr_word_re) 174 $this->abbr_word_re .= '|'; 175 $this->abbr_word_re .= preg_quote($abbr_word); 176 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 177 } 178 } 179 180 /** 181 * Clearing Extra-specific variables. 182 */ 183 protected function teardown() { 184 $this->footnotes = array(); 185 $this->footnotes_ordered = array(); 186 $this->footnotes_ref_count = array(); 187 $this->footnotes_numbers = array(); 188 $this->abbr_desciptions = array(); 189 $this->abbr_word_re = ''; 190 191 if ( ! $this->omit_footnotes ) 192 $this->footnotes_assembled = null; 193 194 parent::teardown(); 195 } 196 197 198 /** 199 * Extra attribute parser 200 */ 201 /** 202 * Expression to use to catch attributes (includes the braces) 203 */ 204 protected string $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}'; 205 206 /** 207 * Expression to use when parsing in a context when no capture is desired 208 */ 209 protected string $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}'; 210 211 /** 212 * Parse attributes caught by the $this->id_class_attr_catch_re expression 213 * and return the HTML-formatted list of attributes. 214 * 215 * Currently supported attributes are .class and #id. 216 * 217 * In addition, this method also supports supplying a default Id value, 218 * which will be used to populate the id attribute in case it was not 219 * overridden. 220 * @param string $tag_name 221 * @param string $attr 222 * @param mixed $defaultIdValue 223 * @param array $classes 224 * @return string 225 */ 226 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) { 227 if (empty($attr) && !$defaultIdValue && empty($classes)) { 228 return ""; 229 } 230 231 // Split on components 232 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches); 233 $elements = $matches[0]; 234 235 // Handle classes and IDs (only first ID taken into account) 236 $attributes = array(); 237 $id = false; 238 foreach ($elements as $element) { 239 if ($element[0] === '.') { 240 $classes[] = substr($element, 1); 241 } else if ($element[0] === '#') { 242 if ($id === false) $id = substr($element, 1); 243 } else if (strpos($element, '=') > 0) { 244 $parts = explode('=', $element, 2); 245 $attributes[] = $parts[0] . '="' . $parts[1] . '"'; 246 } 247 } 248 249 if ($id === false || $id === '') { 250 $id = $defaultIdValue; 251 } 252 253 // Compose attributes as string 254 $attr_str = ""; 255 if (!empty($id)) { 256 $attr_str .= ' id="'.$this->encodeAttribute($id) .'"'; 257 } 258 if (!empty($classes)) { 259 $attr_str .= ' class="'. implode(" ", $classes) . '"'; 260 } 261 if (!$this->no_markup && !empty($attributes)) { 262 $attr_str .= ' '.implode(" ", $attributes); 263 } 264 return $attr_str; 265 } 266 267 /** 268 * Strips link definitions from text, stores the URLs and titles in 269 * hash references. 270 * @param string $text 271 * @return string 272 */ 273 protected function stripLinkDefinitions($text) { 274 $less_than_tab = $this->tab_width - 1; 275 276 // Link defs are in the form: ^[id]: url "optional title" 277 $text = preg_replace_callback('{ 278 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 279 [ ]* 280 \n? # maybe *one* newline 281 [ ]* 282 (?: 283 <(.+?)> # url = $2 284 | 285 (\S+?) # url = $3 286 ) 287 [ ]* 288 \n? # maybe one newline 289 [ ]* 290 (?: 291 (?<=\s) # lookbehind for whitespace 292 ["(] 293 (.*?) # title = $4 294 [")] 295 [ ]* 296 )? # title is optional 297 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 298 (?:\n+|\Z) 299 }xm', 300 array($this, '_stripLinkDefinitions_callback'), 301 $text); 302 return $text; 303 } 304 305 /** 306 * Strip link definition callback 307 * @param array $matches 308 * @return string 309 */ 310 protected function _stripLinkDefinitions_callback($matches) { 311 $link_id = strtolower($matches[1]); 312 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 313 $this->urls[$link_id] = $url; 314 $this->titles[$link_id] =& $matches[4]; 315 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 316 return ''; // String that will replace the block 317 } 318 319 320 /** 321 * HTML block parser 322 */ 323 /** 324 * Tags that are always treated as block tags 325 */ 326 protected string $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure|details|summary'; 327 328 /** 329 * Tags treated as block tags only if the opening tag is alone on its line 330 */ 331 protected string $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 332 333 /** 334 * Tags where markdown="1" default to span mode: 335 */ 336 protected string $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 337 338 /** 339 * Tags which must not have their contents modified, no matter where 340 * they appear 341 */ 342 protected string $clean_tags_re = 'script|style|math|svg'; 343 344 /** 345 * Tags that do not need to be closed. 346 */ 347 protected string $auto_close_tags_re = 'hr|img|param|source|track'; 348 349 /** 350 * Hashify HTML Blocks and "clean tags". 351 * 352 * We only want to do this for block-level HTML tags, such as headers, 353 * lists, and tables. That's because we still want to wrap <p>s around 354 * "paragraphs" that are wrapped in non-block-level tags, such as anchors, 355 * phrase emphasis, and spans. The list of tags we're looking for is 356 * hard-coded. 357 * 358 * This works by calling _HashHTMLBlocks_InMarkdown, which then calls 359 * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 360 * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 361 * _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 362 * These two functions are calling each other. It's recursive! 363 * @param string $text 364 * @return string 365 */ 366 protected function hashHTMLBlocks($text) { 367 if ($this->no_markup) { 368 return $text; 369 } 370 371 // Call the HTML-in-Markdown hasher. 372 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 373 374 return $text; 375 } 376 377 /** 378 * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 379 * 380 * * $indent is the number of space to be ignored when checking for code 381 * blocks. This is important because if we don't take the indent into 382 * account, something like this (which looks right) won't work as expected: 383 * 384 * <div> 385 * <div markdown="1"> 386 * Hello World. <-- Is this a Markdown code block or text? 387 * </div> <-- Is this a Markdown code block or a real tag? 388 * <div> 389 * 390 * If you don't like this, just don't indent the tag on which 391 * you apply the markdown="1" attribute. 392 * 393 * * If $enclosing_tag_re is not empty, stops at the first unmatched closing 394 * tag with that name. Nested tags supported. 395 * 396 * * If $span is true, text inside must treated as span. So any double 397 * newline will be replaced by a single newline so that it does not create 398 * paragraphs. 399 * 400 * Returns an array of that form: ( processed text , remaining text ) 401 * 402 * @param string $text 403 * @param integer $indent 404 * @param string $enclosing_tag_re 405 * @param boolean $span 406 * @return array 407 */ 408 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 409 $enclosing_tag_re = '', $span = false) 410 { 411 412 if ($text === '') return array('', ''); 413 414 // Regex to check for the presense of newlines around a block tag. 415 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 416 $newline_after_re = 417 '{ 418 ^ # Start of text following the tag. 419 (?>[ ]*<!--.*?-->)? # Optional comment. 420 [ ]*\n # Must be followed by newline. 421 }xs'; 422 423 // Regex to match any tag. 424 $block_tag_re = 425 '{ 426 ( # $2: Capture whole tag. 427 </? # Any opening or closing tag. 428 (?> # Tag name. 429 ' . $this->block_tags_re . ' | 430 ' . $this->context_block_tags_re . ' | 431 ' . $this->clean_tags_re . ' | 432 (?!\s)'.$enclosing_tag_re . ' 433 ) 434 (?: 435 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 436 (?> 437 ".*?" | # Double quotes (can contain `>`) 438 \'.*?\' | # Single quotes (can contain `>`) 439 .+? # Anything but quotes and `>`. 440 )*? 441 )? 442 > # End of tag. 443 | 444 <!-- .*? --> # HTML Comment 445 | 446 <\?.*?\?> | <%.*?%> # Processing instruction 447 | 448 <!\[CDATA\[.*?\]\]> # CData Block 449 ' . ( !$span ? ' # If not in span. 450 | 451 # Indented code block 452 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 453 [ ]{' . ($indent + 4) . '}[^\n]* \n 454 (?> 455 (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n 456 )* 457 | 458 # Fenced code block marker 459 (?<= ^ | \n ) 460 [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,}) 461 [ ]* 462 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name 463 [ ]* 464 (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes 465 [ ]* 466 (?= \n ) 467 ' : '' ) . ' # End (if not is span). 468 | 469 # Code span marker 470 # Note, this regex needs to go after backtick fenced 471 # code blocks but it should also be kept outside of the 472 # "if not in span" condition adding backticks to the parser 473 `+ 474 ) 475 }xs'; 476 477 478 $depth = 0; // Current depth inside the tag tree. 479 $parsed = ""; // Parsed text that will be returned. 480 481 // Loop through every tag until we find the closing tag of the parent 482 // or loop until reaching the end of text if no parent tag specified. 483 do { 484 // Split the text using the first $tag_match pattern found. 485 // Text before pattern will be first in the array, text after 486 // pattern will be at the end, and between will be any catches made 487 // by the pattern. 488 $parts = preg_split($block_tag_re, $text, 2, 489 PREG_SPLIT_DELIM_CAPTURE); 490 491 // If in Markdown span mode, add a empty-string span-level hash 492 // after each newline to prevent triggering any block element. 493 if ($span) { 494 $void = $this->hashPart("", ':'); 495 $newline = "\n$void"; 496 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 497 } 498 499 $parsed .= $parts[0]; // Text before current tag. 500 501 // If end of $text has been reached. Stop loop. 502 if (count($parts) < 3) { 503 $text = ""; 504 break; 505 } 506 507 $tag = $parts[1]; // Tag to handle. 508 $text = $parts[2]; // Remaining text after current tag. 509 510 // Check for: Fenced code block marker. 511 // Note: need to recheck the whole tag to disambiguate backtick 512 // fences from code spans 513 if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) { 514 // Fenced code block marker: find matching end marker. 515 $fence_indent = strlen($capture[1]); // use captured indent in re 516 $fence_re = $capture[2]; // use captured fence in re 517 if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text, 518 $matches)) 519 { 520 // End marker found: pass text unchanged until marker. 521 $parsed .= $tag . $matches[0]; 522 $text = substr($text, strlen($matches[0])); 523 } 524 else { 525 // No end marker: just skip it. 526 $parsed .= $tag; 527 } 528 } 529 // Check for: Indented code block. 530 else if ($tag[0] === "\n" || $tag[0] === " ") { 531 // Indented code block: pass it unchanged, will be handled 532 // later. 533 $parsed .= $tag; 534 } 535 // Check for: Code span marker 536 // Note: need to check this after backtick fenced code blocks 537 else if ($tag[0] === "`") { 538 // Find corresponding end marker. 539 $tag_re = preg_quote($tag); 540 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}', 541 $text, $matches)) 542 { 543 // End marker found: pass text unchanged until marker. 544 $parsed .= $tag . $matches[0]; 545 $text = substr($text, strlen($matches[0])); 546 } 547 else { 548 // Unmatched marker: just skip it. 549 $parsed .= $tag; 550 } 551 } 552 // Check for: Opening Block level tag or 553 // Opening Context Block tag (like ins and del) 554 // used as a block tag (tag is alone on it's line). 555 else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) || 556 ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) && 557 preg_match($newline_before_re, $parsed) && 558 preg_match($newline_after_re, $text) ) 559 ) 560 { 561 // Need to parse tag and following text using the HTML parser. 562 list($block_text, $text) = 563 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 564 565 // Make sure it stays outside of any paragraph by adding newlines. 566 $parsed .= "\n\n$block_text\n\n"; 567 } 568 // Check for: Clean tag (like script, math) 569 // HTML Comments, processing instructions. 570 else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) || 571 $tag[1] === '!' || $tag[1] === '?') 572 { 573 // Need to parse tag and following text using the HTML parser. 574 // (don't check for markdown attribute) 575 list($block_text, $text) = 576 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 577 578 $parsed .= $block_text; 579 } 580 // Check for: Tag with same name as enclosing tag. 581 else if ($enclosing_tag_re !== '' && 582 // Same name as enclosing tag. 583 preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag)) 584 { 585 // Increase/decrease nested tag count. 586 if ($tag[1] === '/') { 587 $depth--; 588 } else if ($tag[strlen($tag)-2] !== '/') { 589 $depth++; 590 } 591 592 if ($depth < 0) { 593 // Going out of parent element. Clean up and break so we 594 // return to the calling function. 595 $text = $tag . $text; 596 break; 597 } 598 599 $parsed .= $tag; 600 } 601 else { 602 $parsed .= $tag; 603 } 604 // @phpstan-ignore-next-line 605 } while ($depth >= 0); 606 607 return array($parsed, $text); 608 } 609 610 /** 611 * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 612 * 613 * * Calls $hash_method to convert any blocks. 614 * * Stops when the first opening tag closes. 615 * * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 616 * (it is not inside clean tags) 617 * 618 * Returns an array of that form: ( processed text , remaining text ) 619 * @param string $text 620 * @param string $hash_method 621 * @param bool $md_attr Handle `markdown="1"` attribute 622 * @return array 623 */ 624 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 625 if ($text === '') return array('', ''); 626 627 // Regex to match `markdown` attribute inside of a tag. 628 $markdown_attr_re = ' 629 { 630 \s* # Eat whitespace before the `markdown` attribute 631 markdown 632 \s*=\s* 633 (?> 634 (["\']) # $1: quote delimiter 635 (.*?) # $2: attribute value 636 \1 # matching delimiter 637 | 638 ([^\s>]*) # $3: unquoted attribute value 639 ) 640 () # $4: make $3 always defined (avoid warnings) 641 }xs'; 642 643 // Regex to match any tag. 644 $tag_re = '{ 645 ( # $2: Capture whole tag. 646 </? # Any opening or closing tag. 647 [\w:$]+ # Tag name. 648 (?: 649 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 650 (?> 651 ".*?" | # Double quotes (can contain `>`) 652 \'.*?\' | # Single quotes (can contain `>`) 653 .+? # Anything but quotes and `>`. 654 )*? 655 )? 656 > # End of tag. 657 | 658 <!-- .*? --> # HTML Comment 659 | 660 <\?.*?\?> | <%.*?%> # Processing instruction 661 | 662 <!\[CDATA\[.*?\]\]> # CData Block 663 ) 664 }xs'; 665 666 $original_text = $text; // Save original text in case of faliure. 667 668 $depth = 0; // Current depth inside the tag tree. 669 $block_text = ""; // Temporary text holder for current text. 670 $parsed = ""; // Parsed text that will be returned. 671 $base_tag_name_re = ''; 672 673 // Get the name of the starting tag. 674 // (This pattern makes $base_tag_name_re safe without quoting.) 675 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 676 $base_tag_name_re = $matches[1]; 677 678 // Loop through every tag until we find the corresponding closing tag. 679 do { 680 // Split the text using the first $tag_match pattern found. 681 // Text before pattern will be first in the array, text after 682 // pattern will be at the end, and between will be any catches made 683 // by the pattern. 684 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 685 686 if ($parts === false || count($parts) < 3) { 687 // End of $text reached with unbalenced tag(s). 688 // In that case, we return original text unchanged and pass the 689 // first character as filtered to prevent an infinite loop in the 690 // parent function. 691 return array($original_text[0], substr($original_text, 1)); 692 } 693 694 $block_text .= $parts[0]; // Text before current tag. 695 $tag = $parts[1]; // Tag to handle. 696 $text = $parts[2]; // Remaining text after current tag. 697 698 // Check for: Auto-close tag (like <hr/>) 699 // Comments and Processing Instructions. 700 if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) || 701 $tag[1] === '!' || $tag[1] === '?') 702 { 703 // Just add the tag to the block as if it was text. 704 $block_text .= $tag; 705 } 706 else { 707 // Increase/decrease nested tag count. Only do so if 708 // the tag's name match base tag's. 709 if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) { 710 if ($tag[1] === '/') { 711 $depth--; 712 } else if ($tag[strlen($tag)-2] !== '/') { 713 $depth++; 714 } 715 } 716 717 // Check for `markdown="1"` attribute and handle it. 718 if ($md_attr && 719 preg_match($markdown_attr_re, $tag, $attr_m) && 720 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 721 { 722 // Remove `markdown` attribute from opening tag. 723 $tag = preg_replace($markdown_attr_re, '', $tag); 724 725 // Check if text inside this tag must be parsed in span mode. 726 $mode = $attr_m[2] . $attr_m[3]; 727 $span_mode = $mode === 'span' || ($mode !== 'block' && 728 preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag)); 729 730 // Calculate indent before tag. 731 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 732 $strlen = $this->utf8_strlen; 733 $indent = $strlen($matches[1], 'UTF-8'); 734 } else { 735 $indent = 0; 736 } 737 738 // End preceding block with this tag. 739 $block_text .= $tag; 740 $parsed .= $this->$hash_method($block_text); 741 742 // Get enclosing tag name for the ParseMarkdown function. 743 // (This pattern makes $tag_name_re safe without quoting.) 744 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 745 $tag_name_re = $matches[1]; 746 747 // Parse the content using the HTML-in-Markdown parser. 748 list ($block_text, $text) 749 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 750 $tag_name_re, $span_mode); 751 752 // Outdent markdown text. 753 if ($indent > 0) { 754 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 755 $block_text); 756 } 757 758 // Append tag content to parsed text. 759 if (!$span_mode) { 760 $parsed .= "\n\n$block_text\n\n"; 761 } else { 762 $parsed .= (string) $block_text; 763 } 764 765 // Start over with a new block. 766 $block_text = ""; 767 } 768 else $block_text .= $tag; 769 } 770 771 } while ($depth > 0); 772 773 // Hash last block text that wasn't processed inside the loop. 774 $parsed .= $this->$hash_method($block_text); 775 776 return array($parsed, $text); 777 } 778 779 /** 780 * Called whenever a tag must be hashed when a function inserts a "clean" tag 781 * in $text, it passes through this function and is automaticaly escaped, 782 * blocking invalid nested overlap. 783 * @param string $text 784 * @return string 785 */ 786 protected function hashClean($text) { 787 return $this->hashPart($text, 'C'); 788 } 789 790 /** 791 * Turn Markdown link shortcuts into XHTML <a> tags. 792 * @param string $text 793 * @return string 794 */ 795 protected function doAnchors($text) { 796 if ($this->in_anchor) { 797 return $text; 798 } 799 $this->in_anchor = true; 800 801 // First, handle reference-style links: [link text] [id] 802 $text = preg_replace_callback('{ 803 ( # wrap whole match in $1 804 \[ 805 (' . $this->nested_brackets_re . ') # link text = $2 806 \] 807 808 [ ]? # one optional space 809 (?:\n[ ]*)? # one optional newline followed by spaces 810 811 \[ 812 (.*?) # id = $3 813 \] 814 ) 815 }xs', 816 array($this, '_doAnchors_reference_callback'), $text); 817 818 // Next, inline-style links: [link text](url "optional title") 819 $text = preg_replace_callback('{ 820 ( # wrap whole match in $1 821 \[ 822 (' . $this->nested_brackets_re . ') # link text = $2 823 \] 824 \( # literal paren 825 [ \n]* 826 (?: 827 <(.+?)> # href = $3 828 | 829 (' . $this->nested_url_parenthesis_re . ') # href = $4 830 ) 831 [ \n]* 832 ( # $5 833 ([\'"]) # quote char = $6 834 (.*?) # Title = $7 835 \6 # matching quote 836 [ \n]* # ignore any spaces/tabs between closing quote and ) 837 )? # title is optional 838 \) 839 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 840 ) 841 }xs', 842 array($this, '_doAnchors_inline_callback'), $text); 843 844 // Last, handle reference-style shortcuts: [link text] 845 // These must come last in case you've also got [link text][1] 846 // or [link text](/foo) 847 $text = preg_replace_callback('{ 848 ( # wrap whole match in $1 849 \[ 850 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 851 \] 852 ) 853 }xs', 854 array($this, '_doAnchors_reference_callback'), $text); 855 856 $this->in_anchor = false; 857 return $text; 858 } 859 860 /** 861 * Callback for reference anchors 862 * @param array $matches 863 * @return string 864 */ 865 protected function _doAnchors_reference_callback($matches) { 866 $whole_match = $matches[1]; 867 $link_text = $matches[2]; 868 $link_id =& $matches[3]; 869 870 if ($link_id == "") { 871 // for shortcut links like [this][] or [this]. 872 $link_id = $link_text; 873 } 874 875 // lower-case and turn embedded newlines into spaces 876 $link_id = strtolower($link_id); 877 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 878 879 if (isset($this->urls[$link_id])) { 880 $url = $this->urls[$link_id]; 881 $url = $this->encodeURLAttribute($url); 882 883 $result = "<a href=\"$url\""; 884 if ( isset( $this->titles[$link_id] ) ) { 885 $title = $this->titles[$link_id]; 886 $title = $this->encodeAttribute($title); 887 $result .= " title=\"$title\""; 888 } 889 if (isset($this->ref_attr[$link_id])) 890 $result .= $this->ref_attr[$link_id]; 891 892 $link_text = $this->runSpanGamut($link_text); 893 $result .= ">$link_text</a>"; 894 $result = $this->hashPart($result); 895 } 896 else { 897 $result = $whole_match; 898 } 899 return $result; 900 } 901 902 /** 903 * Callback for inline anchors 904 * @param array $matches 905 * @return string 906 */ 907 protected function _doAnchors_inline_callback($matches) { 908 $link_text = $this->runSpanGamut($matches[2]); 909 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 910 $title_quote =& $matches[6]; 911 $title =& $matches[7]; 912 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 913 914 // if the URL was of the form <s p a c e s> it got caught by the HTML 915 // tag parser and hashed. Need to reverse the process before using the URL. 916 $unhashed = $this->unhash($url); 917 if ($unhashed !== $url) 918 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 919 920 $url = $this->encodeURLAttribute($url); 921 922 $result = "<a href=\"$url\""; 923 if (isset($title) && $title_quote) { 924 $title = $this->encodeAttribute($title); 925 $result .= " title=\"$title\""; 926 } 927 $result .= $attr; 928 929 $link_text = $this->runSpanGamut($link_text); 930 $result .= ">$link_text</a>"; 931 932 return $this->hashPart($result); 933 } 934 935 /** 936 * Turn Markdown image shortcuts into <img> tags. 937 * @param string $text 938 * @return string 939 */ 940 protected function doImages($text) { 941 // First, handle reference-style labeled images: ![alt text][id] 942 $text = preg_replace_callback('{ 943 ( # wrap whole match in $1 944 !\[ 945 (' . $this->nested_brackets_re . ') # alt text = $2 946 \] 947 948 [ ]? # one optional space 949 (?:\n[ ]*)? # one optional newline followed by spaces 950 951 \[ 952 (.*?) # id = $3 953 \] 954 955 ) 956 }xs', 957 array($this, '_doImages_reference_callback'), $text); 958 959 // Next, handle inline images: ![alt text](url "optional title") 960 // Don't forget: encode * and _ 961 $text = preg_replace_callback('{ 962 ( # wrap whole match in $1 963 !\[ 964 (' . $this->nested_brackets_re . ') # alt text = $2 965 \] 966 \s? # One optional whitespace character 967 \( # literal paren 968 [ \n]* 969 (?: 970 <(\S*)> # src url = $3 971 | 972 (' . $this->nested_url_parenthesis_re . ') # src url = $4 973 ) 974 [ \n]* 975 ( # $5 976 ([\'"]) # quote char = $6 977 (.*?) # title = $7 978 \6 # matching quote 979 [ \n]* 980 )? # title is optional 981 \) 982 (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes 983 ) 984 }xs', 985 array($this, '_doImages_inline_callback'), $text); 986 987 return $text; 988 } 989 990 /** 991 * Callback for referenced images 992 * @param array $matches 993 * @return string 994 */ 995 protected function _doImages_reference_callback($matches) { 996 $whole_match = $matches[1]; 997 $alt_text = $matches[2]; 998 $link_id = strtolower($matches[3]); 999 1000 if ($link_id === "") { 1001 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 1002 } 1003 1004 $alt_text = $this->encodeAttribute($alt_text); 1005 if (isset($this->urls[$link_id])) { 1006 $url = $this->encodeURLAttribute($this->urls[$link_id]); 1007 $result = "<img src=\"$url\" alt=\"$alt_text\""; 1008 if (isset($this->titles[$link_id])) { 1009 $title = $this->titles[$link_id]; 1010 $title = $this->encodeAttribute($title); 1011 $result .= " title=\"$title\""; 1012 } 1013 if (isset($this->ref_attr[$link_id])) { 1014 $result .= $this->ref_attr[$link_id]; 1015 } 1016 $result .= $this->empty_element_suffix; 1017 $result = $this->hashPart($result); 1018 } 1019 else { 1020 // If there's no such link ID, leave intact: 1021 $result = $whole_match; 1022 } 1023 1024 return $result; 1025 } 1026 1027 /** 1028 * Callback for inline images 1029 * @param array $matches 1030 * @return string 1031 */ 1032 protected function _doImages_inline_callback($matches) { 1033 $alt_text = $matches[2]; 1034 $url = $matches[3] === '' ? $matches[4] : $matches[3]; 1035 $title_quote =& $matches[6]; 1036 $title =& $matches[7]; 1037 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 1038 1039 $alt_text = $this->encodeAttribute($alt_text); 1040 $url = $this->encodeURLAttribute($url); 1041 $result = "<img src=\"$url\" alt=\"$alt_text\""; 1042 if (isset($title) && $title_quote) { 1043 $title = $this->encodeAttribute($title); 1044 $result .= " title=\"$title\""; // $title already quoted 1045 } 1046 $result .= $attr; 1047 $result .= $this->empty_element_suffix; 1048 1049 return $this->hashPart($result); 1050 } 1051 1052 /** 1053 * Process markdown headers. Redefined to add ID and class attribute support. 1054 * @param string $text 1055 * @return string 1056 */ 1057 protected function doHeaders($text) { 1058 // Setext-style headers: 1059 // Header 1 {#header1} 1060 // ======== 1061 // 1062 // Header 2 {#header2 .class1 .class2} 1063 // -------- 1064 // 1065 $text = preg_replace_callback( 1066 '{ 1067 (^.+?) # $1: Header text 1068 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 1069 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 1070 }mx', 1071 array($this, '_doHeaders_callback_setext'), $text); 1072 1073 // atx-style headers: 1074 // # Header 1 {#header1} 1075 // ## Header 2 {#header2} 1076 // ## Header 2 with closing hashes ## {#header3.class1.class2} 1077 // ... 1078 // ###### Header 6 {.class2} 1079 // 1080 $text = preg_replace_callback('{ 1081 ^(\#{1,6}) # $1 = string of #\'s 1082 [ ]'.($this->hashtag_protection ? '+' : '*').' 1083 (.+?) # $2 = Header text 1084 [ ]* 1085 \#* # optional closing #\'s (not counted) 1086 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes 1087 [ ]* 1088 \n+ 1089 }xm', 1090 array($this, '_doHeaders_callback_atx'), $text); 1091 1092 return $text; 1093 } 1094 1095 /** 1096 * Callback for setext headers 1097 * @param array $matches 1098 * @return string 1099 */ 1100 protected function _doHeaders_callback_setext($matches) { 1101 if ($matches[3] === '-' && preg_match('{^- }', $matches[1])) { 1102 return $matches[0]; 1103 } 1104 1105 $level = $matches[3][0] === '=' ? 1 : 2; 1106 1107 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null; 1108 1109 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId); 1110 $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>"; 1111 return "\n" . $this->hashBlock($block) . "\n\n"; 1112 } 1113 1114 /** 1115 * Callback for atx headers 1116 * @param array $matches 1117 * @return string 1118 */ 1119 protected function _doHeaders_callback_atx($matches) { 1120 $level = strlen($matches[1]); 1121 1122 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null; 1123 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId); 1124 $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>"; 1125 return "\n" . $this->hashBlock($block) . "\n\n"; 1126 } 1127 1128 /** 1129 * Form HTML tables. 1130 * @param string $text 1131 * @return string 1132 */ 1133 protected function doTables($text) { 1134 $less_than_tab = $this->tab_width - 1; 1135 // Find tables with leading pipe. 1136 // 1137 // | Header 1 | Header 2 1138 // | -------- | -------- 1139 // | Cell 1 | Cell 2 1140 // | Cell 3 | Cell 4 1141 $text = preg_replace_callback(' 1142 { 1143 ^ # Start of a line 1144 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1145 [|] # Optional leading pipe (present) 1146 (.+) \n # $1: Header row (at least one pipe) 1147 1148 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1149 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 1150 1151 ( # $3: Cells 1152 (?> 1153 [ ]* # Allowed whitespace. 1154 [|] .* \n # Row content. 1155 )* 1156 ) 1157 (?=\n|\Z) # Stop at final double newline. 1158 }xm', 1159 array($this, '_doTable_leadingPipe_callback'), $text); 1160 1161 // Find tables without leading pipe. 1162 // 1163 // Header 1 | Header 2 1164 // -------- | -------- 1165 // Cell 1 | Cell 2 1166 // Cell 3 | Cell 4 1167 $text = preg_replace_callback(' 1168 { 1169 ^ # Start of a line 1170 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1171 (\S.*[|].*) \n # $1: Header row (at least one pipe) 1172 1173 [ ]{0,' . $less_than_tab . '} # Allowed whitespace. 1174 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 1175 1176 ( # $3: Cells 1177 (?> 1178 .* [|] .* \n # Row content 1179 )* 1180 ) 1181 (?=\n|\Z) # Stop at final double newline. 1182 }xm', 1183 array($this, '_DoTable_callback'), $text); 1184 1185 return $text; 1186 } 1187 1188 /** 1189 * Callback for removing the leading pipe for each row 1190 * @param array $matches 1191 * @return string 1192 */ 1193 protected function _doTable_leadingPipe_callback($matches) { 1194 $head = $matches[1]; 1195 $underline = $matches[2]; 1196 $content = $matches[3]; 1197 1198 $content = preg_replace('/^ *[|]/m', '', $content); 1199 1200 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 1201 } 1202 1203 /** 1204 * Make the align attribute in a table 1205 * @param string $alignname 1206 * @return string 1207 */ 1208 protected function _doTable_makeAlignAttr($alignname) { 1209 if (empty($this->table_align_class_tmpl)) { 1210 return " align=\"$alignname\""; 1211 } 1212 1213 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl); 1214 return " class=\"$classname\""; 1215 } 1216 1217 /** 1218 * Calback for processing tables 1219 * @param array $matches 1220 * @return string 1221 */ 1222 protected function _doTable_callback($matches) { 1223 $head = $matches[1]; 1224 $underline = $matches[2]; 1225 $content = $matches[3]; 1226 $attr = []; 1227 1228 // Remove any tailing pipes for each line. 1229 $head = preg_replace('/[|] *$/m', '', $head); 1230 $underline = preg_replace('/[|] *$/m', '', $underline); 1231 $content = preg_replace('/[|] *$/m', '', $content); 1232 1233 // Reading alignement from header underline. 1234 $separators = preg_split('/ *[|] */', $underline); 1235 foreach ($separators as $n => $s) { 1236 if (preg_match('/^ *-+: *$/', $s)) 1237 $attr[$n] = $this->_doTable_makeAlignAttr('right'); 1238 else if (preg_match('/^ *:-+: *$/', $s)) 1239 $attr[$n] = $this->_doTable_makeAlignAttr('center'); 1240 else if (preg_match('/^ *:-+ *$/', $s)) 1241 $attr[$n] = $this->_doTable_makeAlignAttr('left'); 1242 else 1243 $attr[$n] = ''; 1244 } 1245 1246 // Parsing span elements, including code spans, character escapes, 1247 // and inline HTML tags, so that pipes inside those gets ignored. 1248 $head = $this->parseSpan($head); 1249 $headers = preg_split('/ *[|] */', $head); 1250 $col_count = count($headers); 1251 $attr = array_pad($attr, $col_count, ''); 1252 1253 // Write column headers. 1254 $text = "<table>\n"; 1255 $text .= "<thead>\n"; 1256 $text .= "<tr>\n"; 1257 foreach ($headers as $n => $header) { 1258 $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n"; 1259 } 1260 $text .= "</tr>\n"; 1261 $text .= "</thead>\n"; 1262 1263 // Split content by row. 1264 $rows = explode("\n", trim($content, "\n")); 1265 1266 $text .= "<tbody>\n"; 1267 foreach ($rows as $row) { 1268 // Parsing span elements, including code spans, character escapes, 1269 // and inline HTML tags, so that pipes inside those gets ignored. 1270 $row = $this->parseSpan($row); 1271 1272 // Split row by cell. 1273 $row_cells = preg_split('/ *[|] */', $row, $col_count); 1274 $row_cells = array_pad($row_cells, $col_count, ''); 1275 1276 $text .= "<tr>\n"; 1277 foreach ($row_cells as $n => $cell) { 1278 $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n"; 1279 } 1280 $text .= "</tr>\n"; 1281 } 1282 $text .= "</tbody>\n"; 1283 $text .= "</table>"; 1284 1285 return $this->hashBlock($text) . "\n"; 1286 } 1287 1288 /** 1289 * Form HTML definition lists. 1290 * @param string $text 1291 * @return string 1292 */ 1293 protected function doDefLists($text) { 1294 $less_than_tab = $this->tab_width - 1; 1295 1296 // Re-usable pattern to match any entire dl list: 1297 $whole_list_re = '(?> 1298 ( # $1 = whole list 1299 ( # $2 1300 [ ]{0,' . $less_than_tab . '} 1301 ((?>.*\S.*\n)+) # $3 = defined term 1302 \n? 1303 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1304 ) 1305 (?s:.+?) 1306 ( # $4 1307 \z 1308 | 1309 \n{2,} 1310 (?=\S) 1311 (?! # Negative lookahead for another term 1312 [ ]{0,' . $less_than_tab . '} 1313 (?: \S.*\n )+? # defined term 1314 \n? 1315 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1316 ) 1317 (?! # Negative lookahead for another definition 1318 [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition 1319 ) 1320 ) 1321 ) 1322 )'; // mx 1323 1324 $text = preg_replace_callback('{ 1325 (?>\A\n?|(?<=\n\n)) 1326 ' . $whole_list_re . ' 1327 }mx', 1328 array($this, '_doDefLists_callback'), $text); 1329 1330 return $text; 1331 } 1332 1333 /** 1334 * Callback for processing definition lists 1335 * @param array $matches 1336 * @return string 1337 */ 1338 protected function _doDefLists_callback($matches) { 1339 // Re-usable patterns to match list item bullets and number markers: 1340 $list = $matches[1]; 1341 1342 // Turn double returns into triple returns, so that we can make a 1343 // paragraph for the last item in a list, if necessary: 1344 $result = trim($this->processDefListItems($list)); 1345 $result = "<dl>\n" . $result . "\n</dl>"; 1346 return $this->hashBlock($result) . "\n\n"; 1347 } 1348 1349 /** 1350 * Process the contents of a single definition list, splitting it 1351 * into individual term and definition list items. 1352 * @param string $list_str 1353 * @return string 1354 */ 1355 protected function processDefListItems($list_str) { 1356 1357 $less_than_tab = $this->tab_width - 1; 1358 1359 // Trim trailing blank lines: 1360 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1361 1362 // Process definition terms. 1363 $list_str = preg_replace_callback('{ 1364 (?>\A\n?|\n\n+) # leading line 1365 ( # definition terms = $1 1366 [ ]{0,' . $less_than_tab . '} # leading whitespace 1367 (?!\:[ ]|[ ]) # negative lookahead for a definition 1368 # mark (colon) or more whitespace. 1369 (?> \S.* \n)+? # actual term (not whitespace). 1370 ) 1371 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 1372 # with a definition mark. 1373 }xm', 1374 array($this, '_processDefListItems_callback_dt'), $list_str); 1375 1376 // Process actual definitions. 1377 $list_str = preg_replace_callback('{ 1378 \n(\n+)? # leading line = $1 1379 ( # marker space = $2 1380 [ ]{0,' . $less_than_tab . '} # whitespace before colon 1381 \:[ ]+ # definition mark (colon) 1382 ) 1383 ((?s:.+?)) # definition text = $3 1384 (?= \n+ # stop at next definition mark, 1385 (?: # next term or end of text 1386 [ ]{0,' . $less_than_tab . '} \:[ ] | 1387 <dt> | \z 1388 ) 1389 ) 1390 }xm', 1391 array($this, '_processDefListItems_callback_dd'), $list_str); 1392 1393 return $list_str; 1394 } 1395 1396 /** 1397 * Callback for <dt> elements in definition lists 1398 * @param array $matches 1399 * @return string 1400 */ 1401 protected function _processDefListItems_callback_dt($matches) { 1402 $terms = explode("\n", trim($matches[1])); 1403 $text = ''; 1404 foreach ($terms as $term) { 1405 $term = $this->runSpanGamut(trim($term)); 1406 $text .= "\n<dt>" . $term . "</dt>"; 1407 } 1408 return $text . "\n"; 1409 } 1410 1411 /** 1412 * Callback for <dd> elements in definition lists 1413 * @param array $matches 1414 * @return string 1415 */ 1416 protected function _processDefListItems_callback_dd($matches) { 1417 $leading_line = $matches[1]; 1418 $marker_space = $matches[2]; 1419 $def = $matches[3]; 1420 1421 if ($leading_line || preg_match('/\n{2,}/', $def)) { 1422 // Replace marker with the appropriate whitespace indentation 1423 $def = str_repeat(' ', strlen($marker_space)) . $def; 1424 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 1425 $def = "\n". $def ."\n"; 1426 } 1427 else { 1428 $def = rtrim($def); 1429 $def = $this->runSpanGamut($this->outdent($def)); 1430 } 1431 1432 return "\n<dd>" . $def . "</dd>\n"; 1433 } 1434 1435 /** 1436 * Adding the fenced code block syntax to regular Markdown: 1437 * 1438 * ~~~ 1439 * Code block 1440 * ~~~ 1441 * 1442 * @param string $text 1443 * @return string 1444 */ 1445 protected function doFencedCodeBlocks($text) { 1446 1447 $text = preg_replace_callback('{ 1448 (?:\n|\A) 1449 # 1: Opening marker 1450 ( 1451 (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 1452 ) 1453 [ ]* 1454 (?: 1455 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 1456 )? 1457 [ ]* 1458 (?: 1459 ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes 1460 )? 1461 [ ]* \n # Whitespace and newline following marker. 1462 1463 # 4: Content 1464 ( 1465 (?> 1466 (?!\1 [ ]* \n) # Not a closing marker. 1467 .*\n+ 1468 )+ 1469 ) 1470 1471 # Closing marker. 1472 \1 [ ]* (?= \n ) 1473 }xm', 1474 array($this, '_doFencedCodeBlocks_callback'), $text); 1475 1476 return $text; 1477 } 1478 1479 /** 1480 * Callback to process fenced code blocks 1481 * @param array $matches 1482 * @return string 1483 */ 1484 protected function _doFencedCodeBlocks_callback($matches) { 1485 $classname =& $matches[2]; 1486 $attrs =& $matches[3]; 1487 $codeblock = $matches[4]; 1488 1489 if ($this->code_block_content_func) { 1490 $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname); 1491 } else { 1492 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1493 } 1494 1495 $codeblock = preg_replace_callback('/^\n+/', 1496 array($this, '_doFencedCodeBlocks_newlines'), $codeblock); 1497 1498 $classes = array(); 1499 if ($classname !== "") { 1500 if ($classname[0] === '.') { 1501 $classname = substr($classname, 1); 1502 } 1503 $classes[] = $this->code_class_prefix . $classname; 1504 } 1505 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes); 1506 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 1507 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 1508 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>"; 1509 1510 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1511 } 1512 1513 /** 1514 * Replace new lines in fenced code blocks 1515 * @param array $matches 1516 * @return string 1517 */ 1518 protected function _doFencedCodeBlocks_newlines($matches) { 1519 return str_repeat("<br$this->empty_element_suffix", 1520 strlen($matches[0])); 1521 } 1522 1523 /** 1524 * Redefining emphasis markers so that emphasis by underscore does not 1525 * work in the middle of a word. 1526 * @var array 1527 */ 1528 protected array $em_relist = array( 1529 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)', 1530 '*' => '(?<![\s*])\*(?!\*)', 1531 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])', 1532 ); 1533 protected array $strong_relist = array( 1534 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)', 1535 '**' => '(?<![\s*])\*\*(?!\*)', 1536 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])', 1537 ); 1538 protected array $em_strong_relist = array( 1539 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)', 1540 '***' => '(?<![\s*])\*\*\*(?!\*)', 1541 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])', 1542 ); 1543 1544 /** 1545 * Parse text into paragraphs 1546 * @param string $text String to process in paragraphs 1547 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags 1548 * @return string HTML output 1549 */ 1550 protected function formParagraphs($text, $wrap_in_p = true) { 1551 // Strip leading and trailing lines: 1552 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1553 1554 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1555 1556 // Wrap <p> tags and unhashify HTML blocks 1557 foreach ($grafs as $key => $value) { 1558 $value = trim($this->runSpanGamut($value)); 1559 1560 // Check if this should be enclosed in a paragraph. 1561 // Clean tag hashes & block tag hashes are left alone. 1562 $is_p = $wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 1563 1564 if ($is_p) { 1565 $value = "<p>$value</p>"; 1566 } 1567 $grafs[$key] = $value; 1568 } 1569 1570 // Join grafs in one text, then unhash HTML tags. 1571 $text = implode("\n\n", $grafs); 1572 1573 // Finish by removing any tag hashes still present in $text. 1574 $text = $this->unhash($text); 1575 1576 return $text; 1577 } 1578 1579 1580 /** 1581 * Footnotes - Strips link definitions from text, stores the URLs and 1582 * titles in hash references. 1583 * @param string $text 1584 * @return string 1585 */ 1586 protected function stripFootnotes($text) { 1587 $less_than_tab = $this->tab_width - 1; 1588 1589 // Link defs are in the form: [^id]: url "optional title" 1590 $text = preg_replace_callback('{ 1591 ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?: # note_id = $1 1592 [ ]* 1593 \n? # maybe *one* newline 1594 ( # text = $2 (no blank lines allowed) 1595 (?: 1596 .+ # actual text 1597 | 1598 \n # newlines but 1599 (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker. 1600 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 1601 # by non-indented content 1602 )* 1603 ) 1604 }xm', 1605 array($this, '_stripFootnotes_callback'), 1606 $text); 1607 return $text; 1608 } 1609 1610 /** 1611 * Callback for stripping footnotes 1612 * @param array $matches 1613 * @return string 1614 */ 1615 protected function _stripFootnotes_callback($matches) { 1616 $note_id = $this->fn_id_prefix . $matches[1]; 1617 $this->footnotes[$note_id] = $this->outdent($matches[2]); 1618 return ''; // String that will replace the block 1619 } 1620 1621 /** 1622 * Replace footnote references in $text [^id] with a special text-token 1623 * which will be replaced by the actual footnote marker in appendFootnotes. 1624 * @param string $text 1625 * @return string 1626 */ 1627 protected function doFootnotes($text) { 1628 if (!$this->in_anchor) { 1629 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 1630 } 1631 return $text; 1632 } 1633 1634 /** 1635 * Append footnote list to text 1636 * @param string $text 1637 * @return string 1638 */ 1639 protected function appendFootnotes($text) { 1640 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1641 array($this, '_appendFootnotes_callback'), $text); 1642 1643 if ( ! empty( $this->footnotes_ordered ) ) { 1644 $this->_doFootnotes(); 1645 if ( ! $this->omit_footnotes ) { 1646 $text .= "\n\n"; 1647 $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n"; 1648 $text .= "<hr" . $this->empty_element_suffix . "\n"; 1649 $text .= $this->footnotes_assembled; 1650 $text .= "</div>"; 1651 } 1652 } 1653 return $text; 1654 } 1655 1656 1657 /** 1658 * Generates the HTML for footnotes. Called by appendFootnotes, even if 1659 * footnotes are not being appended. 1660 * @return void 1661 */ 1662 protected function _doFootnotes() { 1663 $attr = array(); 1664 if ($this->fn_backlink_class !== "") { 1665 $class = $this->fn_backlink_class; 1666 $class = $this->encodeAttribute($class); 1667 $attr['class'] = " class=\"$class\""; 1668 } 1669 $attr['role'] = " role=\"doc-backlink\""; 1670 $num = 0; 1671 1672 $text = "<ol>\n\n"; 1673 while (!empty($this->footnotes_ordered)) { 1674 $footnote = reset($this->footnotes_ordered); 1675 $note_id = key($this->footnotes_ordered); 1676 unset($this->footnotes_ordered[$note_id]); 1677 $ref_count = $this->footnotes_ref_count[$note_id]; 1678 unset($this->footnotes_ref_count[$note_id]); 1679 unset($this->footnotes[$note_id]); 1680 1681 $footnote .= "\n"; // Need to append newline before parsing. 1682 $footnote = $this->runBlockGamut("$footnote\n"); 1683 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 1684 array($this, '_appendFootnotes_callback'), $footnote); 1685 1686 $num++; 1687 $note_id = $this->encodeAttribute($note_id); 1688 1689 // Prepare backlink, multiple backlinks if multiple references 1690 // Do not create empty backlinks if the html is blank 1691 $backlink = ""; 1692 if (!empty($this->fn_backlink_html)) { 1693 for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) { 1694 if (!empty($this->fn_backlink_title)) { 1695 $attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"'; 1696 } 1697 if (!empty($this->fn_backlink_label)) { 1698 $attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"'; 1699 } 1700 $parsed_attr = $this->parseFootnotePlaceholders( 1701 implode('', $attr), 1702 $num, 1703 $ref_num 1704 ); 1705 $backlink_text = $this->parseFootnotePlaceholders( 1706 $this->fn_backlink_html, 1707 $num, 1708 $ref_num 1709 ); 1710 $ref_count_mark = $ref_num > 1 ? $ref_num : ''; 1711 $backlink .= " <a href=\"#fnref$ref_count_mark:$note_id\"$parsed_attr>$backlink_text</a>"; 1712 } 1713 $backlink = trim($backlink); 1714 } 1715 1716 // Add backlink to last paragraph; create new paragraph if needed. 1717 if (!empty($backlink)) { 1718 if (preg_match('{</p>$}', $footnote)) { 1719 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 1720 } else { 1721 $footnote .= "\n\n<p>$backlink</p>"; 1722 } 1723 } 1724 1725 $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n"; 1726 $text .= $footnote . "\n"; 1727 $text .= "</li>\n\n"; 1728 } 1729 $text .= "</ol>\n"; 1730 1731 $this->footnotes_assembled = $text; 1732 } 1733 1734 /** 1735 * Callback for appending footnotes 1736 * @param array $matches 1737 * @return string 1738 */ 1739 protected function _appendFootnotes_callback($matches) { 1740 $node_id = $this->fn_id_prefix . $matches[1]; 1741 1742 // Create footnote marker only if it has a corresponding footnote *and* 1743 // the footnote hasn't been used by another marker. 1744 if (isset($this->footnotes[$node_id])) { 1745 $num =& $this->footnotes_numbers[$node_id]; 1746 if (!isset($num)) { 1747 // Transfer footnote content to the ordered list and give it its 1748 // number 1749 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 1750 $this->footnotes_ref_count[$node_id] = 1; 1751 $num = $this->footnote_counter++; 1752 $ref_count_mark = ''; 1753 } else { 1754 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; 1755 } 1756 1757 $attr = ""; 1758 if ($this->fn_link_class !== "") { 1759 $class = $this->fn_link_class; 1760 $class = $this->encodeAttribute($class); 1761 $attr .= " class=\"$class\""; 1762 } 1763 if ($this->fn_link_title !== "") { 1764 $title = $this->fn_link_title; 1765 $title = $this->encodeAttribute($title); 1766 $attr .= " title=\"$title\""; 1767 } 1768 $attr .= " role=\"doc-noteref\""; 1769 1770 $attr = str_replace("%%", $num, $attr); 1771 $node_id = $this->encodeAttribute($node_id); 1772 1773 return 1774 "<sup id=\"fnref$ref_count_mark:$node_id\">". 1775 "<a href=\"#fn:$node_id\"$attr>$num</a>". 1776 "</sup>"; 1777 } 1778 1779 return "[^" . $matches[1] . "]"; 1780 } 1781 1782 /** 1783 * Build footnote label by evaluating any placeholders. 1784 * - ^^ footnote number 1785 * - %% footnote reference number (Nth reference to footnote number) 1786 * @param string $label 1787 * @param int $footnote_number 1788 * @param int $reference_number 1789 * @return string 1790 */ 1791 protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) { 1792 return str_replace( 1793 array('^^', '%%'), 1794 array($footnote_number, $reference_number), 1795 $label 1796 ); 1797 } 1798 1799 1800 /** 1801 * Abbreviations - strips abbreviations from text, stores titles in hash 1802 * references. 1803 * @param string $text 1804 * @return string 1805 */ 1806 protected function stripAbbreviations($text) { 1807 $less_than_tab = $this->tab_width - 1; 1808 1809 // Link defs are in the form: [id]*: url "optional title" 1810 $text = preg_replace_callback('{ 1811 ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?: # abbr_id = $1 1812 (.*) # text = $2 (no blank lines allowed) 1813 }xm', 1814 array($this, '_stripAbbreviations_callback'), 1815 $text); 1816 return $text; 1817 } 1818 1819 /** 1820 * Callback for stripping abbreviations 1821 * @param array $matches 1822 * @return string 1823 */ 1824 protected function _stripAbbreviations_callback($matches) { 1825 $abbr_word = $matches[1]; 1826 $abbr_desc = $matches[2]; 1827 if ($this->abbr_word_re) { 1828 $this->abbr_word_re .= '|'; 1829 } 1830 $this->abbr_word_re .= preg_quote($abbr_word); 1831 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1832 return ''; // String that will replace the block 1833 } 1834 1835 /** 1836 * Find defined abbreviations in text and wrap them in <abbr> elements. 1837 * @param string $text 1838 * @return string 1839 */ 1840 protected function doAbbreviations($text) { 1841 if ($this->abbr_word_re) { 1842 // cannot use the /x modifier because abbr_word_re may 1843 // contain significant spaces: 1844 $text = preg_replace_callback('{' . 1845 '(?<![\w\x1A])' . 1846 '(?:' . $this->abbr_word_re . ')' . 1847 '(?![\w\x1A])' . 1848 '}', 1849 array($this, '_doAbbreviations_callback'), $text); 1850 } 1851 return $text; 1852 } 1853 1854 /** 1855 * Callback for processing abbreviations 1856 * @param array $matches 1857 * @return string 1858 */ 1859 protected function _doAbbreviations_callback($matches) { 1860 $abbr = $matches[0]; 1861 if (isset($this->abbr_desciptions[$abbr])) { 1862 $desc = $this->abbr_desciptions[$abbr]; 1863 if (empty($desc)) { 1864 return $this->hashPart("<abbr>$abbr</abbr>"); 1865 } 1866 $desc = $this->encodeAttribute($desc); 1867 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 1868 } 1869 return $matches[0]; 1870 } 1871 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body