Differences Between: [Versions 310 and 311] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]
<?php
/**
 * Markdown  -  A text-to-HTML conversion tool for web writers
 *
 * @package   php-markdown
 * @author    Michel Fortin <michel.fortin@michelf.com>
 * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
 */

namespace Michelf;

/**
 * Markdown Parser Class
 */
class Markdown implements MarkdownInterface {
    /**
     * Define the package version
     * @var string
     */
    const MARKDOWNLIB_VERSION = "1.9.0";

    /**
     * Simple function interface - Initialize the parser and return the result
     * of its transform method. This will work fine for derived classes too.
     *
     * One parser instance is kept per called class in a function-static list
     * and reused by later calls.
     *
     * @api
     *
     * @param  string $text
     * @return string
     */
    public static function defaultTransform($text) {
        // Take parser class on which this function was called.
        $parser_class = \get_called_class();

        // Try to take parser from the static parser list.
        // $parser is a reference into the list, so assigning to it below
        // also stores the new instance in the list.
        static $parser_list;
        $parser =& $parser_list[$parser_class];

        // Create the parser if not already set
        if (!$parser) {
            $parser = new $parser_class;
        }

        // Transform text using parser.
        return $parser->transform($text);
    }

    /**
     * Configuration variables
     */

    /**
     * Change to ">" for HTML output.
     * @var string
     */
    public $empty_element_suffix = " />";

    /**
     * The width of indentation of the output markup
     * @var int
     */
    public $tab_width = 4;

    /**
     * Change to `true` to disallow markup or entities.
     * @var boolean
     */
    public $no_markup = false;
    public $no_entities = false;


    /**
     * Change to `true` to enable line breaks on \n without two trailing spaces
     * @var boolean
     */
    public $hard_wrap = false;

    /**
     * Predefined URLs and titles for reference links and images.
* @var array
     */
    public $predef_urls = array();
    public $predef_titles = array();

    /**
     * Optional filter function for URLs
     * @var callable|null
     */
    public $url_filter_func = null;

    /**
     * Optional header id="" generation callback function.
     * @var callable|null
     */
    public $header_id_func = null;

    /**
     * Optional function for converting code block content to HTML
     * @var callable|null
     */
    public $code_block_content_func = null;

    /**
     * Optional function for converting code span content to HTML.
     * @var callable|null
     */
    public $code_span_content_func = null;

    /**
     * Class attribute to toggle "enhanced ordered list" behaviour
     * setting this to true will allow ordered lists to start from the index
     * number that is defined first.
     *
     * For example:
     * 2. List item two
     * 3. List item three
     *
     * Becomes:
     * <ol start="2">
     * <li>List item two</li>
     * <li>List item three</li>
     * </ol>
     *
     * @var bool
     */
    public $enhanced_ordered_list = false;

    /**
     * Parser implementation
     */

    /**
     * Regex to match balanced [brackets].
     * Needed to insert a maximum bracket depth while converting to PHP.
     * The two *_re properties are built in the constructor by repeating a
     * sub-pattern *_depth times.
     * @var int
     */
    protected $nested_brackets_depth = 6;
    protected $nested_brackets_re;

    protected $nested_url_parenthesis_depth = 4;
    protected $nested_url_parenthesis_re;

    /**
     * Table of hash values for escaped characters:
     * ($escape_chars_re is the character class built from it with
     * preg_quote() in the constructor.)
     * @var string
     */
    protected $escape_chars = '\`*_{}[]()>#+-.!';
    protected $escape_chars_re;

    /**
     * Constructor function. Initialize appropriate member variables.
     * @return void
     */
    public function __construct() {
        $this->_initDetab();
        $this->prepareItalicsAndBold();

        // Nested pattern: the opening half repeated N times followed by the
        // closing half repeated N times, giving N levels of nesting.
        $this->nested_brackets_re =
            str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
            str_repeat('\])*', $this->nested_brackets_depth);

        $this->nested_url_parenthesis_re =
            str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
            str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

        $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

        // Sort document, block, and span gamut in ascending priority order.
        // asort() keeps the method-name keys, which is what the gamut loops
        // iterate over.
        asort($this->document_gamut);
        asort($this->block_gamut);
        asort($this->span_gamut);
    }


    /**
     * Internal hashes used during transformation.
     * @var array
     */
    protected $urls = array();
    protected $titles = array();
    protected $html_hashes = array();

    /**
     * Status flag to avoid invalid nesting.
     * @var boolean
     */
    protected $in_anchor = false;

    /**
     * Status flag to avoid invalid nesting.
     * @var boolean
     */
    protected $in_emphasis_processing = false;

    /**
     * Called before the transformation process starts to setup parser states.
     * Seeds the URL and title tables from the predefined ones and resets the
     * HTML hash table and both nesting flags.
     * @return void
     */
    protected function setup() {
        // Clear global hashes.
        $this->urls = $this->predef_urls;
        $this->titles = $this->predef_titles;
        $this->html_hashes = array();
        $this->in_anchor = false;
        $this->in_emphasis_processing = false;
    }

    /**
     * Called after the transformation process to clear any variable which may
     * be taking up memory unnecessarily.
     * @return void
     */
    protected function teardown() {
        $this->urls = array();
        $this->titles = array();
        $this->html_hashes = array();
    }

    /**
     * Main function. Performs some preprocessing on the input text and pass
     * it through the document gamut.
     *
     * @api
     *
     * @param  string $text
     * @return string
     */
    public function transform($text) {
        $this->setup();

        # Remove UTF-8 BOM and marker character in input, if present.
        # (\x1A is the character hashPart() embeds in its tokens.)
        $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

        # Standardize line endings:
        #   DOS to Unix and Mac to Unix
        $text = preg_replace('{\r\n?}', "\n", $text);

        # Make sure $text ends with a couple of newlines:
        $text .= "\n\n";

        # Convert all tabs to spaces.
        $text = $this->detab($text);

        # Turn block-level HTML blocks into hash entries
        $text = $this->hashHTMLBlocks($text);

        # Strip any lines consisting only of spaces and tabs.
        # This makes subsequent regexen easier to write, because we can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ ]*\n+/ .
        $text = preg_replace('/^[ ]+$/m', '', $text);

        # Run document gamut methods.
        foreach ($this->document_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        $this->teardown();

        return $text . "\n";
    }

    /**
     * Define the document gamut
     * (method name => priority; sorted by priority in the constructor)
     * @var array
     */
    protected $document_gamut = array(
        // Strip link definitions, store in hashes.
        "stripLinkDefinitions" => 20,
        "runBasicBlockGamut"   => 30,
    );

    /**
     * Strips link definitions from text, stores the URLs and titles in
     * hash references
     * @param  string $text
     * @return string
     */
    protected function stripLinkDefinitions($text) {

        $less_than_tab = $this->tab_width - 1;

        // Link defs are in the form: ^[id]: url "optional title"
        $text = preg_replace_callback('{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                              [ ]*
                              \n?               # maybe *one* newline
                              [ ]*
                            (?:
                              <(.+?)>           # url = $2
                            |
                              (\S+?)            # url = $3
                            )
                              [ ]*
                              \n?               # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)         # lookbehind for whitespace
                                ["(]
                                (.*?)           # title = $4
                                [")]
                                [ ]*
                            )?  # title is optional
                            (?:\n+|\Z)
            }xm',
            array($this, '_stripLinkDefinitions_callback'),
            $text
        );
        return $text;
    }

    /**
     * The callback to strip link definitions
     * Records the URL and title under the lower-cased link id and removes
     * the definition from the text.
     * @param  array $matches
     * @return string
     */
    protected function _stripLinkDefinitions_callback($matches) {
        $link_id = strtolower($matches[1]);
        $url = $matches[2] == '' ? $matches[3] : $matches[2];
        $this->urls[$link_id] = $url;
        // Bound by reference: the optional title group may be missing from
        // $matches, and a reference does not raise an undefined-index notice.
        $this->titles[$link_id] =& $matches[4];
        return ''; // String that will replace the block
    }

    /**
     * Hashify HTML blocks
     * Returns $text unchanged when $no_markup is set.
     * @param  string $text
     * @return string
     */
    protected function hashHTMLBlocks($text) {
        if ($this->no_markup) {
            return $text;
        }

        $less_than_tab = $this->tab_width - 1;

        /**
         * Hashify HTML blocks:
         *
         * We only want to do this for block-level HTML tags, such as headers,
         * lists, and tables. That's because we still want to wrap <p>s around
         * "paragraphs" that are wrapped in non-block-level tags, such as
         * anchors, phrase emphasis, and spans. The list of tags we're looking
         * for is hard-coded:
         *
         * *  List "a" is made of tags which can be both inline or block-level.
         *    These will be treated block-level when the start tag is alone on
         *    its line, otherwise they're not matched here and will be taken as
         *    inline later.
         * *  List "b" is made of tags which are always block-level;
         */
        $block_tags_a_re = 'ins|del';
        $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                           'script|noscript|style|form|fieldset|iframe|math|svg|'.
                           'article|section|nav|aside|hgroup|header|footer|'.
                           'figure';

        // Regular expression for the content of a block tag.
        // $attr and $content are spliced into the extended-mode (x) pattern
        // below, so their line breaks end the embedded "#" comments.
        $nested_tags_level = 4;
        $attr = '
            (?>             # optional tag attributes
              \s            # starts with whitespace
              (?>
                [^>"/]+     # text outside quotes
              |
                /+(?!>)     # slash not followed by ">"
              |
                "[^"]*"     # text inside double quotes (tolerate ">")
              |
                \'[^\']*\'  # text inside single quotes (tolerate ">")
              )*
            )?
            ';
        $content =
            str_repeat('
                (?>
                  [^<]+         # content without tag
                |
                  <\2           # nested opening tag
                    '.$attr.'   # attributes
                    (?>
                      />
                    |
                      >', $nested_tags_level).  // end of opening tag
                      '.*?'.                    // last level nested tag content
            str_repeat('
                      </\2\s*>  # closing nested tag
                    )
                  |
                    <(?!/\2\s*> # other tags with a different name
                  )
                )*',
                $nested_tags_level);
        // Same content pattern, but back-referencing the tag name captured
        // as $3 (the "a" list alternative) instead of $2.
        $content2 = str_replace('\2', '\3', $content);

        /**
         * First, look for nested blocks, e.g.:
         *  <div>
         *      <div>
         *      tags for inner block must be indented.
         *      </div>
         *  </div>
         *
         * The outermost tags must start at the left margin for this to match,
         * and the inner nested divs must be indented.
         * We need to do this before the next, more liberal match, because the
         * next match will start at the first `<div>` and stop at the
         * first `</div>`.
         */
        $text = preg_replace_callback('{(?>
            (?>
                (?<=\n)         # Starting on its own line
                |               # or
                \A\n?           # the at beginning of the doc
            )
            (                       # save in $1

              # Match from `\n<tag>` to `</tag>\n`, handling nested tags
              # in between.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_b_re.')# start tag = $2
                        '.$attr.'>          # attributes followed by > and \n
                        '.$content.'        # content, support nesting
                        </\2>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special version for tags of group a.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_a_re.')# start tag = $3
                        '.$attr.'>[ ]*\n    # attributes followed by >
                        '.$content2.'       # content, support nesting
                        </\3>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special case just for <hr />. It was easier to make a special
              # case than to make the other regex more complicated.

                        [ ]{0,'.$less_than_tab.'}
                        <(hr)               # start tag = $2
                        '.$attr.'           # attributes
                        /?>                 # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # Special case for standalone HTML comments:

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <!-- .*? -->
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # PHP and ASP-style processor instructions (<? and <%)

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <([?%])         # $2
                        .*?
                        \2>
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            )
            )}Sxmi',
            array($this, '_hashHTMLBlocks_callback'),
            $text
        );

        return $text;
    }

    /**
     * The callback for hashing HTML blocks
     * Replaces the matched block with its hash token, surrounded by blank
     * lines.
     * @param  array $matches
     * @return string
     */
    protected function _hashHTMLBlocks_callback($matches) {
        $text = $matches[1];
        $key  = $this->hashBlock($text);
        return "\n\n$key\n\n";
    }

    /**
     * Called whenever a tag must be hashed when a function inserts an atomic
     * element in the text stream. Passing $text through this function gives
     * a unique text-token which will be reverted back when calling unhash.
     *
     * The $boundary argument specifies what character should be used to
     * surround the token. By convention, "B" is used for block elements that
     * need not be wrapped into paragraph tags at the end, ":" is used for
     * elements that are word separators and "X" is used in the general case.
*
     * @param  string $text
     * @param  string $boundary
     * @return string
     */
    protected function hashPart($text, $boundary = 'X') {
        // Swap back any tag hash found in $text so we do not have to `unhash`
        // multiple times at the end.
        $text = $this->unhash($text);

        // Then hash the block.
        // The counter is function-static, so it keeps increasing across
        // calls; the token is <boundary> \x1A <counter> <boundary>.
        static $i = 0;
        $key = "$boundary\x1A" . ++$i . $boundary;
        $this->html_hashes[$key] = $text;
        return $key; // String that will replace the tag.
    }

    /**
     * Shortcut function for hashPart with block-level boundaries.
     * @param  string $text
     * @return string
     */
    protected function hashBlock($text) {
        return $this->hashPart($text, 'B');
    }

    /**
     * Define the block gamut - these are all the transformations that form
     * block-level tags like paragraphs, headers, and list items.
     * (method name => priority; sorted by priority in the constructor)
     * @var array
     */
    protected $block_gamut = array(
        "doHeaders"         => 10,
        "doHorizontalRules" => 20,
        "doLists"           => 40,
        "doCodeBlocks"      => 50,
        "doBlockQuotes"     => 60,
    );

    /**
     * Run block gamut transformations.
     *
     * We need to escape raw HTML in Markdown source before doing anything
     * else. This needs to be done for each block, and not only at the
     * beginning in the Markdown function since hashed blocks can be part of
     * list items and could have been indented. Indented blocks would have
     * been seen as a code block in a previous pass of hashHTMLBlocks.
     *
     * @param  string $text
     * @return string
     */
    protected function runBlockGamut($text) {
        $text = $this->hashHTMLBlocks($text);
        return $this->runBasicBlockGamut($text);
    }

    /**
     * Run block gamut transformations, without hashing HTML blocks. This is
     * useful when HTML blocks are known to be already hashed, like in the first
     * whole-document pass.
*
     * @param  string $text
     * @return string
     */
    protected function runBasicBlockGamut($text) {

        // Apply every block-level transformation in priority order.
        foreach ($this->block_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        // Finally form paragraph and restore hashed blocks.
        $text = $this->formParagraphs($text);

        return $text;
    }

    /**
     * Convert horizontal rules
     *
     * A rule is a line made of three or more identical markers (-, * or _),
     * optionally separated by up to two spaces. Each one is replaced by a
     * hashed <hr> element.
     *
     * @param  string $text
     * @return string
     */
    protected function doHorizontalRules($text) {
        return preg_replace(
            '{
                ^[ ]{0,3}   # Leading space
                ([-*_])     # $1: First marker
                (?>         # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1          # Marker character
                ){2,}       # Group repeated at least twice
                [ ]*        # Tailing spaces
                $           # End of line.
            }mx',
            // The replacement is evaluated once, so every rule in $text
            // shares the same hash token.
            "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
            $text
        );
    }

    /**
     * These are all the transformations that occur *within* block-level
     * tags like paragraphs, headers, and list items.
     * (method name => priority; sorted by priority in the constructor)
     * @var array
     */
    protected $span_gamut = array(
        // Process character escapes, code spans, and inline HTML
        // in one shot.
        "parseSpan"           => -30,
        // Process anchor and image tags. Images must come first,
        // because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,
        // Make links out of things like `<https://example.com/>`
        // Must come after doAnchors, because you can use < and >
        // delimiters in inline links like [this](<url>).
        "doAutoLinks"         =>  30,
        "encodeAmpsAndAngles" =>  40,
        "doItalicsAndBold"    =>  50,
        "doHardBreaks"        =>  60,
    );

    /**
     * Run span gamut transformations
     * @param  string $text
     * @return string
     */
    protected function runSpanGamut($text) {
        foreach ($this->span_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        return $text;
    }

    /**
     * Do hard breaks
     *
     * With $hard_wrap enabled every newline becomes a break; otherwise only
     * a newline preceded by two or more spaces does.
     *
     * @param  string $text
     * @return string
     */
    protected function doHardBreaks($text) {
        if ($this->hard_wrap) {
            return preg_replace_callback('/ *\n/',
                array($this, '_doHardBreaks_callback'), $text);
        } else {
            return preg_replace_callback('/ {2,}\n/',
                array($this, '_doHardBreaks_callback'), $text);
        }
    }

    /**
     * Trigger part hashing for the hard break (callback method)
     * @param  array $matches
     * @return string
     */
    protected function _doHardBreaks_callback($matches) {
        return $this->hashPart("<br$this->empty_element_suffix\n");
    }

    /**
     * Turn Markdown link shortcuts into XHTML <a> tags.
     *
     * Does nothing when called while a link is already being processed
     * ($in_anchor), which prevents links nested inside link text.
     *
     * @param  string $text
     * @return string
     */
    protected function doAnchors($text) {
        if ($this->in_anchor) {
            return $text;
        }
        $this->in_anchor = true;

        // First, handle reference-style links: [link text] [id]
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        // Next, inline-style links: [link text](url "optional title")
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]
              \(            # literal paren
                [ \n]*
                (?:
                    <(.+?)> # href = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # href = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # Title = $7
                  \6        # matching quote
                  [ \n]*    # ignore any spaces/tabs between closing quote and )
                )?          # title is optional
              \)
            )
            }xs',
            array($this, '_doAnchors_inline_callback'), $text);

        // Last, handle reference-style shortcuts: [link text]
        // These must come last in case you've also got [link text][1]
        // or [link text](/foo)
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }

    /**
     * Callback method to parse referenced anchors
     *
     * Used both for `[text][id]` and for the `[text]` shortcut form. When the
     * id is unknown the original text is returned untouched.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doAnchors_reference_callback($matches) {
        $whole_match = $matches[1];
        $link_text   = $matches[2];
        // Bound by reference: the shortcut pattern has no group 3, and a
        // reference does not raise an undefined-index notice.
        $link_id     =& $matches[3];

        if ($link_id == "") {
            // for shortcut links like [this][] or [this].
            $link_id = $link_text;
        }

        // lower-case and turn embedded newlines into spaces
        $link_id = strtolower($link_id);
        $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

        if (isset($this->urls[$link_id])) {
            $url = $this->urls[$link_id];
            $url = $this->encodeURLAttribute($url);

            $result = "<a href=\"$url\"";
            if ( isset( $this->titles[$link_id] ) ) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }

            $link_text = $this->runSpanGamut($link_text);
            $result .= ">$link_text</a>";
            $result = $this->hashPart($result);
        } else {
            $result = $whole_match;
        }
        return $result;
    }

    /**
     * Callback method to parse inline anchors
     *
     * Builds the <a> element for `[text](url "title")` and returns its hash
     * token.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doAnchors_inline_callback($matches) {
        // The link text goes through the span gamut exactly once, as in
        // _doAnchors_reference_callback().
        $link_text = $this->runSpanGamut($matches[2]);
        $url       = $matches[3] === '' ? $matches[4] : $matches[3];
        // Bound by reference: the optional title group may be missing from
        // $matches.
        $title     =& $matches[7];

        // If the URL was of the form <s p a c e s> it got caught by the HTML
        // tag parser and hashed. Need to reverse the process before using
        // the URL.
        $unhashed = $this->unhash($url);
        if ($unhashed !== $url) {
            $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
        }

        $url = $this->encodeURLAttribute($url);

        $result = "<a href=\"$url\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\"";
        }

        $result .= ">$link_text</a>";

        return $this->hashPart($result);
    }

    /**
     * Turn Markdown image shortcuts into <img> tags.
* @param  string $text
     * @return string
     */
    protected function doImages($text) {
        // First, handle reference-style labeled images: ![alt text][id]
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]

            )
            }xs',
            array($this, '_doImages_reference_callback'), $text);

        // Next, handle inline images:  ![alt text](url "optional title")
        // Don't forget: encode * and _
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]
              \s?           # One optional whitespace character
              \(            # literal paren
                [ \n]*
                (?:
                    <(\S*)> # src url = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # src url = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # title = $7
                  \6        # matching quote
                  [ \n]*
                )?          # title is optional
              \)
            )
            }xs',
            array($this, '_doImages_inline_callback'), $text);

        return $text;
    }

    /**
     * Callback to parse references image tags
     *
     * Builds the <img> element for `![alt][id]`; when the id is unknown the
     * original text is returned untouched.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doImages_reference_callback($matches) {
        $whole_match = $matches[1];
        $alt_text    = $matches[2];
        $link_id     = strtolower($matches[3]);

        if ($link_id == "") {
            $link_id = strtolower($alt_text); // for shortcut links like ![this][].
        }

        $alt_text = $this->encodeAttribute($alt_text);
        if (isset($this->urls[$link_id])) {
            $url = $this->encodeURLAttribute($this->urls[$link_id]);
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (isset($this->titles[$link_id])) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }
            $result .= $this->empty_element_suffix;
            $result = $this->hashPart($result);
        } else {
            // If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        return $result;
    }

    /**
     * Callback to parse inline image tags
     *
     * Builds the <img> element for `![alt](url "title")` and returns its
     * hash token.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doImages_inline_callback($matches) {
        $alt_text = $matches[2];

        // Prefer the <...> form (group 3). That form may capture an empty
        // URL, as in `![alt](<>)`; group 4 then never took part in the match
        // and, being a trailing unmatched group, is absent from $matches.
        // Fall back to an empty string rather than reading a missing index.
        if ($matches[3] !== '') {
            $url = $matches[3];
        } else {
            $url = isset($matches[4]) ? $matches[4] : '';
        }

        // Bound by reference: the optional title group may be missing from
        // $matches.
        $title =& $matches[7];

        $alt_text = $this->encodeAttribute($alt_text);
        $url = $this->encodeURLAttribute($url);
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\""; // $title already quoted
        }
        $result .= $this->empty_element_suffix;

        return $this->hashPart($result);
    }

    /**
     * Parse Markdown heading elements to HTML
     * @param  string $text
     * @return string
     */
    protected function doHeaders($text) {
        /**
         * Setext-style headers:
         *    Header 1
         *    ========
         *
         *    Header 2
         *    --------
         */
        $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
            array($this, '_doHeaders_callback_setext'), $text);

        /**
         * atx-style headers:
         *   # Header 1
         *   ## Header 2
         *   ## Header 2 with closing hashes ##
         *   ...
         *   ###### Header 6
         */
        $text = preg_replace_callback('{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                \n+
            }xm',
            array($this, '_doHeaders_callback_atx'), $text);

        return $text;
    }

    /**
     * Setext header parsing callback
     *
     * An underline of "=" gives a level 1 header, "-" gives level 2.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doHeaders_callback_setext($matches) {
        // Terrible hack to check we haven't found an empty list item.
        if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
            return $matches[0];
        }

        $level = $matches[2][0] == '=' ? 1 : 2;

        // ID attribute generation
        $idAtt = $this->_generateIdFromHeaderValue($matches[1]);

        $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }

    /**
     * ATX header parsing callback
     *
     * The header level is the number of leading "#" characters.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doHeaders_callback_atx($matches) {
        // ID attribute generation
        $idAtt = $this->_generateIdFromHeaderValue($matches[2]);

        $level = strlen($matches[1]);
        $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }

    /**
     * If a header_id_func property is set, we can use it to automatically
     * generate an id attribute.
     *
     * This method returns a string in the form id="foo" (with a leading
     * space), or an empty string when no callable is set or the callable
     * returns a falsy value.
     * @param  string $headerValue
     * @return string
     */
    protected function _generateIdFromHeaderValue($headerValue) {
        if (!is_callable($this->header_id_func)) {
            return "";
        }

        $idValue = call_user_func($this->header_id_func, $headerValue);
        if (!$idValue) {
            return "";
        }

        return ' id="' . $this->encodeAttribute($idValue) . '"';
    }

    /**
     * Form HTML ordered (numbered) and unordered (bulleted) lists.
* @param  string $text
     * @return string
     */
    protected function doLists($text) {
        $less_than_tab = $this->tab_width - 1;

        // Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[\.]';

        // Each entry maps a marker pattern to the pattern of the *other*
        // list kind, which ends a list when met at the same indentation.
        $markers_relist = array(
            $marker_ul_re => $marker_ol_re,
            $marker_ol_re => $marker_ul_re,
            );

        foreach ($markers_relist as $marker_re => $other_marker_re) {
            // Re-usable pattern to match any entire ul or ol list:
            $whole_list_re = '
                (                               # $1 = whole list
                  (                             # $2
                    ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                    ('.$marker_re.')            # $4 = first list item marker
                    [ ]+
                  )
                  (?s:.+?)
                  (                             # $5
                      \z
                    |
                      \n{2,}
                      (?=\S)
                      (?!                       # Negative lookahead for another list item marker
                        [ ]*
                        '.$marker_re.'[ ]+
                      )
                    |
                      (?=                       # Lookahead for another kind of list
                        \n
                        \3                      # Must have the same indentation
                        '.$other_marker_re.'[ ]+
                      )
                  )
                )
            '; // mx

            // We use a different prefix before nested lists than top-level lists.
            // See extended comment in processListItems().

            if ($this->list_level) {
                $text = preg_replace_callback('{
                        ^
                        '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            } else {
                $text = preg_replace_callback('{
                        (?:(?<=\n)\n|\A\n?) # Must eat the newline
                        '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            }
        }

        return $text;
    }

    /**
     * List parsing callback
     *
     * Wraps the processed items in a hashed <ul> or <ol> element. A start
     * attribute is only written for ordered lists when
     * $enhanced_ordered_list is enabled and the first number is above 1.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doLists_callback($matches) {
        // Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[\.]';
        // NOTE(review): this value is overwritten a few lines below before
        // it is read.
        $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
        $marker_ol_start_re = '[0-9]+';

        $list = $matches[1];
        $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

        // Only markers of the list's own kind separate its items.
        $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

        $list .= "\n";
        $result = $this->processListItems($list, $marker_any_re);

        $ol_start = 1;
        if ($this->enhanced_ordered_list) {
            // Get the start number for ordered list.
            if ($list_type == 'ol') {
                $ol_start_array = array();
                $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
                if ($ol_start_check){
                    // Kept as the matched digit string; it is written as-is
                    // into the start attribute below.
                    $ol_start = $ol_start_array[0];
                }
            }
        }

        if ($ol_start > 1 && $list_type == 'ol'){
            $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
        } else {
            $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
        }
        return "\n". $result ."\n\n";
    }

    /**
     * Nesting tracker for list levels
     * @var integer
     */
    protected $list_level = 0;

    /**
     * Process the contents of a single ordered or unordered list, splitting it
     * into individual list items.
     * @param  string $list_str
     * @param  string $marker_any_re
     * @return string
     */
    protected function processListItems($list_str, $marker_any_re) {
        /**
         * The $this->list_level global keeps track of when we're inside a list.
         * Each time we enter a list, we increment it; when we leave a list,
         * we decrement. If it's zero, we're not in a list anymore.
         *
         * We do this because when we're not inside a list, we want to treat
         * something like this:
         *
         *      I recommend upgrading to version
         *      8. Oops, now this line is treated
         *      as a sub-list.
         *
         * As a single paragraph, despite the fact that the second line starts
         * with a digit-period-space sequence.
         *
         * Whereas when we're inside a list (or sub-list), that line will be
         * treated as the start of a sub-list. What a kludge, huh? This is
         * an aspect of Markdown's syntax that's hard to parse perfectly
         * without resorting to mind-reading. Perhaps the solution is to
         * change the syntax rules such that sub-lists must start with a
         * starting cardinal number; e.g. "1." or "a.".
         */
        $this->list_level++;

        // Trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        $list_str = preg_replace_callback('{
            (\n)?                           # leading line = $1
            (^[ ]*)                         # leading whitespace = $2
            ('.$marker_any_re.'             # list marker and space = $3
                (?:[ ]+|(?=\n)) # space only required if item is not empty
            )
            ((?s:.*?))                      # list item text   = $4
            (?:(\n+(?=\n))|\n)              # tailing blank line = $5
            (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
            }xm',
            array($this, '_processListItems_callback'), $list_str);

        $this->list_level--;
        return $list_str;
    }

    /**
     * List item parsing callback
     *
     * An item that is preceded or followed by a blank line, or that contains
     * one, goes through the full block gamut. Any other item is only checked
     * for sub-lists and then passed to formParagraphs() with `false` as the
     * second argument.
     *
     * @param  array $matches
     * @return string
     */
    protected function _processListItems_callback($matches) {
        $item = $matches[4];
        // Bound by reference: optional groups may be missing from $matches.
        $leading_line =& $matches[1];
        $leading_space =& $matches[2];
        $marker_space = $matches[3];
        $tailing_blank_line =& $matches[5];

        if ($leading_line || $tailing_blank_line ||
            preg_match('/\n{2,}/', $item))
        {
            // Replace marker with the appropriate whitespace indentation
            $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
            $item = $this->runBlockGamut($this->outdent($item)."\n");
        } else {
            // Recursion for sub-lists:
            $item = $this->doLists($this->outdent($item));
            $item = $this->formParagraphs($item, false);
        }

        return "<li>" . $item . "</li>\n";
    }

    /**
     * Process Markdown `<pre><code>` blocks.
     * @param  string $text
     * @return string
     */
    protected function doCodeBlocks($text) {
        $text = preg_replace_callback('{
                (?:\n\n|\A\n?)
                (               # $1 = the code block -- one or more lines, starting with a space/tab
                  (?>
                    [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
                    .*\n+
                  )+
                )
                ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
            }xm',
            array($this, '_doCodeBlocks_callback'), $text);

        return $text;
    }

    /**
     * Code block parsing callback
     *
     * Outdents the block, converts its content with
     * $code_block_content_func when that is callable (otherwise with
     * htmlspecialchars), and returns the hashed <pre><code> element.
     *
     * @param  array $matches
     * @return string
     */
    protected function _doCodeBlocks_callback($matches) {
        $codeblock = $matches[1];

        $codeblock = $this->outdent($codeblock);
        if (is_callable($this->code_block_content_func)) {
            $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
        } else {
            $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
        }

        # trim leading newlines and trailing newlines
        $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

        $codeblock = "<pre><code>$codeblock\n</code></pre>";
        return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
    }

    /**
     * Create a code span markup for $code. Called from handleSpanToken.
1235 * @param string $code 1236 * @return string 1237 */ 1238 protected function makeCodeSpan($code) { 1239 if (is_callable($this->code_span_content_func)) { 1240 $code = call_user_func($this->code_span_content_func, $code); 1241 } else { 1242 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1243 } 1244 return $this->hashPart("<code>$code</code>"); 1245 } 1246 1247 /** 1248 * Define the emphasis operators with their regex matches 1249 * @var array 1250 */ 1251 protected $em_relist = array( 1252 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)', 1253 '*' => '(?<![\s*])\*(?!\*)', 1254 '_' => '(?<![\s_])_(?!_)', 1255 ); 1256 1257 /** 1258 * Define the strong operators with their regex matches 1259 * @var array 1260 */ 1261 protected $strong_relist = array( 1262 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)', 1263 '**' => '(?<![\s*])\*\*(?!\*)', 1264 '__' => '(?<![\s_])__(?!_)', 1265 ); 1266 1267 /** 1268 * Define the emphasis + strong operators with their regex matches 1269 * @var array 1270 */ 1271 protected $em_strong_relist = array( 1272 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)', 1273 '***' => '(?<![\s*])\*\*\*(?!\*)', 1274 '___' => '(?<![\s_])___(?!_)', 1275 ); 1276 1277 /** 1278 * Container for prepared regular expressions 1279 * @var array 1280 */ 1281 protected $em_strong_prepared_relist; 1282 1283 /** 1284 * Prepare regular expressions for searching emphasis tokens in any 1285 * context. 1286 * @return void 1287 */ 1288 protected function prepareItalicsAndBold() { 1289 foreach ($this->em_relist as $em => $em_re) { 1290 foreach ($this->strong_relist as $strong => $strong_re) { 1291 // Construct list of allowed token expressions. 1292 $token_relist = array(); 1293 if (isset($this->em_strong_relist["$em$strong"])) { 1294 $token_relist[] = $this->em_strong_relist["$em$strong"]; 1295 } 1296 $token_relist[] = $em_re; 1297 $token_relist[] = $strong_re; 1298 1299 // Construct master expression from list. 1300 $token_re = '{(' . 
implode('|', $token_relist) . ')}'; 1301 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1302 } 1303 } 1304 } 1305 1306 /** 1307 * Convert Markdown italics (emphasis) and bold (strong) to HTML 1308 * @param string $text 1309 * @return string 1310 */ 1311 protected function doItalicsAndBold($text) { 1312 if ($this->in_emphasis_processing) { 1313 return $text; // avoid reentrency 1314 } 1315 $this->in_emphasis_processing = true; 1316 1317 $token_stack = array(''); 1318 $text_stack = array(''); 1319 $em = ''; 1320 $strong = ''; 1321 $tree_char_em = false; 1322 1323 while (1) { 1324 // Get prepared regular expression for seraching emphasis tokens 1325 // in current context. 1326 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1327 1328 // Each loop iteration search for the next emphasis token. 1329 // Each token is then passed to handleSpanToken. 1330 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1331 $text_stack[0] .= $parts[0]; 1332 $token =& $parts[1]; 1333 $text =& $parts[2]; 1334 1335 if (empty($token)) { 1336 // Reached end of text span: empty stack without emitting. 1337 // any more emphasis. 1338 while ($token_stack[0]) { 1339 $text_stack[1] .= array_shift($token_stack); 1340 $text_stack[0] .= array_shift($text_stack); 1341 } 1342 break; 1343 } 1344 1345 $token_len = strlen($token); 1346 if ($tree_char_em) { 1347 // Reached closing marker while inside a three-char emphasis. 1348 if ($token_len == 3) { 1349 // Three-char closing marker, close em and strong. 1350 array_shift($token_stack); 1351 $span = array_shift($text_stack); 1352 $span = $this->runSpanGamut($span); 1353 $span = "<strong><em>$span</em></strong>"; 1354 $text_stack[0] .= $this->hashPart($span); 1355 $em = ''; 1356 $strong = ''; 1357 } else { 1358 // Other closing marker: close one em or strong and 1359 // change current token state to match the other 1360 $token_stack[0] = str_repeat($token[0], 3-$token_len); 1361 $tag = $token_len == 2 ? 
"strong" : "em"; 1362 $span = $text_stack[0]; 1363 $span = $this->runSpanGamut($span); 1364 $span = "<$tag>$span</$tag>"; 1365 $text_stack[0] = $this->hashPart($span); 1366 $$tag = ''; // $$tag stands for $em or $strong 1367 } 1368 $tree_char_em = false; 1369 } else if ($token_len == 3) { 1370 if ($em) { 1371 // Reached closing marker for both em and strong. 1372 // Closing strong marker: 1373 for ($i = 0; $i < 2; ++$i) { 1374 $shifted_token = array_shift($token_stack); 1375 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1376 $span = array_shift($text_stack); 1377 $span = $this->runSpanGamut($span); 1378 $span = "<$tag>$span</$tag>"; 1379 $text_stack[0] .= $this->hashPart($span); 1380 $$tag = ''; // $$tag stands for $em or $strong 1381 } 1382 } else { 1383 // Reached opening three-char emphasis marker. Push on token 1384 // stack; will be handled by the special condition above. 1385 $em = $token[0]; 1386 $strong = "$em$em"; 1387 array_unshift($token_stack, $token); 1388 array_unshift($text_stack, ''); 1389 $tree_char_em = true; 1390 } 1391 } else if ($token_len == 2) { 1392 if ($strong) { 1393 // Unwind any dangling emphasis marker: 1394 if (strlen($token_stack[0]) == 1) { 1395 $text_stack[1] .= array_shift($token_stack); 1396 $text_stack[0] .= array_shift($text_stack); 1397 $em = ''; 1398 } 1399 // Closing strong marker: 1400 array_shift($token_stack); 1401 $span = array_shift($text_stack); 1402 $span = $this->runSpanGamut($span); 1403 $span = "<strong>$span</strong>"; 1404 $text_stack[0] .= $this->hashPart($span); 1405 $strong = ''; 1406 } else { 1407 array_unshift($token_stack, $token); 1408 array_unshift($text_stack, ''); 1409 $strong = $token; 1410 } 1411 } else { 1412 // Here $token_len == 1 1413 if ($em) { 1414 if (strlen($token_stack[0]) == 1) { 1415 // Closing emphasis marker: 1416 array_shift($token_stack); 1417 $span = array_shift($text_stack); 1418 $span = $this->runSpanGamut($span); 1419 $span = "<em>$span</em>"; 1420 $text_stack[0] .= 
$this->hashPart($span); 1421 $em = ''; 1422 } else { 1423 $text_stack[0] .= $token; 1424 } 1425 } else { 1426 array_unshift($token_stack, $token); 1427 array_unshift($text_stack, ''); 1428 $em = $token; 1429 } 1430 } 1431 } 1432 $this->in_emphasis_processing = false; 1433 return $text_stack[0]; 1434 } 1435 1436 /** 1437 * Parse Markdown blockquotes to HTML 1438 * @param string $text 1439 * @return string 1440 */ 1441 protected function doBlockQuotes($text) { 1442 $text = preg_replace_callback('/ 1443 ( # Wrap whole match in $1 1444 (?> 1445 ^[ ]*>[ ]? # ">" at the start of a line 1446 .+\n # rest of the first line 1447 (.+\n)* # subsequent consecutive lines 1448 \n* # blanks 1449 )+ 1450 ) 1451 /xm', 1452 array($this, '_doBlockQuotes_callback'), $text); 1453 1454 return $text; 1455 } 1456 1457 /** 1458 * Blockquote parsing callback 1459 * @param array $matches 1460 * @return string 1461 */ 1462 protected function _doBlockQuotes_callback($matches) { 1463 $bq = $matches[1]; 1464 // trim one level of quoting - trim whitespace-only lines 1465 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1466 $bq = $this->runBlockGamut($bq); // recurse 1467 1468 $bq = preg_replace('/^/m', " ", $bq); 1469 // These leading spaces cause problem with <pre> content, 1470 // so we need to fix that: 1471 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1472 array($this, '_doBlockQuotes_callback2'), $bq); 1473 1474 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . 
"\n\n"; 1475 } 1476 1477 /** 1478 * Blockquote parsing callback 1479 * @param array $matches 1480 * @return string 1481 */ 1482 protected function _doBlockQuotes_callback2($matches) { 1483 $pre = $matches[1]; 1484 $pre = preg_replace('/^ /m', '', $pre); 1485 return $pre; 1486 } 1487 1488 /** 1489 * Parse paragraphs 1490 * 1491 * @param string $text String to process in paragraphs 1492 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags 1493 * @return string 1494 */ 1495 protected function formParagraphs($text, $wrap_in_p = true) { 1496 // Strip leading and trailing lines: 1497 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1498 1499 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1500 1501 // Wrap <p> tags and unhashify HTML blocks 1502 foreach ($grafs as $key => $value) { 1503 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1504 // Is a paragraph. 1505 $value = $this->runSpanGamut($value); 1506 if ($wrap_in_p) { 1507 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1508 $value .= "</p>"; 1509 } 1510 $grafs[$key] = $this->unhash($value); 1511 } else { 1512 // Is a block. 1513 // Modify elements of @grafs in-place... 1514 $graf = $value; 1515 $block = $this->html_hashes[$graf]; 1516 $graf = $block; 1517 // if (preg_match('{ 1518 // \A 1519 // ( # $1 = <div> tag 1520 // <div \s+ 1521 // [^>]* 1522 // \b 1523 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1524 // 1 1525 // \2 1526 // [^>]* 1527 // > 1528 // ) 1529 // ( # $3 = contents 1530 // .* 1531 // ) 1532 // (</div>) # $4 = closing tag 1533 // \z 1534 // }xs', $block, $matches)) 1535 // { 1536 // list(, $div_open, , $div_content, $div_close) = $matches; 1537 // 1538 // // We can't call Markdown(), because that resets the hash; 1539 // // that initialization code should be pulled into its own sub, though. 1540 // $div_content = $this->hashHTMLBlocks($div_content); 1541 // 1542 // // Run document gamut methods on the content. 
1543 // foreach ($this->document_gamut as $method => $priority) { 1544 // $div_content = $this->$method($div_content); 1545 // } 1546 // 1547 // $div_open = preg_replace( 1548 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1549 // 1550 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1551 // } 1552 $grafs[$key] = $graf; 1553 } 1554 } 1555 1556 return implode("\n\n", $grafs); 1557 } 1558 1559 /** 1560 * Encode text for a double-quoted HTML attribute. This function 1561 * is *not* suitable for attributes enclosed in single quotes. 1562 * @param string $text 1563 * @return string 1564 */ 1565 protected function encodeAttribute($text) { 1566 $text = $this->encodeAmpsAndAngles($text); 1567 $text = str_replace('"', '"', $text); 1568 return $text; 1569 } 1570 1571 /** 1572 * Encode text for a double-quoted HTML attribute containing a URL, 1573 * applying the URL filter if set. Also generates the textual 1574 * representation for the URL (removing mailto: or tel:) storing it in $text. 1575 * This function is *not* suitable for attributes enclosed in single quotes. 1576 * 1577 * @param string $url 1578 * @param string $text Passed by reference 1579 * @return string URL 1580 */ 1581 protected function encodeURLAttribute($url, &$text = null) { 1582 if (is_callable($this->url_filter_func)) { 1583 $url = call_user_func($this->url_filter_func, $url); 1584 } 1585 1586 if (preg_match('{^mailto:}i', $url)) { 1587 $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); 1588 } else if (preg_match('{^tel:}i', $url)) { 1589 $url = $this->encodeAttribute($url); 1590 $text = substr($url, 4); 1591 } else { 1592 $url = $this->encodeAttribute($url); 1593 $text = $url; 1594 } 1595 1596 return $url; 1597 } 1598 1599 /** 1600 * Smart processing for ampersands and angle brackets that need to 1601 * be encoded. Valid character entities are left alone unless the 1602 * no-entities mode is set. 
1603 * @param string $text 1604 * @return string 1605 */ 1606 protected function encodeAmpsAndAngles($text) { 1607 if ($this->no_entities) { 1608 $text = str_replace('&', '&', $text); 1609 } else { 1610 // Ampersand-encoding based entirely on Nat Irons's Amputator 1611 // MT plugin: <http://bumppo.net/projects/amputator/> 1612 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1613 '&', $text); 1614 } 1615 // Encode remaining <'s 1616 $text = str_replace('<', '<', $text); 1617 1618 return $text; 1619 } 1620 1621 /** 1622 * Parse Markdown automatic links to anchor HTML tags 1623 * @param string $text 1624 * @return string 1625 */ 1626 protected function doAutoLinks($text) { 1627 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', 1628 array($this, '_doAutoLinks_url_callback'), $text); 1629 1630 // Email addresses: <address@domain.foo> 1631 $text = preg_replace_callback('{ 1632 < 1633 (?:mailto:)? 1634 ( 1635 (?: 1636 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1637 | 1638 ".*?" 1639 ) 1640 \@ 1641 (?: 1642 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1643 | 1644 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1645 ) 1646 ) 1647 > 1648 }xi', 1649 array($this, '_doAutoLinks_email_callback'), $text); 1650 1651 return $text; 1652 } 1653 1654 /** 1655 * Parse URL callback 1656 * @param array $matches 1657 * @return string 1658 */ 1659 protected function _doAutoLinks_url_callback($matches) { 1660 $url = $this->encodeURLAttribute($matches[1], $text); 1661 $link = "<a href=\"$url\">$text</a>"; 1662 return $this->hashPart($link); 1663 } 1664 1665 /** 1666 * Parse email address callback 1667 * @param array $matches 1668 * @return string 1669 */ 1670 protected function _doAutoLinks_email_callback($matches) { 1671 $addr = $matches[1]; 1672 $url = $this->encodeURLAttribute("mailto:$addr", $text); 1673 $link = "<a href=\"$url\">$text</a>"; 1674 return $this->hashPart($link); 1675 } 1676 1677 /** 1678 * Input: some text to obfuscate, e.g. 
"mailto:foo@example.com" 1679 * 1680 * Output: the same text but with most characters encoded as either a 1681 * decimal or hex entity, in the hopes of foiling most address 1682 * harvesting spam bots. E.g.: 1683 * 1684 * mailto:foo 1685 * @example.co 1686 * m 1687 * 1688 * Note: the additional output $tail is assigned the same value as the 1689 * ouput, minus the number of characters specified by $head_length. 1690 * 1691 * Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1692 * With some optimizations by Milian Wolff. Forced encoding of HTML 1693 * attribute special characters by Allan Odgaard. 1694 * 1695 * @param string $text 1696 * @param string $tail Passed by reference 1697 * @param integer $head_length 1698 * @return string 1699 */ 1700 protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { 1701 if ($text == "") { 1702 return $tail = ""; 1703 } 1704 1705 $chars = preg_split('/(?<!^)(?!$)/', $text); 1706 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed. 1707 1708 foreach ($chars as $key => $char) { 1709 $ord = ord($char); 1710 // Ignore non-ascii chars. 1711 if ($ord < 128) { 1712 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function. 1713 // roughly 10% raw, 45% hex, 45% dec 1714 // '@' *must* be encoded. I insist. 1715 // '"' and '>' have to be encoded inside the attribute 1716 if ($r > 90 && strpos('@"&>', $char) === false) { 1717 /* do nothing */ 1718 } else if ($r < 45) { 1719 $chars[$key] = '&#x'.dechex($ord).';'; 1720 } else { 1721 $chars[$key] = '&#'.$ord.';'; 1722 } 1723 } 1724 } 1725 1726 $text = implode('', $chars); 1727 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; 1728 1729 return $text; 1730 } 1731 1732 /** 1733 * Take the string $str and parse it into tokens, hashing embeded HTML, 1734 * escaped characters and handling code spans. 
1735 * @param string $str 1736 * @return string 1737 */ 1738 protected function parseSpan($str) { 1739 $output = ''; 1740 1741 $span_re = '{ 1742 ( 1743 \\\\'.$this->escape_chars_re.' 1744 | 1745 (?<![`\\\\]) 1746 `+ # code span marker 1747 '.( $this->no_markup ? '' : ' 1748 | 1749 <!-- .*? --> # comment 1750 | 1751 <\?.*?\?> | <%.*?%> # processing instruction 1752 | 1753 <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1754 (?> 1755 \s 1756 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1757 )? 1758 > 1759 | 1760 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1761 | 1762 </[-a-zA-Z0-9:_]+\s*> # closing tag 1763 ').' 1764 ) 1765 }xs'; 1766 1767 while (1) { 1768 // Each loop iteration seach for either the next tag, the next 1769 // openning code span marker, or the next escaped character. 1770 // Each token is then passed to handleSpanToken. 1771 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1772 1773 // Create token from text preceding tag. 1774 if ($parts[0] != "") { 1775 $output .= $parts[0]; 1776 } 1777 1778 // Check if we reach the end. 1779 if (isset($parts[1])) { 1780 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1781 $str = $parts[2]; 1782 } else { 1783 break; 1784 } 1785 } 1786 1787 return $output; 1788 } 1789 1790 /** 1791 * Handle $token provided by parseSpan by determining its nature and 1792 * returning the corresponding value that should replace it. 1793 * @param string $token 1794 * @param string $str Passed by reference 1795 * @return string 1796 */ 1797 protected function handleSpanToken($token, &$str) { 1798 switch ($token[0]) { 1799 case "\\": 1800 return $this->hashPart("&#". ord($token[1]). ";"); 1801 case "`": 1802 // Search for end marker in remaining text. 1803 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1804 $str, $matches)) 1805 { 1806 $str = $matches[2]; 1807 $codespan = $this->makeCodeSpan($matches[1]); 1808 return $this->hashPart($codespan); 1809 } 1810 return $token; // Return as text since no ending marker found. 
1811 default: 1812 return $this->hashPart($token); 1813 } 1814 } 1815 1816 /** 1817 * Remove one level of line-leading tabs or spaces 1818 * @param string $text 1819 * @return string 1820 */ 1821 protected function outdent($text) { 1822 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text); 1823 } 1824 1825 1826 /** 1827 * String length function for detab. `_initDetab` will create a function to 1828 * handle UTF-8 if the default function does not exist. 1829 * @var string 1830 */ 1831 protected $utf8_strlen = 'mb_strlen'; 1832 1833 /** 1834 * Replace tabs with the appropriate amount of spaces. 1835 * 1836 * For each line we separate the line in blocks delemited by tab characters. 1837 * Then we reconstruct every line by adding the appropriate number of space 1838 * between each blocks. 1839 * 1840 * @param string $text 1841 * @return string 1842 */ 1843 protected function detab($text) { 1844 $text = preg_replace_callback('/^.*\t.*$/m', 1845 array($this, '_detab_callback'), $text); 1846 1847 return $text; 1848 } 1849 1850 /** 1851 * Replace tabs callback 1852 * @param string $matches 1853 * @return string 1854 */ 1855 protected function _detab_callback($matches) { 1856 $line = $matches[0]; 1857 $strlen = $this->utf8_strlen; // strlen function for UTF-8. 1858 1859 // Split in blocks. 1860 $blocks = explode("\t", $line); 1861 // Add each blocks to the line. 1862 $line = $blocks[0]; 1863 unset($blocks[0]); // Do not add first block twice. 1864 foreach ($blocks as $block) { 1865 // Calculate amount of space, insert spaces, insert block. 1866 $amount = $this->tab_width - 1867 $strlen($line, 'UTF-8') % $this->tab_width; 1868 $line .= str_repeat(" ", $amount) . $block; 1869 } 1870 return $line; 1871 } 1872 1873 /** 1874 * Check for the availability of the function in the `utf8_strlen` property 1875 * (initially `mb_strlen`). 
If the function is not available, create a 1876 * function that will loosely count the number of UTF-8 characters with a 1877 * regular expression. 1878 * @return void 1879 */ 1880 protected function _initDetab() { 1881 1882 if (function_exists($this->utf8_strlen)) { 1883 return; 1884 } 1885 1886 $this->utf8_strlen = function($text) { 1887 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m); 1888 }; 1889 } 1890 1891 /** 1892 * Swap back in all the tags hashed by _HashHTMLBlocks. 1893 * @param string $text 1894 * @return string 1895 */ 1896 protected function unhash($text) { 1897 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1898 array($this, '_unhash_callback'), $text); 1899 } 1900 1901 /** 1902 * Unhashing callback 1903 * @param array $matches 1904 * @return string 1905 */ 1906 protected function _unhash_callback($matches) { 1907 return $this->html_hashes[$matches[0]]; 1908 } 1909 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body