Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php
/**
 * Markdown  -  A text-to-HTML conversion tool for web writers
 *
 * @package   php-markdown
 * @author    Michel Fortin <michel.fortin@michelf.com>
 * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
 * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
 */

namespace Michelf;

/**
 * Markdown Parser Class
 *
 * Converts Markdown-formatted text to HTML. Behaviour is tuned through the
 * public configuration properties declared below; the conversion itself is
 * started with transform() or the static defaultTransform() shortcut.
 */
class Markdown implements MarkdownInterface {
    /**
     * Define the package version
     * @var string
     */
    const MARKDOWNLIB_VERSION = "1.8.0";

    /**
     * Simple function interface - Initialize the parser and return the result
     * of its transform method. This will work fine for derived classes too.
     *
     * One parser instance is created per calling class and kept in a static
     * list, so repeated calls reuse the same instance.
     *
     * @api
     *
     * @param  string $text Markdown source text
     * @return string whatever the parser's transform() method returns
     */
    public static function defaultTransform($text) {
        // Take parser class on which this function was called.
        $parser_class = \get_called_class();

        // Try to take parser from the static parser list. Binding by
        // reference means the assignment below also fills the list entry.
        static $parser_list;
        $parser =& $parser_list[$parser_class];

        // Create the parser if not already set
        if (!$parser) {
            $parser = new $parser_class;
        }

        // Transform text using parser.
        return $parser->transform($text);
    }

    /**
     * Configuration variables
     */

    /**
     * Suffix appended to empty elements such as <hr> and <br>.
     * Change to ">" for HTML output.
     * @var string
     */
    public $empty_element_suffix = " />";

    /**
     * The width of indentation of the output markup
     * @var int
     */
    public $tab_width = 4;

    /**
     * Change to `true` to disallow markup or entities.
     * @var boolean
     */
    public $no_markup   = false;
    public $no_entities = false;


    /**
     * Change to `true` to enable line breaks on \n without two trailing spaces
     * @var boolean
     */
    public $hard_wrap = false;

    /**
     * Predefined URLs and titles for reference links and images.
81 * @var array 82 */ 83 public $predef_urls = array(); 84 public $predef_titles = array(); 85 86 /** 87 * Optional filter function for URLs 88 * @var callable 89 */ 90 public $url_filter_func = null; 91 92 /** 93 * Optional header id="" generation callback function. 94 * @var callable 95 */ 96 public $header_id_func = null; 97 98 /** 99 * Optional function for converting code block content to HTML 100 * @var callable 101 */ 102 public $code_block_content_func = null; 103 104 /** 105 * Optional function for converting code span content to HTML. 106 * @var callable 107 */ 108 public $code_span_content_func = null; 109 110 /** 111 * Class attribute to toggle "enhanced ordered list" behaviour 112 * setting this to true will allow ordered lists to start from the index 113 * number that is defined first. 114 * 115 * For example: 116 * 2. List item two 117 * 3. List item three 118 * 119 * Becomes: 120 * <ol start="2"> 121 * <li>List item two</li> 122 * <li>List item three</li> 123 * </ol> 124 * 125 * @var bool 126 */ 127 public $enhanced_ordered_list = false; 128 129 /** 130 * Parser implementation 131 */ 132 133 /** 134 * Regex to match balanced [brackets]. 135 * Needed to insert a maximum bracked depth while converting to PHP. 136 * @var int 137 */ 138 protected $nested_brackets_depth = 6; 139 protected $nested_brackets_re; 140 141 protected $nested_url_parenthesis_depth = 4; 142 protected $nested_url_parenthesis_re; 143 144 /** 145 * Table of hash values for escaped characters: 146 * @var string 147 */ 148 protected $escape_chars = '\`*_{}[]()>#+-.!'; 149 protected $escape_chars_re; 150 151 /** 152 * Constructor function. Initialize appropriate member variables. 153 * @return void 154 */ 155 public function __construct() { 156 $this->_initDetab(); 157 $this->prepareItalicsAndBold(); 158 159 $this->nested_brackets_re = 160 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 
161 str_repeat('\])*', $this->nested_brackets_depth); 162 163 $this->nested_url_parenthesis_re = 164 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 165 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 166 167 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 168 169 // Sort document, block, and span gamut in ascendent priority order. 170 asort($this->document_gamut); 171 asort($this->block_gamut); 172 asort($this->span_gamut); 173 } 174 175 176 /** 177 * Internal hashes used during transformation. 178 * @var array 179 */ 180 protected $urls = array(); 181 protected $titles = array(); 182 protected $html_hashes = array(); 183 184 /** 185 * Status flag to avoid invalid nesting. 186 * @var boolean 187 */ 188 protected $in_anchor = false; 189 190 /** 191 * Status flag to avoid invalid nesting. 192 * @var boolean 193 */ 194 protected $in_emphasis_processing = false; 195 196 /** 197 * Called before the transformation process starts to setup parser states. 198 * @return void 199 */ 200 protected function setup() { 201 // Clear global hashes. 202 $this->urls = $this->predef_urls; 203 $this->titles = $this->predef_titles; 204 $this->html_hashes = array(); 205 $this->in_anchor = false; 206 $this->in_emphasis_processing = false; 207 } 208 209 /** 210 * Called after the transformation process to clear any variable which may 211 * be taking up memory unnecessarly. 212 * @return void 213 */ 214 protected function teardown() { 215 $this->urls = array(); 216 $this->titles = array(); 217 $this->html_hashes = array(); 218 } 219 220 /** 221 * Main function. Performs some preprocessing on the input text and pass 222 * it through the document gamut. 223 * 224 * @api 225 * 226 * @param string $text 227 * @return string 228 */ 229 public function transform($text) { 230 $this->setup(); 231 232 # Remove UTF-8 BOM and marker character in input, if present. 
233 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 234 235 # Standardize line endings: 236 # DOS to Unix and Mac to Unix 237 $text = preg_replace('{\r\n?}', "\n", $text); 238 239 # Make sure $text ends with a couple of newlines: 240 $text .= "\n\n"; 241 242 # Convert all tabs to spaces. 243 $text = $this->detab($text); 244 245 # Turn block-level HTML blocks into hash entries 246 $text = $this->hashHTMLBlocks($text); 247 248 # Strip any lines consisting only of spaces and tabs. 249 # This makes subsequent regexen easier to write, because we can 250 # match consecutive blank lines with /\n+/ instead of something 251 # contorted like /[ ]*\n+/ . 252 $text = preg_replace('/^[ ]+$/m', '', $text); 253 254 # Run document gamut methods. 255 foreach ($this->document_gamut as $method => $priority) { 256 $text = $this->$method($text); 257 } 258 259 $this->teardown(); 260 261 return $text . "\n"; 262 } 263 264 /** 265 * Define the document gamut 266 * @var array 267 */ 268 protected $document_gamut = array( 269 // Strip link definitions, store in hashes. 270 "stripLinkDefinitions" => 20, 271 "runBasicBlockGamut" => 30, 272 ); 273 274 /** 275 * Strips link definitions from text, stores the URLs and titles in 276 * hash references 277 * @param string $text 278 * @return string 279 */ 280 protected function stripLinkDefinitions($text) { 281 282 $less_than_tab = $this->tab_width - 1; 283 284 // Link defs are in the form: ^[id]: url "optional title" 285 $text = preg_replace_callback('{ 286 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 287 [ ]* 288 \n? # maybe *one* newline 289 [ ]* 290 (?: 291 <(.+?)> # url = $2 292 | 293 (\S+?) # url = $3 294 ) 295 [ ]* 296 \n? # maybe one newline 297 [ ]* 298 (?: 299 (?<=\s) # lookbehind for whitespace 300 ["(] 301 (.*?) # title = $4 302 [")] 303 [ ]* 304 )? 
# title is optional 305 (?:\n+|\Z) 306 }xm', 307 array($this, '_stripLinkDefinitions_callback'), 308 $text 309 ); 310 return $text; 311 } 312 313 /** 314 * The callback to strip link definitions 315 * @param array $matches 316 * @return string 317 */ 318 protected function _stripLinkDefinitions_callback($matches) { 319 $link_id = strtolower($matches[1]); 320 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 321 $this->urls[$link_id] = $url; 322 $this->titles[$link_id] =& $matches[4]; 323 return ''; // String that will replace the block 324 } 325 326 /** 327 * Hashify HTML blocks 328 * @param string $text 329 * @return string 330 */ 331 protected function hashHTMLBlocks($text) { 332 if ($this->no_markup) { 333 return $text; 334 } 335 336 $less_than_tab = $this->tab_width - 1; 337 338 /** 339 * Hashify HTML blocks: 340 * 341 * We only want to do this for block-level HTML tags, such as headers, 342 * lists, and tables. That's because we still want to wrap <p>s around 343 * "paragraphs" that are wrapped in non-block-level tags, such as 344 * anchors, phrase emphasis, and spans. The list of tags we're looking 345 * for is hard-coded: 346 * 347 * * List "a" is made of tags which can be both inline or block-level. 348 * These will be treated block-level when the start tag is alone on 349 * its line, otherwise they're not matched here and will be taken as 350 * inline later. 351 * * List "b" is made of tags which are always block-level; 352 */ 353 $block_tags_a_re = 'ins|del'; 354 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 355 'script|noscript|style|form|fieldset|iframe|math|svg|'. 356 'article|section|nav|aside|hgroup|header|footer|'. 357 'figure'; 358 359 // Regular expression for the content of a block tag. 
360 $nested_tags_level = 4; 361 $attr = ' 362 (?> # optional tag attributes 363 \s # starts with whitespace 364 (?> 365 [^>"/]+ # text outside quotes 366 | 367 /+(?!>) # slash not followed by ">" 368 | 369 "[^"]*" # text inside double quotes (tolerate ">") 370 | 371 \'[^\']*\' # text inside single quotes (tolerate ">") 372 )* 373 )? 374 '; 375 $content = 376 str_repeat(' 377 (?> 378 [^<]+ # content without tag 379 | 380 <\2 # nested opening tag 381 '.$attr.' # attributes 382 (?> 383 /> 384 | 385 >', $nested_tags_level). // end of opening tag 386 '.*?'. // last level nested tag content 387 str_repeat(' 388 </\2\s*> # closing nested tag 389 ) 390 | 391 <(?!/\2\s*> # other tags with a different name 392 ) 393 )*', 394 $nested_tags_level); 395 $content2 = str_replace('\2', '\3', $content); 396 397 /** 398 * First, look for nested blocks, e.g.: 399 * <div> 400 * <div> 401 * tags for inner block must be indented. 402 * </div> 403 * </div> 404 * 405 * The outermost tags must start at the left margin for this to match, 406 * and the inner nested divs must be indented. 407 * We need to do this before the next, more liberal match, because the 408 * next match will start at the first `<div>` and stop at the 409 * first `</div>`. 410 */ 411 $text = preg_replace_callback('{(?> 412 (?> 413 (?<=\n) # Starting on its own line 414 | # or 415 \A\n? # the at beginning of the doc 416 ) 417 ( # save in $1 418 419 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 420 # in between. 421 422 [ ]{0,'.$less_than_tab.'} 423 <('.$block_tags_b_re.')# start tag = $2 424 '.$attr.'> # attributes followed by > and \n 425 '.$content.' # content, support nesting 426 </\2> # the matching end tag 427 [ ]* # trailing spaces/tabs 428 (?=\n+|\Z) # followed by a newline or end of document 429 430 | # Special version for tags of group a. 431 432 [ ]{0,'.$less_than_tab.'} 433 <('.$block_tags_a_re.')# start tag = $3 434 '.$attr.'>[ ]*\n # attributes followed by > 435 '.$content2.' 
# content, support nesting 436 </\3> # the matching end tag 437 [ ]* # trailing spaces/tabs 438 (?=\n+|\Z) # followed by a newline or end of document 439 440 | # Special case just for <hr />. It was easier to make a special 441 # case than to make the other regex more complicated. 442 443 [ ]{0,'.$less_than_tab.'} 444 <(hr) # start tag = $2 445 '.$attr.' # attributes 446 /?> # the matching end tag 447 [ ]* 448 (?=\n{2,}|\Z) # followed by a blank line or end of document 449 450 | # Special case for standalone HTML comments: 451 452 [ ]{0,'.$less_than_tab.'} 453 (?s: 454 <!-- .*? --> 455 ) 456 [ ]* 457 (?=\n{2,}|\Z) # followed by a blank line or end of document 458 459 | # PHP and ASP-style processor instructions (<? and <%) 460 461 [ ]{0,'.$less_than_tab.'} 462 (?s: 463 <([?%]) # $2 464 .*? 465 \2> 466 ) 467 [ ]* 468 (?=\n{2,}|\Z) # followed by a blank line or end of document 469 470 ) 471 )}Sxmi', 472 array($this, '_hashHTMLBlocks_callback'), 473 $text 474 ); 475 476 return $text; 477 } 478 479 /** 480 * The callback for hashing HTML blocks 481 * @param string $matches 482 * @return string 483 */ 484 protected function _hashHTMLBlocks_callback($matches) { 485 $text = $matches[1]; 486 $key = $this->hashBlock($text); 487 return "\n\n$key\n\n"; 488 } 489 490 /** 491 * Called whenever a tag must be hashed when a function insert an atomic 492 * element in the text stream. Passing $text to through this function gives 493 * a unique text-token which will be reverted back when calling unhash. 494 * 495 * The $boundary argument specify what character should be used to surround 496 * the token. By convension, "B" is used for block elements that needs not 497 * to be wrapped into paragraph tags at the end, ":" is used for elements 498 * that are word separators and "X" is used in the general case. 
499 * 500 * @param string $text 501 * @param string $boundary 502 * @return string 503 */ 504 protected function hashPart($text, $boundary = 'X') { 505 // Swap back any tag hash found in $text so we do not have to `unhash` 506 // multiple times at the end. 507 $text = $this->unhash($text); 508 509 // Then hash the block. 510 static $i = 0; 511 $key = "$boundary\x1A" . ++$i . $boundary; 512 $this->html_hashes[$key] = $text; 513 return $key; // String that will replace the tag. 514 } 515 516 /** 517 * Shortcut function for hashPart with block-level boundaries. 518 * @param string $text 519 * @return string 520 */ 521 protected function hashBlock($text) { 522 return $this->hashPart($text, 'B'); 523 } 524 525 /** 526 * Define the block gamut - these are all the transformations that form 527 * block-level tags like paragraphs, headers, and list items. 528 * @var array 529 */ 530 protected $block_gamut = array( 531 "doHeaders" => 10, 532 "doHorizontalRules" => 20, 533 "doLists" => 40, 534 "doCodeBlocks" => 50, 535 "doBlockQuotes" => 60, 536 ); 537 538 /** 539 * Run block gamut tranformations. 540 * 541 * We need to escape raw HTML in Markdown source before doing anything 542 * else. This need to be done for each block, and not only at the 543 * begining in the Markdown function since hashed blocks can be part of 544 * list items and could have been indented. Indented blocks would have 545 * been seen as a code block in a previous pass of hashHTMLBlocks. 546 * 547 * @param string $text 548 * @return string 549 */ 550 protected function runBlockGamut($text) { 551 $text = $this->hashHTMLBlocks($text); 552 return $this->runBasicBlockGamut($text); 553 } 554 555 /** 556 * Run block gamut tranformations, without hashing HTML blocks. This is 557 * useful when HTML blocks are known to be already hashed, like in the first 558 * whole-document pass. 
     *
     * Calls each method of $block_gamut in turn, then formParagraphs().
     *
     * @param  string $text
     * @return string
     */
    protected function runBasicBlockGamut($text) {

        foreach ($this->block_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        // Finally form paragraph and restore hashed blocks.
        $text = $this->formParagraphs($text);

        return $text;
    }

    /**
     * Convert horizontal rules
     *
     * A rule is a line made of three or more identical markers (-, * or _),
     * optionally separated by up to two spaces. Each one is replaced by a
     * hashed <hr> element.
     *
     * @param  string $text
     * @return string
     */
    protected function doHorizontalRules($text) {
        return preg_replace(
            '{
                ^[ ]{0,3}   # Leading space
                ([-*_])     # $1: First marker
                (?>         # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1          # Marker character
                ){2,}       # Group repeated at least twice
                [ ]*        # Tailing spaces
                $           # End of line.
            }mx',
            "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
            $text
        );
    }

    /**
     * These are all the transformations that occur *within* block-level
     * tags like paragraphs, headers, and list items.
     *
     * Keys are method names, values are priorities (lowest runs first).
     * @var array
     */
    protected $span_gamut = array(
        // Process character escapes, code spans, and inline HTML
        // in one shot.
        "parseSpan"           => -30,
        // Process anchor and image tags. Images must come first,
        // because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,
        // Make links out of things like `<https://example.com/>`
        // Must come after doAnchors, because you can use < and >
        // delimiters in inline links like [this](<url>).
        "doAutoLinks"         =>  30,
        "encodeAmpsAndAngles" =>  40,
        "doItalicsAndBold"    =>  50,
        "doHardBreaks"        =>  60,
    );

    /**
     * Run span gamut transformations
     *
     * Passes $text through every method listed in $span_gamut, in the
     * order the array holds them.
     *
     * @param  string $text
     * @return string
     */
    protected function runSpanGamut($text) {
        foreach ($this->span_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        return $text;
    }

    /**
     * Do hard breaks
     *
     * With $hard_wrap enabled every newline (and any spaces before it)
     * becomes a break; otherwise only a newline preceded by two or more
     * spaces does.
     *
     * @param  string $text
     * @return string
     */
    protected function doHardBreaks($text) {
        if ($this->hard_wrap) {
            return preg_replace_callback('/ *\n/',
                array($this, '_doHardBreaks_callback'), $text);
        } else {
            return preg_replace_callback('/ {2,}\n/',
                array($this, '_doHardBreaks_callback'), $text);
        }
    }

    /**
     * Trigger part hashing for the hard break (callback method)
     * @param  array $matches unused; the replacement is always the same
     * @return string hash token for a <br> element followed by a newline
     */
    protected function _doHardBreaks_callback($matches) {
        return $this->hashPart("<br$this->empty_element_suffix\n");
    }

    /**
     * Turn Markdown link shortcuts into XHTML <a> tags.
     *
     * Handles, in this order, reference links `[text][id]`, inline links
     * `[text](url "title")` and shortcut references `[text]`. Does nothing
     * while already inside an anchor, so links are never nested.
     *
     * @param  string $text
     * @return string
     */
    protected function doAnchors($text) {
        if ($this->in_anchor) {
            return $text;
        }
        // Set for the duration of the call: the callbacks run the span
        // gamut on the link text, which calls doAnchors() again.
        $this->in_anchor = true;

        // First, handle reference-style links: [link text] [id]
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        // Next, inline-style links: [link text](url "optional title")
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]
              \(            # literal paren
                [ \n]*
                (?:
                    <(.+?)> # href = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # href = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # Title = $7
                  \6        # matching quote
                  [ \n]*    # ignore any spaces/tabs between closing quote and )
                )?          # title is optional
              \)
            )
            }xs',
            array($this, '_doAnchors_inline_callback'), $text);

        // Last, handle reference-style shortcuts: [link text]
        // These must come last in case you've also got [link text][1]
        // or [link text](/foo)
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
              \]
            )
            }xs',
            array($this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }

    /**
     * Callback method to parse referenced anchors
     *
     * Used for both `[text][id]` and the shortcut `[text]`; in the latter
     * case $matches has no third group.
     *
     * @param  array $matches
     * @return string hashed <a> element, or the original text when the id
     *                has no matching definition
     */
    protected function _doAnchors_reference_callback($matches) {
        $whole_match =  $matches[1];
        $link_text   =  $matches[2];
        // By reference: group 3 does not exist for the shortcut form, and
        // this avoids an undefined-index notice.
        $link_id     =& $matches[3];

        if ($link_id == "") {
            // for shortcut links like [this][] or [this].
            $link_id = $link_text;
        }

        // lower-case and turn embedded newlines into spaces
        $link_id = strtolower($link_id);
        $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

        if (isset($this->urls[$link_id])) {
            $url = $this->urls[$link_id];
            $url = $this->encodeURLAttribute($url);

            $result = "<a href=\"$url\"";
            if ( isset( $this->titles[$link_id] ) ) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }

            $link_text = $this->runSpanGamut($link_text);
            $result .= ">$link_text</a>";
            $result = $this->hashPart($result);
        } else {
            // Unknown id: leave the source text as it was.
            $result = $whole_match;
        }
        return $result;
    }

    /**
     * Callback method to parse inline anchors
     * @param  array $matches
     * @return string hashed <a> element
     */
    protected function _doAnchors_inline_callback($matches) {
        $whole_match    =  $matches[1];
        $link_text      =  $this->runSpanGamut($matches[2]);
        $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
        // By reference because the optional title group may be absent.
        $title          =& $matches[7];

        // If the URL was of the form <s p a c e s> it got caught by the HTML
        // tag parser and hashed. Need to reverse the process before using
        // the URL.
        $unhashed = $this->unhash($url);
        if ($unhashed != $url)
            $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

        $url = $this->encodeURLAttribute($url);

        $result = "<a href=\"$url\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\"";
        }

        // NOTE(review): $link_text already went through runSpanGamut() at
        // the top of this method, so this second pass looks redundant --
        // confirm it has no effect before removing it.
        $link_text = $this->runSpanGamut($link_text);
        $result .= ">$link_text</a>";

        return $this->hashPart($result);
    }

    /**
     * Turn Markdown image shortcuts into <img> tags.
798 * @param string $text 799 * @return string 800 */ 801 protected function doImages($text) { 802 // First, handle reference-style labeled images: ![alt text][id] 803 $text = preg_replace_callback('{ 804 ( # wrap whole match in $1 805 !\[ 806 ('.$this->nested_brackets_re.') # alt text = $2 807 \] 808 809 [ ]? # one optional space 810 (?:\n[ ]*)? # one optional newline followed by spaces 811 812 \[ 813 (.*?) # id = $3 814 \] 815 816 ) 817 }xs', 818 array($this, '_doImages_reference_callback'), $text); 819 820 // Next, handle inline images: ![alt text](url "optional title") 821 // Don't forget: encode * and _ 822 $text = preg_replace_callback('{ 823 ( # wrap whole match in $1 824 !\[ 825 ('.$this->nested_brackets_re.') # alt text = $2 826 \] 827 \s? # One optional whitespace character 828 \( # literal paren 829 [ \n]* 830 (?: 831 <(\S*)> # src url = $3 832 | 833 ('.$this->nested_url_parenthesis_re.') # src url = $4 834 ) 835 [ \n]* 836 ( # $5 837 ([\'"]) # quote char = $6 838 (.*?) # title = $7 839 \6 # matching quote 840 [ \n]* 841 )? # title is optional 842 \) 843 ) 844 }xs', 845 array($this, '_doImages_inline_callback'), $text); 846 847 return $text; 848 } 849 850 /** 851 * Callback to parse references image tags 852 * @param array $matches 853 * @return string 854 */ 855 protected function _doImages_reference_callback($matches) { 856 $whole_match = $matches[1]; 857 $alt_text = $matches[2]; 858 $link_id = strtolower($matches[3]); 859 860 if ($link_id == "") { 861 $link_id = strtolower($alt_text); // for shortcut links like ![this][]. 
862 } 863 864 $alt_text = $this->encodeAttribute($alt_text); 865 if (isset($this->urls[$link_id])) { 866 $url = $this->encodeURLAttribute($this->urls[$link_id]); 867 $result = "<img src=\"$url\" alt=\"$alt_text\""; 868 if (isset($this->titles[$link_id])) { 869 $title = $this->titles[$link_id]; 870 $title = $this->encodeAttribute($title); 871 $result .= " title=\"$title\""; 872 } 873 $result .= $this->empty_element_suffix; 874 $result = $this->hashPart($result); 875 } else { 876 // If there's no such link ID, leave intact: 877 $result = $whole_match; 878 } 879 880 return $result; 881 } 882 883 /** 884 * Callback to parse inline image tags 885 * @param array $matches 886 * @return string 887 */ 888 protected function _doImages_inline_callback($matches) { 889 $whole_match = $matches[1]; 890 $alt_text = $matches[2]; 891 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 892 $title =& $matches[7]; 893 894 $alt_text = $this->encodeAttribute($alt_text); 895 $url = $this->encodeURLAttribute($url); 896 $result = "<img src=\"$url\" alt=\"$alt_text\""; 897 if (isset($title)) { 898 $title = $this->encodeAttribute($title); 899 $result .= " title=\"$title\""; // $title already quoted 900 } 901 $result .= $this->empty_element_suffix; 902 903 return $this->hashPart($result); 904 } 905 906 /** 907 * Parse Markdown heading elements to HTML 908 * @param string $text 909 * @return string 910 */ 911 protected function doHeaders($text) { 912 /** 913 * Setext-style headers: 914 * Header 1 915 * ======== 916 * 917 * Header 2 918 * -------- 919 */ 920 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 921 array($this, '_doHeaders_callback_setext'), $text); 922 923 /** 924 * atx-style headers: 925 * # Header 1 926 * ## Header 2 927 * ## Header 2 with closing hashes ## 928 * ... 929 * ###### Header 6 930 */ 931 $text = preg_replace_callback('{ 932 ^(\#{1,6}) # $1 = string of #\'s 933 [ ]* 934 (.+?) 
# $2 = Header text 935 [ ]* 936 \#* # optional closing #\'s (not counted) 937 \n+ 938 }xm', 939 array($this, '_doHeaders_callback_atx'), $text); 940 941 return $text; 942 } 943 944 /** 945 * Setext header parsing callback 946 * @param array $matches 947 * @return string 948 */ 949 protected function _doHeaders_callback_setext($matches) { 950 // Terrible hack to check we haven't found an empty list item. 951 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) { 952 return $matches[0]; 953 } 954 955 $level = $matches[2][0] == '=' ? 1 : 2; 956 957 // ID attribute generation 958 $idAtt = $this->_generateIdFromHeaderValue($matches[1]); 959 960 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>"; 961 return "\n" . $this->hashBlock($block) . "\n\n"; 962 } 963 964 /** 965 * ATX header parsing callback 966 * @param array $matches 967 * @return string 968 */ 969 protected function _doHeaders_callback_atx($matches) { 970 // ID attribute generation 971 $idAtt = $this->_generateIdFromHeaderValue($matches[2]); 972 973 $level = strlen($matches[1]); 974 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>"; 975 return "\n" . $this->hashBlock($block) . "\n\n"; 976 } 977 978 /** 979 * If a header_id_func property is set, we can use it to automatically 980 * generate an id attribute. 981 * 982 * This method returns a string in the form id="foo", or an empty string 983 * otherwise. 984 * @param string $headerValue 985 * @return string 986 */ 987 protected function _generateIdFromHeaderValue($headerValue) { 988 if (!is_callable($this->header_id_func)) { 989 return ""; 990 } 991 992 $idValue = call_user_func($this->header_id_func, $headerValue); 993 if (!$idValue) { 994 return ""; 995 } 996 997 return ' id="' . $this->encodeAttribute($idValue) . '"'; 998 } 999 1000 /** 1001 * Form HTML ordered (numbered) and unordered (bulleted) lists. 
     *
     * Runs twice, once looking for bulleted lists and once for numbered
     * lists; each whole list found is handed to _doLists_callback().
     *
     * @param  string $text
     * @return string
     */
    protected function doLists($text) {
        $less_than_tab = $this->tab_width - 1;

        // Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[\.]';

        // marker of the list being searched => marker of the other kind
        $markers_relist = array(
            $marker_ul_re => $marker_ol_re,
            $marker_ol_re => $marker_ul_re,
        );

        foreach ($markers_relist as $marker_re => $other_marker_re) {
            // Re-usable pattern to match any entire ul or ol list:
            $whole_list_re = '
                (                               # $1 = whole list
                  (                             # $2
                    ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                    ('.$marker_re.')            # $4 = first list item marker
                    [ ]+
                  )
                  (?s:.+?)
                  (                             # $5
                      \z
                    |
                      \n{2,}
                      (?=\S)
                      (?!                       # Negative lookahead for another list item marker
                        [ ]*
                        '.$marker_re.'[ ]+
                      )
                    |
                      (?=                       # Lookahead for another kind of list
                        \n
                        \3                      # Must have the same indentation
                        '.$other_marker_re.'[ ]+
                      )
                  )
                )
            '; // mx

            // We use a different prefix before nested lists than top-level lists.
            //See extended comment in _ProcessListItems().

            if ($this->list_level) {
                $text = preg_replace_callback('{
                        ^
                        '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            } else {
                $text = preg_replace_callback('{
                        (?:(?<=\n)\n|\A\n?) # Must eat the newline
                        '.$whole_list_re.'
                    }mx',
                    array($this, '_doLists_callback'), $text);
            }
        }

        return $text;
    }

    /**
     * List parsing callback
     *
     * Wraps the processed items in <ul> or <ol>. With
     * $enhanced_ordered_list enabled, an ordered list whose first number is
     * greater than 1 gets a start="" attribute.
     *
     * @param  array $matches
     * @return string hashed list element surrounded by newlines
     */
    protected function _doLists_callback($matches) {
        // Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[\.]';
        // NOTE(review): this value is overwritten a few lines below before
        // it is ever read.
        $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
        $marker_ol_start_re = '[0-9]+';

        $list = $matches[1];
        $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

        // Items of this list may only use the marker kind of its first item.
        $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

        $list .= "\n";
        $result = $this->processListItems($list, $marker_any_re);

        $ol_start = 1;
        if ($this->enhanced_ordered_list) {
            // Get the start number for ordered list.
            if ($list_type == 'ol') {
                $ol_start_array = array();
                $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
                if ($ol_start_check){
                    // Kept as the matched digit string, exactly as written
                    // in the source text.
                    $ol_start = $ol_start_array[0];
                }
            }
        }

        if ($ol_start > 1 && $list_type == 'ol'){
            $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
        } else {
            $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
        }
        return "\n". $result ."\n\n";
    }

    /**
     * Nesting tracker for list levels
     *
     * Incremented on entry to processListItems() and decremented on exit.
     * @var integer
     */
    protected $list_level = 0;

    /**
     * Process the contents of a single ordered or unordered list, splitting it
     * into individual list items.
     * @param  string $list_str      text of the whole list
     * @param  string $marker_any_re expression matching this list's markers
     * @return string the list with every item wrapped in <li>
     */
    protected function processListItems($list_str, $marker_any_re) {
        /**
         * The $this->list_level global keeps track of when we're inside a list.
         * Each time we enter a list, we increment it; when we leave a list,
         * we decrement. If it's zero, we're not in a list anymore.
         *
         * We do this because when we're not inside a list, we want to treat
         * something like this:
         *
         *      I recommend upgrading to version
         *      8. Oops, now this line is treated
         *      as a sub-list.
         *
         * As a single paragraph, despite the fact that the second line starts
         * with a digit-period-space sequence.
         *
         * Whereas when we're inside a list (or sub-list), that line will be
         * treated as the start of a sub-list. What a kludge, huh? This is
         * an aspect of Markdown's syntax that's hard to parse perfectly
         * without resorting to mind-reading. Perhaps the solution is to
         * change the syntax rules such that sub-lists must start with a
         * starting cardinal number; e.g. "1." or "a.".
         */
        $this->list_level++;

        // Trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        $list_str = preg_replace_callback('{
            (\n)?                           # leading line = $1
            (^[ ]*)                         # leading whitespace = $2
            ('.$marker_any_re.'             # list marker and space = $3
                (?:[ ]+|(?=\n)) # space only required if item is not empty
            )
            ((?s:.*?))                      # list item text   = $4
            (?:(\n+(?=\n))|\n)              # tailing blank line = $5
            (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
            }xm',
            array($this, '_processListItems_callback'), $list_str);

        $this->list_level--;
        return $list_str;
    }

    /**
     * List item parsing callback
     *
     * An item that is preceded or followed by a blank line, or that
     * contains one, goes through the full block gamut. Any other item is
     * only searched for sub-lists and then passed to formParagraphs() with
     * `false` as second argument.
     *
     * @param  array $matches
     * @return string the item wrapped in <li>...</li> and a newline
     */
    protected function _processListItems_callback($matches) {
        $item = $matches[4];
        // By reference: these optional groups may be absent from $matches.
        $leading_line =& $matches[1];
        $leading_space =& $matches[2];
        $marker_space = $matches[3];
        $tailing_blank_line =& $matches[5];

        if ($leading_line || $tailing_blank_line ||
            preg_match('/\n{2,}/', $item))
        {
            // Replace marker with the appropriate whitespace indentation
            $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
            $item = $this->runBlockGamut($this->outdent($item)."\n");
        } else {
            // Recursion for sub-lists:
            $item = $this->doLists($this->outdent($item));
            $item = $this->formParagraphs($item, false);
        }

        return "<li>" . $item . "</li>\n";
    }

    /**
     * Process Markdown `<pre><code>` blocks.
     *
     * A code block is a run of lines indented by at least $tab_width
     * spaces, starting after a blank line or at the start of the text.
     *
     * @param  string $text
     * @return string
     */
    protected function doCodeBlocks($text) {
        $text = preg_replace_callback('{
                (?:\n\n|\A\n?)
                (               # $1 = the code block -- one or more lines, starting with a space/tab
                  (?>
                    [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
                    .*\n+
                  )+
                )
                ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
            }xm',
            array($this, '_doCodeBlocks_callback'), $text);

        return $text;
    }

    /**
     * Code block parsing callback
     *
     * Outdents the block, converts it with $code_block_content_func when
     * one is set (otherwise with htmlspecialchars), and wraps it in
     * <pre><code>.
     *
     * @param  array $matches
     * @return string hashed block surrounded by blank lines
     */
    protected function _doCodeBlocks_callback($matches) {
        $codeblock = $matches[1];

        $codeblock = $this->outdent($codeblock);
        if ($this->code_block_content_func) {
            // Second argument is always an empty string here.
            $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
        } else {
            $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
        }

        # trim leading newlines and trailing newlines
        $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

        $codeblock = "<pre><code>$codeblock\n</code></pre>";
        return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
    }

    /**
     * Create a code span markup for $code. Called from handleSpanToken.
* @param string $code
	 * @return string
	 */
	protected function makeCodeSpan($code) {
		if ($this->code_span_content_func) {
			// A user-supplied filter is responsible for all escaping.
			$code = call_user_func($this->code_span_content_func, $code);
		} else {
			$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
		}
		return $this->hashPart("<code>$code</code>");
	}

	/**
	 * Define the emphasis operators with their regex matches.
	 *
	 * The '' key is the pattern for an opening marker; the '*' and '_' keys
	 * are the patterns for the matching closing marker.
	 * @var array
	 */
	protected $em_relist = array(
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
		'*' => '(?<![\s*])\*(?!\*)',
		'_' => '(?<![\s_])_(?!_)',
	);

	/**
	 * Define the strong operators with their regex matches
	 * (same key layout as $em_relist).
	 * @var array
	 */
	protected $strong_relist = array(
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
		'**' => '(?<![\s*])\*\*(?!\*)',
		'__' => '(?<![\s_])__(?!_)',
	);

	/**
	 * Define the emphasis + strong operators with their regex matches
	 * (same key layout as $em_relist).
	 * @var array
	 */
	protected $em_strong_relist = array(
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
		'***' => '(?<![\s*])\*\*\*(?!\*)',
		'___' => '(?<![\s_])___(?!_)',
	);

	/**
	 * Container for prepared regular expressions, keyed by the
	 * concatenation of the currently open em and strong markers.
	 * @var array
	 */
	protected $em_strong_prepared_relist;

	/**
	 * Prepare regular expressions for searching emphasis tokens in any
	 * context.
	 * @return void
	 */
	protected function prepareItalicsAndBold() {
		foreach ($this->em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				// Construct list of allowed token expressions.
				$token_relist = array();
				if (isset($this->em_strong_relist["$em$strong"])) {
					$token_relist[] = $this->em_strong_relist["$em$strong"];
				}
				$token_relist[] = $em_re;
				$token_relist[] = $strong_re;

				// Construct master expression from list.
				$token_re = '{(' . implode('|', $token_relist) . ')}';
				$this->em_strong_prepared_relist["$em$strong"] = $token_re;
			}
		}
	}

	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML
	 * @param string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		if ($this->in_emphasis_processing) {
			return $text; // avoid reentrancy
		}
		$this->in_emphasis_processing = true;

		// Both stacks grow at index 0: $token_stack holds the open markers,
		// $text_stack the text accumulated inside each of them.
		$token_stack = array('');
		$text_stack = array('');
		$em = '';
		$strong = '';
		$tree_char_em = false;

		while (1) {
			// Get prepared regular expression for searching emphasis tokens
			// in current context.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Each loop iteration searches for the next emphasis token.
			// Each token is then passed to handleSpanToken.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			// References avoid undefined-index notices when no token is found.
			$token =& $parts[1];
			$text =& $parts[2];

			if (empty($token)) {
				// Reached end of text span: empty stack without emitting
				// any more emphasis.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);
			if ($tree_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					$$tag = ''; // $$tag stands for $em or $strong
				}
				$tree_char_em = false;
			} else if ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong.
					// Closing strong marker:
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						$$tag = ''; // $$tag stands for $em or $strong
					}
				} else {
					// Reached opening three-char emphasis marker. Push on token
					// stack; will be handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$tree_char_em = true;
				}
			} else if ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// Innermost open marker is not an em: keep as text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}
		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}

	/**
	 * Parse Markdown blockquotes to HTML
	 * @param string $text
	 * @return string
	 */
	protected function doBlockQuotes($text) {
		$text = preg_replace_callback('/
			(								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			)
			/xm',
			array($this, '_doBlockQuotes_callback'), $text);

		return $text;
	}

	/**
	 * Blockquote parsing callback: strips one level of quoting, runs the
	 * block gamut on the content and wraps it in a hashed `<blockquote>`.
	 * @param array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback($matches) {
		$bq = $matches[1];
		// trim one level of quoting - trim whitespace-only lines
		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
		$bq = $this->runBlockGamut($bq); // recurse

		// Indent the quoted content.
		$bq = preg_replace('/^/m', " ", $bq);
		// These leading spaces cause problem with <pre> content,
		// so we need to fix that:
		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
			array($this, '_doBlockQuotes_callback2'), $bq);

		return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
	}

	/**
	 * Blockquote parsing callback: removes the indentation that
	 * _doBlockQuotes_callback added in front of `<pre>` content.
	 * @param array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback2($matches) {
		$pre = $matches[1];
		$pre = preg_replace('/^ /m', '', $pre);
		return $pre;
	}

	/**
	 * Parse paragraphs
	 *
	 * @param string $text String to process in paragraphs
	 * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);

		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		// Wrap <p> tags and unhashify HTML blocks
		foreach ($grafs as $key => $value) {
			if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
				// Is a paragraph.
				$value = $this->runSpanGamut($value);
				if ($wrap_in_p) {
					$value = preg_replace('/^([ ]*)/', "<p>", $value);
					$value .= "</p>";
				}
				$grafs[$key] = $this->unhash($value);
			} else {
				// Is a hashed block: swap the stored HTML back in unchanged.
				$grafs[$key] = $this->html_hashes[$value];
			}
		}

		return implode("\n\n", $grafs);
	}

	/**
	 * Encode text for a double-quoted HTML attribute. This function
	 * is *not* suitable for attributes enclosed in single quotes.
	 * @param string $text
	 * @return string
	 */
	protected function encodeAttribute($text) {
		$text = $this->encodeAmpsAndAngles($text);
		// A raw double quote would terminate the attribute value, so it
		// must be emitted as a character reference.
		$text = str_replace('"', '&quot;', $text);
		return $text;
	}

	/**
	 * Encode text for a double-quoted HTML attribute containing a URL,
	 * applying the URL filter if set. Also generates the textual
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
	 * This function is *not* suitable for attributes enclosed in single quotes.
	 *
	 * @param string $url
	 * @param string &$text Passed by reference
	 * @return string URL
	 */
	protected function encodeURLAttribute($url, &$text = null) {
		if ($this->url_filter_func) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		if (preg_match('{^mailto:}i', $url)) {
			// 7 = strlen("mailto:"), the prefix left out of the link text.
			$url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		} else if (preg_match('{^tel:}i', $url)) {
			$url = $this->encodeAttribute($url);
			// 4 = strlen("tel:"), the prefix left out of the link text.
			$text = substr($url, 4);
		} else {
			$url = $this->encodeAttribute($url);
			$text = $url;
		}

		return $url;
	}

	/**
	 * Smart processing for ampersands and angle brackets that need to
	 * be encoded. Valid character entities are left alone unless the
	 * no-entities mode is set.
* @param string $text
	 * @return string
	 */
	protected function encodeAmpsAndAngles($text) {
		if ($this->no_entities) {
			// No-entities mode: every ampersand is literal text.
			$text = str_replace('&', '&amp;', $text);
		} else {
			// Ampersand-encoding based entirely on Nat Irons's Amputator
			// MT plugin: <http://bumppo.net/projects/amputator/>
			// Only ampersands that do not start an entity are encoded.
			$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
								'&amp;', $text);
		}
		// Encode remaining <'s
		$text = str_replace('<', '&lt;', $text);

		return $text;
	}

	/**
	 * Parse Markdown automatic links to anchor HTML tags
	 * @param string $text
	 * @return string
	 */
	protected function doAutoLinks($text) {
		// URLs: <scheme:...> for the schemes listed in the pattern.
		$text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
			array($this, '_doAutoLinks_url_callback'), $text);

		// Email addresses: <address@domain.foo>
		$text = preg_replace_callback('{
			<
			(?:mailto:)?
			(
				(?:
					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
				|
					".*?"
				)
				\@
				(?:
					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
				|
					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
				)
			)
			>
			}xi',
			array($this, '_doAutoLinks_email_callback'), $text);

		return $text;
	}

	/**
	 * Parse URL callback
	 * @param array $matches
	 * @return string
	 */
	protected function _doAutoLinks_url_callback($matches) {
		// $text is filled in by reference with the visible link text.
		$url = $this->encodeURLAttribute($matches[1], $text);
		$link = "<a href=\"$url\">$text</a>";
		return $this->hashPart($link);
	}

	/**
	 * Parse email address callback
	 * @param array $matches
	 * @return string
	 */
	protected function _doAutoLinks_email_callback($matches) {
		$addr = $matches[1];
		// $text is filled in by reference with the visible link text.
		$url = $this->encodeURLAttribute("mailto:$addr", $text);
		$link = "<a href=\"$url\">$text</a>";
		return $this->hashPart($link);
	}

	/**
	 * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
	 *
	 * Output: the same text but with most characters encoded as either a
	 *         decimal or hex entity, in the hopes of foiling most address
	 *         harvesting spam bots. E.g.:
	 *
	 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
	 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
	 *        &#x6d;
	 *
	 * Note: the additional output $tail is assigned the same value as the
	 * output, minus the number of characters specified by $head_length.
	 *
	 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
	 * With some optimizations by Milian Wolff. Forced encoding of HTML
	 * attribute special characters by Allan Odgaard.
	 *
	 * @param string $text
	 * @param string &$tail
	 * @param integer $head_length
	 * @return string
	 */
	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
		if ($text == "") {
			return $tail = "";
		}

		// Split between every position; without the `u` modifier this
		// yields one array element per byte.
		$chars = preg_split('/(?<!^)(?!$)/', $text);
		$seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.

		foreach ($chars as $key => $char) {
			$ord = ord($char);
			// Ignore non-ascii chars.
			if ($ord < 128) {
				$r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
				// roughly 10% raw, 45% hex, 45% dec
				// '@' *must* be encoded. I insist.
				// '"' and '>' have to be encoded inside the attribute
				if ($r > 90 && strpos('@"&>', $char) === false) {
					/* do nothing */
				} else if ($r < 45) {
					$chars[$key] = '&#x'.dechex($ord).';';
				} else {
					$chars[$key] = '&#'.$ord.';';
				}
			}
		}

		$text = implode('', $chars);
		// Slicing $chars (not the joined string) skips whole source
		// characters, whatever length their encoded form has.
		$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

		return $text;
	}

	/**
	 * Take the string $str and parse it into tokens, hashing embeded HTML,
	 * escaped characters and handling code spans.
* @param string $str
	 * @return string
	 */
	protected function parseSpan($str) {
		$span_re = '{
				(
					\\\\'.$this->escape_chars_re.'
				|
					(?<![`\\\\])
					`+						# code span marker
			'.( $this->no_markup ? '' : '
				|
					<!--    .*?     -->		# comment
				|
					<\?.*?\?> | <%.*?%>		# processing instruction
				|
					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
					(?>
						\s
						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
					)?
					>
				|
					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
				|
					</[-a-zA-Z0-9:_]+\s*> # closing tag
			').'
				)
				}xs';

		$result = '';

		for (;;) {
			// Cut the text at the next tag, opening code span marker or
			// escaped character: [0] = text before, [1] = token, [2] = rest.
			$pieces = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

			// Plain text preceding the token goes out untouched.
			$result .= $pieces[0];

			// No token means the end of the text was reached.
			if (!isset($pieces[1])) {
				break;
			}

			// The handler may consume part of the remaining text (passed
			// by reference), so read the remainder back afterwards.
			$result .= $this->handleSpanToken($pieces[1], $pieces[2]);
			$str = $pieces[2];
		}

		return $result;
	}

	/**
	 * Decide what a token found by parseSpan is and return the text that
	 * should replace it. For code spans, the consumed content is removed
	 * from $str.
	 * @param string $token
	 * @param string &$str
	 * @return string
	 */
	protected function handleSpanToken($token, &$str) {
		$first = $token[0];

		if ($first === "\\") {
			// Backslash escape: emit the escaped character as an entity.
			return $this->hashPart("&#". ord($token[1]). ";");
		}

		if ($first === "`") {
			// Search for end marker in remaining text.
			$found = preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
				$str, $matches);
			if (!$found) {
				// No ending marker: the backticks are ordinary text.
				return $token;
			}
			$str = $matches[2];
			return $this->hashPart($this->makeCodeSpan($matches[1]));
		}

		// Anything else is markup matched by parseSpan; protect it as is.
		return $this->hashPart($token);
	}

	/**
	 * Remove one level of line-leading tabs or spaces
	 * @param string $text
	 * @return string
	 */
	protected function outdent($text) {
		$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
		return preg_replace($indent_re, '', $text);
	}


	/**
	 * String length function for detab. `_initDetab` will create a function to
	 * handle UTF-8 if the default function does not exist.
	 * @var string
	 */
	protected $utf8_strlen = 'mb_strlen';

	/**
	 * Replace tabs with the appropriate amount of spaces.
	 *
	 * Only lines containing a tab are handed to the callback, which splits
	 * the line on tabs and rebuilds it with the number of spaces needed to
	 * reach the next tab stop.
	 *
	 * @param string $text
	 * @return string
	 */
	protected function detab($text) {
		return preg_replace_callback('/^.*\t.*$/m',
			array($this, '_detab_callback'), $text);
	}

	/**
	 * Replace tabs callback
	 * @param array $matches
	 * @return string
	 */
	protected function _detab_callback($matches) {
		$measure = $this->utf8_strlen; // strlen function for UTF-8.

		// Segments of the line that were separated by tabs.
		$segments = explode("\t", $matches[0]);

		// Start from the first segment, then append the others, each
		// preceded by the padding that reaches the next tab stop.
		$rebuilt = array_shift($segments);
		foreach ($segments as $segment) {
			$padding = $this->tab_width -
				$measure($rebuilt, 'UTF-8') % $this->tab_width;
			$rebuilt .= str_repeat(" ", $padding) . $segment;
		}
		return $rebuilt;
	}

	/**
	 * Check for the availability of the function in the `utf8_strlen` property
	 * (initially `mb_strlen`).
If the function is not available, create a
	 * function that will loosely count the number of UTF-8 characters with a
	 * regular expression.
	 * @return void
	 */
	protected function _initDetab() {
		if (!function_exists($this->utf8_strlen)) {
			// Fallback counter: preg_match_all returns the number of
			// matches, one per single byte or lead byte with its
			// continuation bytes.
			$this->utf8_strlen = function($text) {
				return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
			};
		}
	}

	/**
	 * Put back the content that was replaced by hash tokens.
	 * @param string $text
	 * @return string
	 */
	protected function unhash($text) {
		// A token is a marker character, \x1A, digits, and the same marker
		// character again.
		$token_re = '/(.)\x1A[0-9]+\1/';
		return preg_replace_callback($token_re,
			array($this, '_unhash_callback'), $text);
	}

	/**
	 * Unhashing callback: look the whole token up in the hash table.
	 * @param array $matches
	 * @return string
	 */
	protected function _unhash_callback($matches) {
		$token = $matches[0];
		return $this->html_hashes[$token];
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body