Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  /**
   3   * Markdown  -  A text-to-HTML conversion tool for web writers
   4   *
   5   * @package   php-markdown
   6   * @author    Michel Fortin <michel.fortin@michelf.com>
   7   * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8   * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9   */
  10  
  11  namespace Michelf;
  12  
  13  /**
  14   * Markdown Parser Class
  15   */
  16  class Markdown implements MarkdownInterface {
  17  	 /**
  18  	  * Define the package version
  19  	  * @var string
  20  	  */
  21  	 const MARKDOWNLIB_VERSION = "1.9.0";
  22  
  23  	 /**
  24  	  * Simple function interface - Initialize the parser and return the result
  25  	  * of its transform method. This will work fine for derived classes too.
  26  	  *
  27  	  * @api
  28  	  *
  29  	  * @param  string $text
  30  	  * @return string
  31  	  */
  32  	public static function defaultTransform($text) {
  33  	 	 // Take parser class on which this function was called.
  34  	 	 $parser_class = \get_called_class();
  35  
  36  	 	 // Try to take parser from the static parser list
  37  	 	 static $parser_list;
  38  	 	 $parser =& $parser_list[$parser_class];
  39  
  40  	 	 // Create the parser it not already set
  41  	 	 if (!$parser) {
  42  	 	 	 $parser = new $parser_class;
  43  	 	 }
  44  
  45  	 	 // Transform text using parser.
  46  	 	 return $parser->transform($text);
  47  	 }
  48  
  49  	 /**
  50  	  * Configuration variables
  51  	  */
  52  
  53  	 /**
  54  	  * Change to ">" for HTML output.
  55  	  * @var string
  56  	  */
  57  	 public $empty_element_suffix = " />";
  58  
  59  	 /**
  60  	  * The width of indentation of the output markup
  61  	  * @var int
  62  	  */
  63  	 public $tab_width = 4;
  64  
  65  	 /**
  66  	  * Change to `true` to disallow markup or entities.
  67  	  * @var boolean
  68  	  */
  69  	 public $no_markup   = false;
  70  	 public $no_entities = false;
  71  
  72  
  73  	 /**
  74  	  * Change to `true` to enable line breaks on \n without two trailing spaces
  75  	  * @var boolean
  76  	  */
  77  	 public $hard_wrap = false;
  78  
  79  	 /**
  80  	  * Predefined URLs and titles for reference links and images.
  81  	  * @var array
  82  	  */
  83  	 public $predef_urls   = array();
  84  	 public $predef_titles = array();
  85  
  86  	 /**
  87  	  * Optional filter function for URLs
  88  	  * @var callable|null
  89  	  */
  90  	 public $url_filter_func = null;
  91  
  92  	 /**
  93  	  * Optional header id="" generation callback function.
  94  	  * @var callable|null
  95  	  */
  96  	 public $header_id_func = null;
  97  
  98  	 /**
  99  	  * Optional function for converting code block content to HTML
 100  	  * @var callable|null
 101  	  */
 102  	 public $code_block_content_func = null;
 103  
 104  	 /**
 105  	  * Optional function for converting code span content to HTML.
 106  	  * @var callable|null
 107  	  */
 108  	 public $code_span_content_func = null;
 109  
 110  	 /**
 111  	  * Class attribute to toggle "enhanced ordered list" behaviour
 112  	  * setting this to true will allow ordered lists to start from the index
 113  	  * number that is defined first.
 114  	  *
 115  	  * For example:
 116  	  * 2. List item two
 117  	  * 3. List item three
 118  	  *
 119  	  * Becomes:
 120  	  * <ol start="2">
 121  	  * <li>List item two</li>
 122  	  * <li>List item three</li>
 123  	  * </ol>
 124  	  *
 125  	  * @var bool
 126  	  */
 127  	 public $enhanced_ordered_list = false;
 128  
 129  	 /**
 130  	  * Parser implementation
 131  	  */
 132  
 133  	 /**
 134  	  * Regex to match balanced [brackets].
 135  	  * Needed to insert a maximum bracket depth while converting to PHP.
 136  	  * @var int
 137  	  */
 138  	 protected $nested_brackets_depth = 6;
 139  	 protected $nested_brackets_re;
 140  
 141  	 protected $nested_url_parenthesis_depth = 4;
 142  	 protected $nested_url_parenthesis_re;
 143  
 144  	 /**
 145  	  * Table of hash values for escaped characters:
 146  	  * @var string
 147  	  */
 148  	 protected $escape_chars = '\`*_{}[]()>#+-.!';
 149  	 protected $escape_chars_re;
 150  
 151  	 /**
 152  	  * Constructor function. Initialize appropriate member variables.
 153  	  * @return void
 154  	  */
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced [brackets]: the opening fragment repeated once per
		// nesting level, followed by the same number of closing fragments.
		$bracket_depth = $this->nested_brackets_depth;
		$this->nested_brackets_re =
			str_repeat('(?>[^\[\]]+|\[', $bracket_depth)
			. str_repeat('\])*', $bracket_depth);

		// Same construction for balanced (parentheses) inside URLs.
		$paren_depth = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re =
			str_repeat('(?>[^()\s]+|\(', $paren_depth)
			. str_repeat('(?>\)))*', $paren_depth);

		// Character class matching any one of the escapable characters.
		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Sort each gamut by ascending priority, keeping method names as keys.
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
			asort($this->$gamut);
		}
	}
 174  
 175  
 176  	 /**
 177  	  * Internal hashes used during transformation.
 178  	  * @var array
 179  	  */
 180  	 protected $urls        = array();
 181  	 protected $titles      = array();
 182  	 protected $html_hashes = array();
 183  
 184  	 /**
 185  	  * Status flag to avoid invalid nesting.
 186  	  * @var boolean
 187  	  */
 188  	 protected $in_anchor = false;
 189  
 190  	 /**
 191  	  * Status flag to avoid invalid nesting.
 192  	  * @var boolean
 193  	  */
 194  	 protected $in_emphasis_processing = false;
 195  
 196  	 /**
 197  	  * Called before the transformation process starts to setup parser states.
 198  	  * @return void
 199  	  */
	protected function setup() {
		// Both nesting guards start out cleared.
		$this->in_emphasis_processing = false;
		$this->in_anchor = false;

		// No hashed HTML yet; reference links start from the predefined
		// URL and title tables.
		$this->html_hashes = array();
		$this->titles = $this->predef_titles;
		$this->urls = $this->predef_urls;
	}
 208  
 209  	 /**
 210  	  * Called after the transformation process to clear any variable which may
 211  	  * be taking up memory unnecessarily.
 212  	  * @return void
 213  	  */
	protected function teardown() {
		// Drop the per-document tables so they do not keep holding memory.
		$this->urls = $this->titles = $this->html_hashes = array();
	}
 219  
 220  	 /**
 221  	  * Main function. Performs some preprocessing on the input text and pass
 222  	  * it through the document gamut.
 223  	  *
 224  	  * @api
 225  	  *
 226  	  * @param  string $text
 227  	  * @return string
 228  	  */
	public function transform($text) {
		$this->setup();

		// Remove a leading UTF-8 BOM and any \x1A marker character. \x1A is
		// the byte hashPart() uses inside its placeholder tokens.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		// Standardize line endings: DOS (\r\n) and Mac (\r) become Unix (\n).
		$text = preg_replace('{\r\n?}', "\n", $text);

		// Make sure the text ends with a couple of newlines, then convert
		// all tabs to spaces.
		$text = $this->detab($text . "\n\n");

		// Turn block-level HTML blocks into hash entries.
		$text = $this->hashHTMLBlocks($text);

		// Empty out lines consisting only of spaces, so later patterns can
		// match consecutive blank lines with a plain /\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Run the document gamut methods in their sorted order.
		foreach (array_keys($this->document_gamut) as $step) {
			$text = $this->$step($text);
		}

		$this->teardown();

		return $text . "\n";
	}
 263  
 264  	 /**
 265  	  * Define the document gamut
 266  	  * @var array
 267  	  */
 268  	 protected $document_gamut = array(
 269  	 	 // Strip link definitions, store in hashes.
 270  	 	 "stripLinkDefinitions" => 20,
 271  	 	 "runBasicBlockGamut"   => 30,
 272  	 );
 273  
 274  	 /**
 275  	  * Strips link definitions from text, stores the URLs and titles in
 276  	  * hash references
 277  	  * @param  string $text
 278  	  * @return string
 279  	  */
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by at most tab_width - 1 spaces.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		$definition_re = '{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm';

		// The callback records each definition and returns the text that
		// replaces it.
		return preg_replace_callback(
			$definition_re,
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
 312  
 313  	 /**
 314  	  * The callback to strip link definitions
 315  	  * @param  array $matches
 316  	  * @return string
 317  	  */
	protected function _stripLinkDefinitions_callback($matches) {
		// Ids are stored lower-cased.
		$key = strtolower($matches[1]);

		// The URL is in $2 when written as <url>, otherwise in $3.
		if ($matches[2] == '') {
			$this->urls[$key] = $matches[3];
		} else {
			$this->urls[$key] = $matches[2];
		}

		// Bound by reference so a definition without a title stores null
		// instead of raising an undefined-index notice.
		$this->titles[$key] =& $matches[4];

		// The whole definition is replaced by an empty string.
		return '';
	}
 325  
 326  	 /**
 327  	  * Hashify HTML blocks
 328  	  * @param  string $text
 329  	  * @return string
 330  	  */
	protected function hashHTMLBlocks($text) {
		// With markup disabled the text is returned untouched.
		if ($this->no_markup) {
			return $text;
		}

		// An opening tag may be indented by at most tab_width - 1 spaces.
		$max_indent = $this->tab_width - 1;

		/**
		 * Only block-level HTML tags (headers, lists, tables, ...) are
		 * hashed here, because "paragraphs" wrapped in non-block-level tags
		 * (anchors, phrase emphasis, spans) must still get <p>s around them.
		 * The tag lists are hard-coded:
		 *
		 * *  List "a" holds tags that can be either inline or block-level.
		 *    They are treated as block-level when the start tag is alone on
		 *    its line; otherwise they are not matched here and are taken as
		 *    inline later.
		 * *  List "b" holds tags that are always block-level.
		 */
		$tags_a_re = 'ins|del';
		$tags_b_re = implode('|', array(
			'p', 'div', 'h[1-6]', 'blockquote', 'pre', 'table', 'dl', 'ol',
			'ul', 'address', 'script', 'noscript', 'style', 'form',
			'fieldset', 'iframe', 'math', 'svg', 'article', 'section', 'nav',
			'aside', 'hgroup', 'header', 'footer', 'figure',
		));

		// Sub-pattern for the optional attributes of a tag.
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';

		// Sub-pattern for the content of a block tag: the opening fragment
		// repeated once per nesting level, the innermost content, then the
		// same number of closing fragments.
		$depth = 4;
		$descend = str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $depth);
		$ascend = str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*', $depth);
		$content = $descend . '.*?' . $ascend;

		// Same sub-pattern for the group "a" alternative, where the tag
		// name is captured in $3 instead of $2.
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * The pattern handles nested blocks, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to
		 * match, and the inner nested divs must be indented.
		 */
		$block_re = '{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$max_indent.'}
						<('.$tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$max_indent.'}
						<('.$tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$max_indent.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$max_indent.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$max_indent.'}
					(?s:
						<([?%])			# $2
						.*?
						\2>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi';

		// Each matched block ($1) is swapped for a hash token by the callback.
		return preg_replace_callback(
			$block_re,
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);
	}
 478  
 479  	 /**
 480  	  * The callback for hashing HTML blocks
 481  	  * @param  array $matches
 482  	  * @return string
 483  	  */
	protected function _hashHTMLBlocks_callback($matches) {
		// $1 is the whole HTML block; its hash token is returned with a
		// blank line on each side.
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
 489  
 490  	 /**
 491  	  * Called whenever a tag must be hashed when a function inserts an atomic
 492  	  * element in the text stream. Passing $text through this function gives
 493  	  * a unique text-token which will be reverted back when calling unhash.
 494  	  *
 495  	  * The $boundary argument specifies what character should be used to surround
 496  	  * the token. By convention, "B" is used for block elements that need not
 497  	  * be wrapped into paragraph tags at the end, ":" is used for elements
 498  	  * that are word separators and "X" is used in the general case.
 499  	  *
 500  	  * @param  string $text
 501  	  * @param  string $boundary
 502  	  * @return string
 503  	  */
	protected function hashPart($text, $boundary = 'X') {
		// Swap back any hash token already present in $text, so the stored
		// value never itself contains tokens needing another `unhash` pass.
		$expanded = $this->unhash($text);

		// Function-level static counter: every call gets a new number.
		static $counter = 0;
		$counter++;

		// Token layout: boundary, \x1A, sequence number, boundary.
		$token = $boundary . "\x1A" . $counter . $boundary;
		$this->html_hashes[$token] = $expanded;

		// The token is what replaces the tag in the text stream.
		return $token;
	}
 515  
 516  	 /**
 517  	  * Shortcut function for hashPart with block-level boundaries.
 518  	  * @param  string $text
 519  	  * @return string
 520  	  */
	protected function hashBlock($text) {
		// 'B' is the boundary character used for block-level tokens.
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
 524  
 525  	 /**
 526  	  * Define the block gamut - these are all the transformations that form
 527  	  * block-level tags like paragraphs, headers, and list items.
 528  	  * @var array
 529  	  */
 530  	 protected $block_gamut = array(
 531  	 	 "doHeaders"         => 10,
 532  	 	 "doHorizontalRules" => 20,
 533  	 	 "doLists"           => 40,
 534  	 	 "doCodeBlocks"      => 50,
 535  	 	 "doBlockQuotes"     => 60,
 536  	 );
 537  
 538  	 /**
 539  	  * Run block gamut transformations.
 540  	  *
 541  	  * We need to escape raw HTML in Markdown source before doing anything
 542  	  * else. This needs to be done for each block, and not only at the
 543  	  * beginning in the Markdown function since hashed blocks can be part of
 544  	  * list items and could have been indented. Indented blocks would have
 545  	  * been seen as a code block in a previous pass of hashHTMLBlocks.
 546  	  *
 547  	  * @param  string $text
 548  	  * @return string
 549  	  */
	protected function runBlockGamut($text) {
		// Hash raw HTML blocks first, then apply the block gamut proper.
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
 554  
 555  	 /**
 556  	  * Run block gamut transformations, without hashing HTML blocks. This is
 557  	  * useful when HTML blocks are known to be already hashed, like in the first
 558  	  * whole-document pass.
 559  	  *
 560  	  * @param  string $text
 561  	  * @return string
 562  	  */
	protected function runBasicBlockGamut($text) {
		// Apply every block-level transformation in its sorted order.
		foreach (array_keys($this->block_gamut) as $step) {
			$text = $this->$step($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
 574  
 575  	 /**
 576  	  * Convert horizontal rules
 577  	  * @param  string $text
 578  	  * @return string
 579  	  */
	protected function doHorizontalRules($text) {
		// A single <hr> token is hashed up front and reused for every rule
		// found in $text.
		$rule = "\n" . $this->hashBlock("<hr$this->empty_element_suffix") . "\n";

		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		return preg_replace($rule_re, $rule, $text);
	}
 596  
 597  	 /**
 598  	  * These are all the transformations that occur *within* block-level
 599  	  * tags like paragraphs, headers, and list items.
 600  	  * @var array
 601  	  */
 602  	 protected $span_gamut = array(
 603  	 	 // Process character escapes, code spans, and inline HTML
 604  	 	 // in one shot.
 605  	 	 "parseSpan"           => -30,
 606  	 	 // Process anchor and image tags. Images must come first,
 607  	 	 // because ![foo][f] looks like an anchor.
 608  	 	 "doImages"            =>  10,
 609  	 	 "doAnchors"           =>  20,
 610  	 	 // Make links out of things like `<https://example.com/>`
 611  	 	 // Must come after doAnchors, because you can use < and >
 612  	 	 // delimiters in inline links like [this](<url>).
 613  	 	 "doAutoLinks"         =>  30,
 614  	 	 "encodeAmpsAndAngles" =>  40,
 615  	 	 "doItalicsAndBold"    =>  50,
 616  	 	 "doHardBreaks"        =>  60,
 617  	 );
 618  
 619  	 /**
 620  	  * Run span gamut transformations
 621  	  * @param  string $text
 622  	  * @return string
 623  	  */
	protected function runSpanGamut($text) {
		// Apply every span-level transformation in its sorted order.
		foreach (array_keys($this->span_gamut) as $step) {
			$text = $this->$step($text);
		}

		return $text;
	}
 631  
 632  	 /**
 633  	  * Do hard breaks
 634  	  * @param  string $text
 635  	  * @return string
 636  	  */
	protected function doHardBreaks($text) {
		// With hard_wrap on, every newline (plus any spaces before it) is a
		// break; otherwise a newline needs at least two spaces before it.
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$break_re,
			array($this, '_doHardBreaks_callback'),
			$text
		);
	}
 646  
 647  	 /**
 648  	  * Trigger part hashing for the hard break (callback method)
 649  	  * @param  array $matches
 650  	  * @return string
 651  	  */
	protected function _doHardBreaks_callback($matches) {
		// The matched text is not used: every break becomes the same tag
		// followed by a newline, hashed as a span-level part.
		$break_tag = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($break_tag);
	}
 655  
 656  	 /**
 657  	  * Turn Markdown link shortcuts into XHTML <a> tags.
 658  	  * @param  string $text
 659  	  * @return string
 660  	  */
	protected function doAnchors($text) {
		// Links cannot nest: do nothing when already inside an anchor.
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		$brackets = $this->nested_brackets_re;
		$url_parens = $this->nested_url_parenthesis_re;
		$by_reference = array($this, '_doAnchors_reference_callback');
		$inline = array($this, '_doAnchors_inline_callback');

		// First, handle reference-style links: [link text] [id]
		$reference_re = '{
			(					# wrap whole match in $1
			  \[
				('.$brackets.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs';
		$text = preg_replace_callback($reference_re, $by_reference, $text);

		// Next, inline-style links: [link text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  \[
				('.$brackets.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$url_parens.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs';
		$text = preg_replace_callback($inline_re, $inline, $text);

		// Last, handle reference-style shortcuts: [link text]
		// These must come last in case you've also got [link text][1]
		// or [link text](/foo)
		$shortcut_re = '{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs';
		$text = preg_replace_callback($shortcut_re, $by_reference, $text);

		$this->in_anchor = false;
		return $text;
	}
 724  
 725  	 /**
 726  	  * Callback method to parse referenced anchors
 727  	  * @param  array $matches
 728  	  * @return string
 729  	  */
	protected function _doAnchors_reference_callback($matches) {
		// $3 is absent for the shortcut form [this] and empty for [this][];
		// in both cases the link text doubles as the id.
		$link_id = isset($matches[3]) ? $matches[3] : '';
		if ($link_id == "") {
			$link_id = $matches[2];
		}

		// Lower-case and turn embedded newlines into spaces.
		$link_id = strtolower($link_id);
		$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

		// Unknown id: leave the original text untouched.
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$link_id]);
		$tag = "<a href=\"$href\"";

		if (isset($this->titles[$link_id])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
		}

		$tag .= '>' . $this->runSpanGamut($matches[2]) . '</a>';

		return $this->hashPart($tag);
	}
 763  
 764  	 /**
 765  	  * Callback method to parse inline anchors
 766  	  * @param  array $matches
 767  	  * @return string
 768  	  */
	protected function _doAnchors_inline_callback($matches) {
		// Run the span gamut over the link text once. The previous code ran
		// it a second time on the already-processed text just before
		// emitting the tag, which was redundant work.
		$link_text =  $this->runSpanGamut($matches[2]);

		// The URL is in $3 when written as <url>, otherwise in $4.
		$url       =  $matches[3] === '' ? $matches[4] : $matches[3];

		// Bound by reference so a link without a title yields null instead
		// of raising an undefined-index notice.
		$title     =& $matches[7];

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using
		// the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed !== $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url = $this->encodeURLAttribute($url);

		$result = "<a href=\"$url\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}
		$result .= ">$link_text</a>";

		// Hash the finished tag so later span passes leave it alone.
		return $this->hashPart($result);
	}
 794  
 795  	 /**
 796  	  * Turn Markdown image shortcuts into <img> tags.
 797  	  * @param  string $text
 798  	  * @return string
 799  	  */
	protected function doImages($text) {
		$brackets = $this->nested_brackets_re;
		$url_parens = $this->nested_url_parenthesis_re;

		// First, handle reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$brackets.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';
		$text = preg_replace_callback(
			$reference_re,
			array($this, '_doImages_reference_callback'),
			$text
		);

		// Next, handle inline images:  ![alt text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$brackets.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$url_parens.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs';

		return preg_replace_callback(
			$inline_re,
			array($this, '_doImages_inline_callback'),
			$text
		);
	}
 848  
 849  	 /**
 850  	  * Callback to parse references image tags
 851  	  * @param  array $matches
 852  	  * @return string
 853  	  */
	protected function _doImages_reference_callback($matches) {
		// An empty id, as in ![this][], falls back to the alt text.
		$link_id = strtolower($matches[3]);
		if ($link_id == "") {
			$link_id = strtolower($matches[2]);
		}

		$alt = $this->encodeAttribute($matches[2]);

		// If there's no such link ID, leave the original text intact.
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$src = $this->encodeURLAttribute($this->urls[$link_id]);
		$tag = "<img src=\"$src\" alt=\"$alt\"";

		if (isset($this->titles[$link_id])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
		}

		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
 881  
 882  	 /**
 883  	  * Callback to parse inline image tags
 884  	  * @param  array $matches
 885  	  * @return string
 886  	  */
	protected function _doImages_inline_callback($matches) {
		// Bound by reference so an image without a title yields null
		// instead of raising an undefined-index notice.
		$title =& $matches[7];

		// The URL is in $3 when written as <url>, otherwise in $4.
		$src = $matches[3] == '' ? $matches[4] : $matches[3];

		$alt = $this->encodeAttribute($matches[2]);
		$src = $this->encodeURLAttribute($src);

		$tag = "<img src=\"$src\" alt=\"$alt\"";
		if (isset($title)) {
			// The title is attribute-encoded here, then wrapped in quotes.
			$tag .= ' title="' . $this->encodeAttribute($title) . '"';
		}
		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
 904  
 905  	 /**
 906  	  * Parse Markdown heading elements to HTML
 907  	  * @param  string $text
 908  	  * @return string
 909  	  */
	protected function doHeaders($text) {
		/**
		 * Setext-style headers, i.e. a line of text underlined with
		 * "=" (level 1) or "-" (level 2):
		 *	  Header 1
		 *	  ========
		 *
		 *	  Header 2
		 *	  --------
		 */
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
		$text = preg_replace_callback(
			$setext_re,
			array($this, '_doHeaders_callback_setext'),
			$text
		);

		/**
		 * atx-style headers, i.e. one to six leading "#" characters:
		 *   # Header 1
		 *   ## Header 2
		 *   ## Header 2 with closing hashes ##
		 *   ...
		 *   ###### Header 6
		 */
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		return preg_replace_callback(
			$atx_re,
			array($this, '_doHeaders_callback_atx'),
			$text
		);
	}
 942  
	/**
	 * Callback building the HTML for one Setext-style header match.
	 *
	 * Group 1 holds the header text and group 2 the underline.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_setext($matches) {
		$title     = $matches[1];
		$underline = $matches[2];

		// A single "-" underneath a line that is itself "-" or starts with
		// "- " is an empty list item, not a header: return the match as is.
		if ($underline == '-' && preg_match('{^-(?: |$)}', $title)) {
			return $matches[0];
		}

		// "=" underlines give <h1>, "-" underlines give <h2>.
		if ($underline[0] == '=') {
			$level = 1;
		} else {
			$level = 2;
		}

		$idAtt = $this->_generateIdFromHeaderValue($title);
		$html  = "<h$level$idAtt>" . $this->runSpanGamut($title) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
 962  
	/**
	 * Callback building the HTML for one atx-style header match.
	 *
	 * Group 1 holds the leading "#" characters and group 2 the header text.
	 * @param  array $matches
	 * @return string
	 */
	protected function _doHeaders_callback_atx($matches) {
		$content = $matches[2];

		$idAtt = $this->_generateIdFromHeaderValue($content);

		// The header level is the number of leading "#" characters.
		$level = strlen($matches[1]);
		$html  = "<h$level$idAtt>" . $this->runSpanGamut($content) . "</h$level>";

		return "\n" . $this->hashBlock($html) . "\n\n";
	}
 976  
	/**
	 * If a header_id_func property is set, we can use it to automatically
	 * generate an id attribute.
	 *
	 * This method returns a string in the form ` id="foo"` (with a leading
	 * space), or an empty string when header_id_func is not callable or
	 * yields no usable id.
	 * @param  string $headerValue Raw header text handed to header_id_func
	 * @return string
	 */
	protected function _generateIdFromHeaderValue($headerValue) {
		if (!is_callable($this->header_id_func)) {
			return "";
		}

		$idValue = call_user_func($this->header_id_func, $headerValue);

		// The string "0" is falsy in PHP but is a perfectly usable id, so it
		// must not be discarded along with the other empty results (null,
		// false, "", ...).
		if (!$idValue && $idValue !== '0') {
			return "";
		}

		return ' id="' . $this->encodeAttribute($idValue) . '"';
	}
 998  
	/**
	 * Form HTML ordered (numbered) and unordered (bulleted) lists.
	 *
	 * Runs two replacement passes over $text, first with the bullet marker
	 * pattern and then with the numbered marker pattern, handing every
	 * matched list to _doLists_callback().
	 * @param  string $text
	 * @return string
	 */
	protected function doLists($text) {
		$less_than_tab = $this->tab_width - 1;

		// Re-usable patterns to match list item bullets and number markers:
		$marker_ul_re  = '[*+-]';
		$marker_ol_re  = '\d+[\.]';

		// Maps each marker pattern to the pattern of the other list kind.
		$markers_relist = array(
			$marker_ul_re => $marker_ol_re,
			$marker_ol_re => $marker_ul_re,
			);

		foreach ($markers_relist as $marker_re => $other_marker_re) {
			// Re-usable pattern to match any entire ul or ol list:
			$whole_list_re = '
				(								# $1 = whole list
				  (								# $2
					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
					('.$marker_re.')			# $4 = first list item marker
					[ ]+
				  )
				  (?s:.+?)
				  (								# $5
					  \z
					|
					  \n{2,}
					  (?=\S)
					  (?!						# Negative lookahead for another list item marker
						[ ]*
						'.$marker_re.'[ ]+
					  )
					|
					  (?=						# Lookahead for another kind of list
					    \n
						\3						# Must have the same indentation
						'.$other_marker_re.'[ ]+
					  )
				  )
				)
			'; // meant to be embedded in a pattern using the "mx" modifiers

			// We use a different prefix before nested lists than top-level lists.
			// See extended comment in processListItems().

			if ($this->list_level) {
				// Inside a list: a list may start at the beginning of any line.
				$text = preg_replace_callback('{
						^
						'.$whole_list_re.'
					}mx',
					array($this, '_doLists_callback'), $text);
			} else {
				// Top level: the list must follow a blank line or start the text.
				$text = preg_replace_callback('{
						(?:(?<=\n)\n|\A\n?) # Must eat the newline
						'.$whole_list_re.'
					}mx',
					array($this, '_doLists_callback'), $text);
			}
		}

		return $text;
	}
1065  
1066  	 /**
1067  	  * List parsing callback
1068  	  * @param  array $matches
1069  	  * @return string
1070  	  */
1071  	protected function _doLists_callback($matches) {
1072  	 	 // Re-usable patterns to match list item bullets and number markers:
1073  	 	 $marker_ul_re  = '[*+-]';
1074  	 	 $marker_ol_re  = '\d+[\.]';
1075  	 	 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1076  	 	 $marker_ol_start_re = '[0-9]+';
1077  
1078  	 	 $list = $matches[1];
1079  	 	 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1080  
1081  	 	 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1082  
1083  	 	 $list .= "\n";
1084  	 	 $result = $this->processListItems($list, $marker_any_re);
1085  
1086  	 	 $ol_start = 1;
1087  	 	 if ($this->enhanced_ordered_list) {
1088  	 	 	 // Get the start number for ordered list.
1089  	 	 	 if ($list_type == 'ol') {
1090  	 	 	 	 $ol_start_array = array();
1091  	 	 	 	 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
1092  	 	 	 	 if ($ol_start_check){
1093  	 	 	 	 	 $ol_start = $ol_start_array[0];
1094  	 	 	 	 }
1095  	 	 	 }
1096  	 	 }
1097  
1098  	 	 if ($ol_start > 1 && $list_type == 'ol'){
1099  	 	 	 $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
1100  	 	 } else {
1101  	 	 	 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1102  	 	 }
1103  	 	 return "\n". $result ."\n\n";
1104  	 }
1105  
	/**
	 * Nesting tracker for list levels.
	 *
	 * processListItems() increments it on entry and decrements it before
	 * returning; doLists() chooses its list pattern depending on whether the
	 * value is non-zero.
	 * @var integer
	 */
	protected $list_level = 0;
1111  
	/**
	 * Process the contents of a single ordered or unordered list, splitting it
	 * into individual list items.
	 *
	 * Each matched item is handed to _processListItems_callback().
	 * @param  string $list_str      Text of the whole list
	 * @param  string $marker_any_re Regex fragment matching an item marker
	 * @return string
	 */
	protected function processListItems($list_str, $marker_any_re) {
		/**
		 * The $this->list_level property keeps track of when we're inside a list.
		 * Each time we enter a list, we increment it; when we leave a list,
		 * we decrement. If it's zero, we're not in a list anymore.
		 *
		 * We do this because when we're not inside a list, we want to treat
		 * something like this:
		 *
		 *		I recommend upgrading to version
		 *		8. Oops, now this line is treated
		 *		as a sub-list.
		 *
		 * As a single paragraph, despite the fact that the second line starts
		 * with a digit-period-space sequence.
		 *
		 * Whereas when we're inside a list (or sub-list), that line will be
		 * treated as the start of a sub-list. What a kludge, huh? This is
		 * an aspect of Markdown's syntax that's hard to parse perfectly
		 * without resorting to mind-reading. Perhaps the solution is to
		 * change the syntax rules such that sub-lists must start with a
		 * starting cardinal number; e.g. "1." or "a.".
		 */
		$this->list_level++;

		// Trim trailing blank lines:
		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

		// Match one item at a time; the final lookahead requires the item to
		// be followed by the end of the list or by another marker preceded by
		// the same leading whitespace (\2).
		$list_str = preg_replace_callback('{
			(\n)?							# leading line = $1
			(^[ ]*)							# leading whitespace = $2
			('.$marker_any_re.'				# list marker and space = $3
				(?:[ ]+|(?=\n))	# space only required if item is not empty
			)
			((?s:.*?))						# list item text   = $4
			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
			}xm',
			array($this, '_processListItems_callback'), $list_str);

		$this->list_level--;
		return $list_str;
	}
1162  
1163  	 /**
1164  	  * List item parsing callback
1165  	  * @param  array $matches
1166  	  * @return string
1167  	  */
1168  	protected function _processListItems_callback($matches) {
1169  	 	 $item = $matches[4];
1170  	 	 $leading_line =& $matches[1];
1171  	 	 $leading_space =& $matches[2];
1172  	 	 $marker_space = $matches[3];
1173  	 	 $tailing_blank_line =& $matches[5];
1174  
1175  	 	 if ($leading_line || $tailing_blank_line ||
1176  	 	 	 preg_match('/\n{2,}/', $item))
1177  	 	 {
1178  	 	 	 // Replace marker with the appropriate whitespace indentation
1179  	 	 	 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1180  	 	 	 $item = $this->runBlockGamut($this->outdent($item)."\n");
1181  	 	 } else {
1182  	 	 	 // Recursion for sub-lists:
1183  	 	 	 $item = $this->doLists($this->outdent($item));
1184  	 	 	 $item = $this->formParagraphs($item, false);
1185  	 	 }
1186  
1187  	 	 return "<li>" . $item . "</li>\n";
1188  	 }
1189  
1190  	 /**
1191  	  * Process Markdown `<pre><code>` blocks.
1192  	  * @param  string $text
1193  	  * @return string
1194  	  */
1195  	protected function doCodeBlocks($text) {
1196  	 	 $text = preg_replace_callback('{
1197  	 	 	 	 (?:\n\n|\A\n?)
1198  	 	 	 	 (	             # $1 = the code block -- one or more lines, starting with a space/tab
1199  	 	 	 	   (?>
1200  	 	 	 	 	 [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1201  	 	 	 	 	 .*\n+
1202  	 	 	 	   )+
1203  	 	 	 	 )
1204  	 	 	 	 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	 # Lookahead for non-space at line-start, or end of doc
1205  	 	 	 }xm',
1206  	 	 	 array($this, '_doCodeBlocks_callback'), $text);
1207  
1208  	 	 return $text;
1209  	 }
1210  
1211  	 /**
1212  	  * Code block parsing callback
1213  	  * @param  array $matches
1214  	  * @return string
1215  	  */
1216  	protected function _doCodeBlocks_callback($matches) {
1217  	 	 $codeblock = $matches[1];
1218  
1219  	 	 $codeblock = $this->outdent($codeblock);
1220  	 	 if (is_callable($this->code_block_content_func)) {
1221  	 	 	 $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1222  	 	 } else {
1223  	 	 	 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1224  	 	 }
1225  
1226  	 	 # trim leading newlines and trailing newlines
1227  	 	 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1228  
1229  	 	 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1230  	 	 return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
1231  	 }
1232  
1233  	 /**
1234  	  * Create a code span markup for $code. Called from handleSpanToken.
1235  	  * @param  string $code
1236  	  * @return string
1237  	  */
1238  	protected function makeCodeSpan($code) {
1239  	 	 if (is_callable($this->code_span_content_func)) {
1240  	 	 	 $code = call_user_func($this->code_span_content_func, $code);
1241  	 	 } else {
1242  	 	 	 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1243  	 	 }
1244  	 	 return $this->hashPart("<code>$code</code>");
1245  	 }
1246  
	/**
	 * Define the emphasis operators with their regex matches.
	 *
	 * Keys correspond to the $em state tracked by doItalicsAndBold(): '' while
	 * no emphasis is open, otherwise the marker that opened it. Values are the
	 * regex fragment of the emphasis token to look for in that state.
	 * @var array
	 */
	protected $em_relist = array(
		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
		'*' => '(?<![\s*])\*(?!\*)',
		'_' => '(?<![\s_])_(?!_)',
	);

	/**
	 * Define the strong operators with their regex matches.
	 *
	 * Keys correspond to the $strong state tracked by doItalicsAndBold(): ''
	 * while no strong span is open, otherwise the marker that opened it.
	 * @var array
	 */
	protected $strong_relist = array(
		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
		'**' => '(?<![\s*])\*\*(?!\*)',
		'__' => '(?<![\s_])__(?!_)',
	);

	/**
	 * Define the emphasis + strong operators with their regex matches.
	 *
	 * Looked up by prepareItalicsAndBold() using the concatenation of an
	 * $em_relist key and a $strong_relist key.
	 * @var array
	 */
	protected $em_strong_relist = array(
		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
		'***' => '(?<![\s*])\*\*\*(?!\*)',
		'___' => '(?<![\s_])___(?!_)',
	);

	/**
	 * Container for prepared regular expressions, filled by
	 * prepareItalicsAndBold() and indexed by the "$em$strong" state string.
	 * @var array
	 */
	protected $em_strong_prepared_relist;
1282  
1283  	 /**
1284  	  * Prepare regular expressions for searching emphasis tokens in any
1285  	  * context.
1286  	  * @return void
1287  	  */
1288  	protected function prepareItalicsAndBold() {
1289  	 	 foreach ($this->em_relist as $em => $em_re) {
1290  	 	 	 foreach ($this->strong_relist as $strong => $strong_re) {
1291  	 	 	 	 // Construct list of allowed token expressions.
1292  	 	 	 	 $token_relist = array();
1293  	 	 	 	 if (isset($this->em_strong_relist["$em$strong"])) {
1294  	 	 	 	 	 $token_relist[] = $this->em_strong_relist["$em$strong"];
1295  	 	 	 	 }
1296  	 	 	 	 $token_relist[] = $em_re;
1297  	 	 	 	 $token_relist[] = $strong_re;
1298  
1299  	 	 	 	 // Construct master expression from list.
1300  	 	 	 	 $token_re = '{(' . implode('|', $token_relist) . ')}';
1301  	 	 	 	 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1302  	 	 	 }
1303  	 	 }
1304  	 }
1305  
	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML.
	 *
	 * Scans $text token by token using the expressions stored in
	 * em_strong_prepared_relist (filled by prepareItalicsAndBold()), keeping a
	 * stack of open markers and a parallel stack of the text collected since
	 * each marker was opened. Returns $text unchanged when called while
	 * already running (see in_emphasis_processing).
	 * @param  string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		if ($this->in_emphasis_processing) {
			return $text; // avoid reentrancy
		}
		$this->in_emphasis_processing = true;

		// Index 0 is the top of each stack; the bottom '' entries stand for
		// the text outside any emphasis.
		$token_stack = array('');
		$text_stack = array('');
		// Currently open em / strong marker, '' when none is open.
		$em = '';
		$strong = '';
		// True right after an opening three-character marker was pushed.
		$tree_char_em = false;

		while (1) {
			// Get prepared regular expression for searching emphasis tokens
			// in current context.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];

			// Each loop iteration searches for the next emphasis token, which
			// is then handled by the branches below.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			$token =& $parts[1];
			$text =& $parts[2];

			if (empty($token)) {
				// Reached end of text span: empty the stack without emitting
				// any more emphasis; unclosed markers go back into the text.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}

			$token_len = strlen($token);
			if ($tree_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					$$tag = ''; // $$tag stands for $em or $strong
				}
				$tree_char_em = false;
			} else if ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong.
					// Close the two innermost spans, one per iteration:
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						$$tag = ''; // $$tag stands for $em or $strong
					}
				} else {
					// Reached opening three-char emphasis marker. Push on token
					// stack; will be handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$tree_char_em = true;
				}
			} else if ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
						$em = '';
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker:
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// The innermost open marker is not an em marker:
						// keep this token as literal text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker:
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}
		$this->in_emphasis_processing = false;
		return $text_stack[0];
	}
1435  
1436  	 /**
1437  	  * Parse Markdown blockquotes to HTML
1438  	  * @param  string $text
1439  	  * @return string
1440  	  */
1441  	protected function doBlockQuotes($text) {
1442  	 	 $text = preg_replace_callback('/
1443  	 	 	   (	 	 	 	 	 	 	 	 # Wrap whole match in $1
1444  	 	 	 	 (?>
1445  	 	 	 	   ^[ ]*>[ ]?	 	 	 # ">" at the start of a line
1446  	 	 	 	 	 .+\n	 	 	 	 	 # rest of the first line
1447  	 	 	 	   (.+\n)*	 	 	 	 	 # subsequent consecutive lines
1448  	 	 	 	   \n*	 	 	 	 	 	 # blanks
1449  	 	 	 	 )+
1450  	 	 	   )
1451  	 	 	 /xm',
1452  	 	 	 array($this, '_doBlockQuotes_callback'), $text);
1453  
1454  	 	 return $text;
1455  	 }
1456  
1457  	 /**
1458  	  * Blockquote parsing callback
1459  	  * @param  array $matches
1460  	  * @return string
1461  	  */
1462  	protected function _doBlockQuotes_callback($matches) {
1463  	 	 $bq = $matches[1];
1464  	 	 // trim one level of quoting - trim whitespace-only lines
1465  	 	 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1466  	 	 $bq = $this->runBlockGamut($bq); // recurse
1467  
1468  	 	 $bq = preg_replace('/^/m', "  ", $bq);
1469  	 	 // These leading spaces cause problem with <pre> content,
1470  	 	 // so we need to fix that:
1471  	 	 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1472  	 	 	 array($this, '_doBlockQuotes_callback2'), $bq);
1473  
1474  	 	 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
1475  	 }
1476  
1477  	 /**
1478  	  * Blockquote parsing callback
1479  	  * @param  array $matches
1480  	  * @return string
1481  	  */
1482  	protected function _doBlockQuotes_callback2($matches) {
1483  	 	 $pre = $matches[1];
1484  	 	 $pre = preg_replace('/^  /m', '', $pre);
1485  	 	 return $pre;
1486  	 }
1487  
1488  	 /**
1489  	  * Parse paragraphs
1490  	  *
1491  	  * @param  string $text String to process in paragraphs
1492  	  * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1493  	  * @return string
1494  	  */
1495  	protected function formParagraphs($text, $wrap_in_p = true) {
1496  	 	 // Strip leading and trailing lines:
1497  	 	 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1498  
1499  	 	 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1500  
1501  	 	 // Wrap <p> tags and unhashify HTML blocks
1502  	 	 foreach ($grafs as $key => $value) {
1503  	 	 	 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1504  	 	 	 	 // Is a paragraph.
1505  	 	 	 	 $value = $this->runSpanGamut($value);
1506  	 	 	 	 if ($wrap_in_p) {
1507  	 	 	 	 	 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1508  	 	 	 	 	 $value .= "</p>";
1509  	 	 	 	 }
1510  	 	 	 	 $grafs[$key] = $this->unhash($value);
1511  	 	 	 } else {
1512  	 	 	 	 // Is a block.
1513  	 	 	 	 // Modify elements of @grafs in-place...
1514  	 	 	 	 $graf = $value;
1515  	 	 	 	 $block = $this->html_hashes[$graf];
1516  	 	 	 	 $graf = $block;
1517  //	 	 	 	 if (preg_match('{
1518  //	 	 	 	 	 \A
1519  //	 	 	 	 	 (	 	 	 	 	 	 	 # $1 = <div> tag
1520  //	 	 	 	 	   <div  \s+
1521  //	 	 	 	 	   [^>]*
1522  //	 	 	 	 	   \b
1523  //	 	 	 	 	   markdown\s*=\s*  ([\'"])	 #	 $2 = attr quote char
1524  //	 	 	 	 	   1
1525  //	 	 	 	 	   \2
1526  //	 	 	 	 	   [^>]*
1527  //	 	 	 	 	   >
1528  //	 	 	 	 	 )
1529  //	 	 	 	 	 (	 	 	 	 	 	 	 # $3 = contents
1530  //	 	 	 	 	 .*
1531  //	 	 	 	 	 )
1532  //	 	 	 	 	 (</div>)	 	 	 	 	 # $4 = closing tag
1533  //	 	 	 	 	 \z
1534  //	 	 	 	 	 }xs', $block, $matches))
1535  //	 	 	 	 {
1536  //	 	 	 	 	 list(, $div_open, , $div_content, $div_close) = $matches;
1537  //
1538  //	 	 	 	 	 // We can't call Markdown(), because that resets the hash;
1539  //	 	 	 	 	 // that initialization code should be pulled into its own sub, though.
1540  //	 	 	 	 	 $div_content = $this->hashHTMLBlocks($div_content);
1541  //
1542  //	 	 	 	 	 // Run document gamut methods on the content.
1543  //	 	 	 	 	 foreach ($this->document_gamut as $method => $priority) {
1544  //	 	 	 	 	 	 $div_content = $this->$method($div_content);
1545  //	 	 	 	 	 }
1546  //
1547  //	 	 	 	 	 $div_open = preg_replace(
1548  //	 	 	 	 	 	 '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1549  //
1550  //	 	 	 	 	 $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1551  //	 	 	 	 }
1552  	 	 	 	 $grafs[$key] = $graf;
1553  	 	 	 }
1554  	 	 }
1555  
1556  	 	 return implode("\n\n", $grafs);
1557  	 }
1558  
1559  	 /**
1560  	  * Encode text for a double-quoted HTML attribute. This function
1561  	  * is *not* suitable for attributes enclosed in single quotes.
1562  	  * @param  string $text
1563  	  * @return string
1564  	  */
1565  	protected function encodeAttribute($text) {
1566  	 	 $text = $this->encodeAmpsAndAngles($text);
1567  	 	 $text = str_replace('"', '&quot;', $text);
1568  	 	 return $text;
1569  	 }
1570  
1571  	 /**
1572  	  * Encode text for a double-quoted HTML attribute containing a URL,
1573  	  * applying the URL filter if set. Also generates the textual
1574  	  * representation for the URL (removing mailto: or tel:) storing it in $text.
1575  	  * This function is *not* suitable for attributes enclosed in single quotes.
1576  	  *
1577  	  * @param  string $url
1578  	  * @param  string $text Passed by reference
1579  	  * @return string        URL
1580  	  */
1581  	protected function encodeURLAttribute($url, &$text = null) {
1582  	 	 if (is_callable($this->url_filter_func)) {
1583  	 	 	 $url = call_user_func($this->url_filter_func, $url);
1584  	 	 }
1585  
1586  	 	 if (preg_match('{^mailto:}i', $url)) {
1587  	 	 	 $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1588  	 	 } else if (preg_match('{^tel:}i', $url)) {
1589  	 	 	 $url = $this->encodeAttribute($url);
1590  	 	 	 $text = substr($url, 4);
1591  	 	 } else {
1592  	 	 	 $url = $this->encodeAttribute($url);
1593  	 	 	 $text = $url;
1594  	 	 }
1595  
1596  	 	 return $url;
1597  	 }
1598  
1599  	 /**
1600  	  * Smart processing for ampersands and angle brackets that need to
1601  	  * be encoded. Valid character entities are left alone unless the
1602  	  * no-entities mode is set.
1603  	  * @param  string $text
1604  	  * @return string
1605  	  */
1606  	protected function encodeAmpsAndAngles($text) {
1607  	 	 if ($this->no_entities) {
1608  	 	 	 $text = str_replace('&', '&amp;', $text);
1609  	 	 } else {
1610  	 	 	 // Ampersand-encoding based entirely on Nat Irons's Amputator
1611  	 	 	 // MT plugin: <http://bumppo.net/projects/amputator/>
1612  	 	 	 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1613  	 	 	 	 	 	 	 	 '&amp;', $text);
1614  	 	 }
1615  	 	 // Encode remaining <'s
1616  	 	 $text = str_replace('<', '&lt;', $text);
1617  
1618  	 	 return $text;
1619  	 }
1620  
1621  	 /**
1622  	  * Parse Markdown automatic links to anchor HTML tags
1623  	  * @param  string $text
1624  	  * @return string
1625  	  */
1626  	protected function doAutoLinks($text) {
1627  	 	 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1628  	 	 	 array($this, '_doAutoLinks_url_callback'), $text);
1629  
1630  	 	 // Email addresses: <address@domain.foo>
1631  	 	 $text = preg_replace_callback('{
1632  	 	 	 <
1633  	 	 	 (?:mailto:)?
1634  	 	 	 (
1635  	 	 	 	 (?:
1636  	 	 	 	 	 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1637  	 	 	 	 |
1638  	 	 	 	 	 ".*?"
1639  	 	 	 	 )
1640  	 	 	 	 \@
1641  	 	 	 	 (?:
1642  	 	 	 	 	 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1643  	 	 	 	 |
1644  	 	 	 	 	 \[[\d.a-fA-F:]+\]	 # IPv4 & IPv6
1645  	 	 	 	 )
1646  	 	 	 )
1647  	 	 	 >
1648  	 	 	 }xi',
1649  	 	 	 array($this, '_doAutoLinks_email_callback'), $text);
1650  
1651  	 	 return $text;
1652  	 }
1653  
1654  	 /**
1655  	  * Parse URL callback
1656  	  * @param  array $matches
1657  	  * @return string
1658  	  */
1659  	protected function _doAutoLinks_url_callback($matches) {
1660  	 	 $url = $this->encodeURLAttribute($matches[1], $text);
1661  	 	 $link = "<a href=\"$url\">$text</a>";
1662  	 	 return $this->hashPart($link);
1663  	 }
1664  
1665  	 /**
1666  	  * Parse email address callback
1667  	  * @param  array $matches
1668  	  * @return string
1669  	  */
1670  	protected function _doAutoLinks_email_callback($matches) {
1671  	 	 $addr = $matches[1];
1672  	 	 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1673  	 	 $link = "<a href=\"$url\">$text</a>";
1674  	 	 return $this->hashPart($link);
1675  	 }
1676  
1677  	 /**
1678  	  * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1679  	  *
1680  	  * Output: the same text but with most characters encoded as either a
1681  	  *         decimal or hex entity, in the hopes of foiling most address
1682  	  *         harvesting spam bots. E.g.:
1683  	  *
1684  	  *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1685  	  *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1686  	  *        &#x6d;
1687  	  *
1688  	  * Note: the additional output $tail is assigned the same value as the
1689  	  * ouput, minus the number of characters specified by $head_length.
1690  	  *
1691  	  * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1692  	  * With some optimizations by Milian Wolff. Forced encoding of HTML
1693  	  * attribute special characters by Allan Odgaard.
1694  	  *
1695  	  * @param  string  $text
1696  	  * @param  string  $tail Passed by reference
1697  	  * @param  integer $head_length
1698  	  * @return string
1699  	  */
1700  	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1701  	 	 if ($text == "") {
1702  	 	 	 return $tail = "";
1703  	 	 }
1704  
1705  	 	 $chars = preg_split('/(?<!^)(?!$)/', $text);
1706  	 	 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.
1707  
1708  	 	 foreach ($chars as $key => $char) {
1709  	 	 	 $ord = ord($char);
1710  	 	 	 // Ignore non-ascii chars.
1711  	 	 	 if ($ord < 128) {
1712  	 	 	 	 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
1713  	 	 	 	 // roughly 10% raw, 45% hex, 45% dec
1714  	 	 	 	 // '@' *must* be encoded. I insist.
1715  	 	 	 	 // '"' and '>' have to be encoded inside the attribute
1716  	 	 	 	 if ($r > 90 && strpos('@"&>', $char) === false) {
1717  	 	 	 	 	 /* do nothing */
1718  	 	 	 	 } else if ($r < 45) {
1719  	 	 	 	 	 $chars[$key] = '&#x'.dechex($ord).';';
1720  	 	 	 	 } else {
1721  	 	 	 	 	 $chars[$key] = '&#'.$ord.';';
1722  	 	 	 	 }
1723  	 	 	 }
1724  	 	 }
1725  
1726  	 	 $text = implode('', $chars);
1727  	 	 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1728  
1729  	 	 return $text;
1730  	 }
1731  
1732  	 /**
1733  	  * Take the string $str and parse it into tokens, hashing embeded HTML,
1734  	  * escaped characters and handling code spans.
1735  	  * @param  string $str
1736  	  * @return string
1737  	  */
1738  	protected function parseSpan($str) {
1739  	 	 $output = '';
1740  
1741  	 	 $span_re = '{
1742  	 	 	 	 (
1743  	 	 	 	 	 \\\\'.$this->escape_chars_re.'
1744  	 	 	 	 |
1745  	 	 	 	 	 (?<![`\\\\])
1746  	 	 	 	 	 `+	 	 	 	 	 	 # code span marker
1747  	 	 	 '.( $this->no_markup ? '' : '
1748  	 	 	 	 |
1749  	 	 	 	 	 <!--    .*?     -->	 	 # comment
1750  	 	 	 	 |
1751  	 	 	 	 	 <\?.*?\?> | <%.*?%>	 	 # processing instruction
1752  	 	 	 	 |
1753  	 	 	 	 	 <[!$]?[-a-zA-Z0-9:_]+	 # regular tags
1754  	 	 	 	 	 (?>
1755  	 	 	 	 	 	 \s
1756  	 	 	 	 	 	 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1757  	 	 	 	 	 )?
1758  	 	 	 	 	 >
1759  	 	 	 	 |
1760  	 	 	 	 	 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1761  	 	 	 	 |
1762  	 	 	 	 	 </[-a-zA-Z0-9:_]+\s*> # closing tag
1763  	 	 	 ').'
1764  	 	 	 	 )
1765  	 	 	 	 }xs';
1766  
1767  	 	 while (1) {
1768  	 	 	 // Each loop iteration seach for either the next tag, the next
1769  	 	 	 // openning code span marker, or the next escaped character.
1770  	 	 	 // Each token is then passed to handleSpanToken.
1771  	 	 	 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1772  
1773  	 	 	 // Create token from text preceding tag.
1774  	 	 	 if ($parts[0] != "") {
1775  	 	 	 	 $output .= $parts[0];
1776  	 	 	 }
1777  
1778  	 	 	 // Check if we reach the end.
1779  	 	 	 if (isset($parts[1])) {
1780  	 	 	 	 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1781  	 	 	 	 $str = $parts[2];
1782  	 	 	 } else {
1783  	 	 	 	 break;
1784  	 	 	 }
1785  	 	 }
1786  
1787  	 	 return $output;
1788  	 }
1789  
1790  	 /**
1791  	  * Handle $token provided by parseSpan by determining its nature and
1792  	  * returning the corresponding value that should replace it.
1793  	  * @param  string $token
1794  	  * @param  string $str Passed by reference
1795  	  * @return string
1796  	  */
1797  	protected function handleSpanToken($token, &$str) {
1798  	 	 switch ($token[0]) {
1799  	 	 	 case "\\":
1800  	 	 	 	 return $this->hashPart("&#". ord($token[1]). ";");
1801  	 	 	 case "`":
1802  	 	 	 	 // Search for end marker in remaining text.
1803  	 	 	 	 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1804  	 	 	 	 	 $str, $matches))
1805  	 	 	 	 {
1806  	 	 	 	 	 $str = $matches[2];
1807  	 	 	 	 	 $codespan = $this->makeCodeSpan($matches[1]);
1808  	 	 	 	 	 return $this->hashPart($codespan);
1809  	 	 	 	 }
1810  	 	 	 	 return $token; // Return as text since no ending marker found.
1811  	 	 	 default:
1812  	 	 	 	 return $this->hashPart($token);
1813  	 	 }
1814  	 }
1815  
1816  	 /**
1817  	  * Remove one level of line-leading tabs or spaces
1818  	  * @param  string $text
1819  	  * @return string
1820  	  */
1821  	protected function outdent($text) {
1822  	 	 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1823  	 }
1824  
1825  
1826  	 /**
1827  	  * String length function for detab. `_initDetab` will create a function to
1828  	  * handle UTF-8 if the default function does not exist.
1829  	  * @var string
1830  	  */
1831  	 protected $utf8_strlen = 'mb_strlen'; // Called as $strlen($line, 'UTF-8') in _detab_callback; _initDetab may replace it with a closure.
1832  
1833  	 /**
1834  	  * Replace tabs with the appropriate amount of spaces.
1835  	  *
1836  	  * For each line we separate the line in blocks delimited by tab characters.
1837  	  * Then we reconstruct every line by adding the appropriate number of spaces
1838  	  * between each block.
1839  	  *
1840  	  * @param  string $text
1841  	  * @return string
1842  	  */
1843  	protected function detab($text) {
1844  	 	 $text = preg_replace_callback('/^.*\t.*$/m',
1845  	 	 	 array($this, '_detab_callback'), $text);
1846  
1847  	 	 return $text;
1848  	 }
1849  
1850  	 /**
1851  	  * Replace tabs callback
1852  	  * @param  array $matches
1853  	  * @return string
1854  	  */
1855  	protected function _detab_callback($matches) {
1856  	 	 $line = $matches[0];
1857  	 	 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1858  
1859  	 	 // Split in blocks.
1860  	 	 $blocks = explode("\t", $line);
1861  	 	 // Add each blocks to the line.
1862  	 	 $line = $blocks[0];
1863  	 	 unset($blocks[0]); // Do not add first block twice.
1864  	 	 foreach ($blocks as $block) {
1865  	 	 	 // Calculate amount of space, insert spaces, insert block.
1866  	 	 	 $amount = $this->tab_width -
1867  	 	 	 	 $strlen($line, 'UTF-8') % $this->tab_width;
1868  	 	 	 $line .= str_repeat(" ", $amount) . $block;
1869  	 	 }
1870  	 	 return $line;
1871  	 }
1872  
1873  	 /**
1874  	  * Check for the availability of the function in the `utf8_strlen` property
1875  	  * (initially `mb_strlen`). If the function is not available, create a
1876  	  * function that will loosely count the number of UTF-8 characters with a
1877  	  * regular expression.
1878  	  * @return void
1879  	  */
1880  	protected function _initDetab() {
1881  
1882  	 	 if (function_exists($this->utf8_strlen)) {
1883  	 	 	 return;
1884  	 	 }
1885  
1886  	 	 $this->utf8_strlen = function($text) {
1887  	 	 	 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
1888  	 	 };
1889  	 }
1890  
1891  	 /**
1892  	  * Swap back in all the tags hashed by _HashHTMLBlocks.
1893  	  * @param  string $text
1894  	  * @return string
1895  	  */
1896  	protected function unhash($text) {
1897  	 	 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1898  	 	 	 array($this, '_unhash_callback'), $text);
1899  	 }
1900  
1901  	 /**
1902  	  * Unhashing callback
1903  	  * @param  array $matches
1904  	  * @return string
1905  	  */
1906  	protected function _unhash_callback($matches) {
1907  	 	 return $this->html_hashes[$matches[0]];
1908  	 }
1909  }