Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  /**
   3   * Markdown  -  A text-to-HTML conversion tool for web writers
   4   *
   5   * @package   php-markdown
   6   * @author    Michel Fortin <michel.fortin@michelf.com>
   7   * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8   * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9   */
  10  
  11  namespace Michelf;
  12  
  13  /**
  14   * Markdown Parser Class
  15   */
  16  class Markdown implements MarkdownInterface {
  17  	 /**
  18  	  * Define the package version
  19  	  * @var string
  20  	  */
  21  	 const MARKDOWNLIB_VERSION = "1.8.0";
  22  
  23  	 /**
  24  	  * Simple function interface - Initialize the parser and return the result
  25  	  * of its transform method. This will work fine for derived classes too.
  26  	  *
  27  	  * @api
  28  	  *
  29  	  * @param  string $text
  30  	  * @return string
  31  	  */
  32  	public static function defaultTransform($text) {
  33  	 	 // Take parser class on which this function was called.
  34  	 	 $parser_class = \get_called_class();
  35  
  36  	 	 // Try to take parser from the static parser list
  37  	 	 static $parser_list;
  38  	 	 $parser =& $parser_list[$parser_class];
  39  
  40  	 	 // Create the parser it not already set
  41  	 	 if (!$parser) {
  42  	 	 	 $parser = new $parser_class;
  43  	 	 }
  44  
  45  	 	 // Transform text using parser.
  46  	 	 return $parser->transform($text);
  47  	 }
  48  
  49  	 /**
  50  	  * Configuration variables
  51  	  */
  52  
  53  	 /**
  54  	  * Change to ">" for HTML output.
  55  	  * @var string
  56  	  */
  57  	 public $empty_element_suffix = " />";
  58  
  59  	 /**
  60  	  * The width of indentation of the output markup
  61  	  * @var int
  62  	  */
  63  	 public $tab_width = 4;
  64  
  65  	 /**
  66  	  * Change to `true` to disallow markup or entities.
  67  	  * @var boolean
  68  	  */
  69  	 public $no_markup   = false;
  70  	 public $no_entities = false;
  71  
  72  
  73  	 /**
  74  	  * Change to `true` to enable line breaks on \n without two trailing spaces
  75  	  * @var boolean
  76  	  */
  77  	 public $hard_wrap = false;
  78  
  79  	 /**
  80  	  * Predefined URLs and titles for reference links and images.
  81  	  * @var array
  82  	  */
  83  	 public $predef_urls   = array();
  84  	 public $predef_titles = array();
  85  
  86  	 /**
  87  	  * Optional filter function for URLs
  88  	  * @var callable
  89  	  */
  90  	 public $url_filter_func = null;
  91  
  92  	 /**
  93  	  * Optional header id="" generation callback function.
  94  	  * @var callable
  95  	  */
  96  	 public $header_id_func = null;
  97  
  98  	 /**
  99  	  * Optional function for converting code block content to HTML
 100  	  * @var callable
 101  	  */
 102  	 public $code_block_content_func = null;
 103  
 104  	 /**
 105  	  * Optional function for converting code span content to HTML.
 106  	  * @var callable
 107  	  */
 108  	 public $code_span_content_func = null;
 109  
 110  	 /**
 111  	  * Class attribute to toggle "enhanced ordered list" behaviour
 112  	  * setting this to true will allow ordered lists to start from the index
 113  	  * number that is defined first.
 114  	  *
 115  	  * For example:
 116  	  * 2. List item two
 117  	  * 3. List item three
 118  	  *
 119  	  * Becomes:
 120  	  * <ol start="2">
 121  	  * <li>List item two</li>
 122  	  * <li>List item three</li>
 123  	  * </ol>
 124  	  *
 125  	  * @var bool
 126  	  */
 127  	 public $enhanced_ordered_list = false;
 128  
 129  	 /**
 130  	  * Parser implementation
 131  	  */
 132  
 133  	 /**
 134  	  * Regex to match balanced [brackets].
 135  	  * Needed to insert a maximum bracket depth while converting to PHP.
 136  	  * @var int
 137  	  */
 138  	 protected $nested_brackets_depth = 6;
 139  	 protected $nested_brackets_re;
 140  
 141  	 protected $nested_url_parenthesis_depth = 4;
 142  	 protected $nested_url_parenthesis_re;
 143  
 144  	 /**
 145  	  * Table of hash values for escaped characters:
 146  	  * @var string
 147  	  */
 148  	 protected $escape_chars = '\`*_{}[]()>#+-.!';
 149  	 protected $escape_chars_re;
 150  
 151  	 /**
 152  	  * Constructor function. Initialize appropriate member variables.
 153  	  * @return void
 154  	  */
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Balanced [brackets]: the opening fragment repeated once per allowed
		// nesting level, followed by the same number of closing fragments.
		$bracket_levels = $this->nested_brackets_depth;
		$bracket_open   = str_repeat('(?>[^\[\]]+|\[', $bracket_levels);
		$bracket_close  = str_repeat('\])*', $bracket_levels);
		$this->nested_brackets_re = $bracket_open . $bracket_close;

		// Balanced (parentheses) inside URLs, built the same way.
		$paren_levels = $this->nested_url_parenthesis_depth;
		$paren_open   = str_repeat('(?>[^()\s]+|\(', $paren_levels);
		$paren_close  = str_repeat('(?>\)))*', $paren_levels);
		$this->nested_url_parenthesis_re = $paren_open . $paren_close;

		// Character class matching any of the backslash-escapable characters.
		$quoted_chars = preg_quote($this->escape_chars);
		$this->escape_chars_re = '[' . $quoted_chars . ']';

		// Order each gamut by ascending priority value (keys are method
		// names and must be preserved, hence asort).
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut_property) {
			asort($this->$gamut_property);
		}
	}
 174  
 175  
 176  	 /**
 177  	  * Internal hashes used during transformation.
 178  	  * @var array
 179  	  */
 180  	 protected $urls        = array();
 181  	 protected $titles      = array();
 182  	 protected $html_hashes = array();
 183  
 184  	 /**
 185  	  * Status flag to avoid invalid nesting.
 186  	  * @var boolean
 187  	  */
 188  	 protected $in_anchor = false;
 189  
 190  	 /**
 191  	  * Status flag to avoid invalid nesting.
 192  	  * @var boolean
 193  	  */
 194  	 protected $in_emphasis_processing = false;
 195  
 196  	 /**
 197  	  * Called before the transformation process starts to setup parser states.
 198  	  * @return void
 199  	  */
	protected function setup() {
		// Start each document from the predefined reference links.
		$this->urls   = $this->predef_urls;
		$this->titles = $this->predef_titles;

		// No part of the document has been hashed yet.
		$this->html_hashes = array();

		// Neither an anchor nor an emphasis run is in progress.
		$this->in_anchor = $this->in_emphasis_processing = false;
	}
 208  
 209  	 /**
 210  	  * Called after the transformation process to clear any variable which may
 211  	  * be taking up memory unnecessarily.
 212  	  * @return void
 213  	  */
	protected function teardown() {
		// Release the per-document link tables and the hash table in one go.
		$this->urls = $this->titles = $this->html_hashes = array();
	}
 219  
 220  	 /**
 221  	  * Main function. Performs some preprocessing on the input text and pass
 222  	  * it through the document gamut.
 223  	  *
 224  	  * @api
 225  	  *
 226  	  * @param  string $text
 227  	  * @return string
 228  	  */
	public function transform($text) {
		$this->setup();

		// Drop a leading UTF-8 byte order mark and every \x1A byte: \x1A is
		// the marker character used inside hash tokens (see hashPart).
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		// Normalize DOS (\r\n) and old Mac (\r) line endings to Unix (\n).
		$text = preg_replace('{\r\n?}', "\n", $text);

		// Guarantee the text ends with a couple of newlines, then expand
		// tabs to spaces.
		$text = $this->detab($text . "\n\n");

		// Replace block-level HTML with hash tokens.
		$text = $this->hashHTMLBlocks($text);

		// Empty out lines made only of spaces, so that later patterns can
		// match consecutive blank lines with a plain /\n+/.
		$text = preg_replace('/^[ ]+$/m', '', $text);

		// Apply each document-level step in its configured order.
		foreach (array_keys($this->document_gamut) as $step) {
			$text = $this->$step($text);
		}

		$this->teardown();

		return $text . "\n";
	}
 263  
 264  	 /**
 265  	  * Define the document gamut
 266  	  * @var array
 267  	  */
 268  	 protected $document_gamut = array(
 269  	 	 // Strip link definitions, store in hashes.
 270  	 	 "stripLinkDefinitions" => 20,
 271  	 	 "runBasicBlockGamut"   => 30,
 272  	 );
 273  
 274  	 /**
 275  	  * Strips link definitions from text, stores the URLs and titles in
 276  	  * hash references
 277  	  * @param  string $text
 278  	  * @return string
 279  	  */
 280  	protected function stripLinkDefinitions($text) {
 281  
      	 	 // A definition may be indented by at most tab_width - 1 spaces.
 282  	 	 $less_than_tab = $this->tab_width - 1;
 283  
 284  	 	 // Link defs are in the form: ^[id]: url "optional title"
      	 	 // Each match is passed to the callback and replaced by its return value.
 285  	 	 $text = preg_replace_callback('{
 286  	 	 	 	 	 	 	 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	 # id = $1
 287  	 	 	 	 	 	 	   [ ]*
 288  	 	 	 	 	 	 	   \n?	 	 	 	 # maybe *one* newline
 289  	 	 	 	 	 	 	   [ ]*
 290  	 	 	 	 	 	 	 (?:
 291  	 	 	 	 	 	 	   <(.+?)>	 	 	 # url = $2
 292  	 	 	 	 	 	 	 |
 293  	 	 	 	 	 	 	   (\S+?)	 	 	 # url = $3
 294  	 	 	 	 	 	 	 )
 295  	 	 	 	 	 	 	   [ ]*
 296  	 	 	 	 	 	 	   \n?	 	 	 	 # maybe one newline
 297  	 	 	 	 	 	 	   [ ]*
 298  	 	 	 	 	 	 	 (?:
 299  	 	 	 	 	 	 	 	 (?<=\s)	 	 	 # lookbehind for whitespace
 300  	 	 	 	 	 	 	 	 ["(]
 301  	 	 	 	 	 	 	 	 (.*?)	 	 	 # title = $4
 302  	 	 	 	 	 	 	 	 [")]
 303  	 	 	 	 	 	 	 	 [ ]*
 304  	 	 	 	 	 	 	 )?	 # title is optional
 305  	 	 	 	 	 	 	 (?:\n+|\Z)
 306  	 	 	 }xm',
 307  	 	 	 array($this, '_stripLinkDefinitions_callback'),
 308  	 	 	 $text
 309  	 	 );
 310  	 	 return $text;
 311  	 }
 312  
 313  	 /**
 314  	  * The callback to strip link definitions
 315  	  * @param  array $matches
 316  	  * @return string
 317  	  */
	protected function _stripLinkDefinitions_callback($matches) {
		// Link ids are stored lower-cased.
		$key = strtolower($matches[1]);

		// The URL was captured either between angle brackets ($2) or bare ($3).
		if ($matches[2] == '') {
			$this->urls[$key] = $matches[3];
		} else {
			$this->urls[$key] = $matches[2];
		}

		// The title group is optional and may be missing from $matches;
		// binding by reference stores null in that case instead of raising
		// an undefined-index notice.
		$this->titles[$key] =& $matches[4];

		// The definition itself is removed from the text.
		return '';
	}
 325  
 326  	 /**
 327  	  * Hashify HTML blocks
 328  	  * @param  string $text
 329  	  * @return string
 330  	  */
 331  	protected function hashHTMLBlocks($text) {
      	 	 // With markup disallowed the text is returned untouched.
 332  	 	 if ($this->no_markup) {
 333  	 	 	 return $text;
 334  	 	 }
 335  
 336  	 	 $less_than_tab = $this->tab_width - 1;
 337  
 338  	 	 /**
 339  	 	  * Hashify HTML blocks:
 340  	 	  *
 341  	 	  * We only want to do this for block-level HTML tags, such as headers,
 342  	 	  * lists, and tables. That's because we still want to wrap <p>s around
 343  	 	  * "paragraphs" that are wrapped in non-block-level tags, such as
 344  	 	  * anchors, phrase emphasis, and spans. The list of tags we're looking
 345  	 	  * for is hard-coded:
 346  	 	  *
 347  	 	  * *  List "a" is made of tags which can be both inline or block-level.
 348  	 	  *    These will be treated block-level when the start tag is alone on
 349  	 	  *    its line, otherwise they're not matched here and will be taken as
 350  	 	  *    inline later.
 351  	 	  * *  List "b" is made of tags which are always block-level.
 352  	 	  */
 353  	 	 $block_tags_a_re = 'ins|del';
 354  	 	 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 355  	 	 	 	 	 	    'script|noscript|style|form|fieldset|iframe|math|svg|'.
 356  	 	 	 	 	 	    'article|section|nav|aside|hgroup|header|footer|'.
 357  	 	 	 	 	 	    'figure';
 358  
 359  	 	 // Regular expression for the content of a block tag.
      	 	 // $nested_tags_level is the number of times the nesting pattern below is repeated.
 360  	 	 $nested_tags_level = 4;
 361  	 	 $attr = '
 362  	 	 	 (?>	 	 	 	 # optional tag attributes
 363  	 	 	   \s	 	 	 # starts with whitespace
 364  	 	 	   (?>
 365  	 	 	 	 [^>"/]+	 	 # text outside quotes
 366  	 	 	   |
 367  	 	 	 	 /+(?!>)	 	 # slash not followed by ">"
 368  	 	 	   |
 369  	 	 	 	 "[^"]*"	 	 # text inside double quotes (tolerate ">")
 370  	 	 	   |
 371  	 	 	 	 \'[^\']*\'	 # text inside single quotes (tolerate ">")
 372  	 	 	   )*
 373  	 	 	 )?
 374  	 	 	 ';
 375  	 	 $content =
 376  	 	 	 str_repeat('
 377  	 	 	 	 (?>
 378  	 	 	 	   [^<]+	 	 	 # content without tag
 379  	 	 	 	 |
 380  	 	 	 	   <\2	 	 	 # nested opening tag
 381  	 	 	 	 	 '.$attr.'	 # attributes
 382  	 	 	 	 	 (?>
 383  	 	 	 	 	   />
 384  	 	 	 	 	 |
 385  	 	 	 	 	   >', $nested_tags_level).	 // end of opening tag
 386  	 	 	 	 	   '.*?'.	 	 	 	 	 // last level nested tag content
 387  	 	 	 str_repeat('
 388  	 	 	 	 	   </\2\s*>	 # closing nested tag
 389  	 	 	 	 	 )
 390  	 	 	 	   |
 391  	 	 	 	 	 <(?!/\2\s*>	 # other tags with a different name
 392  	 	 	 	   )
 393  	 	 	 	 )*',
 394  	 	 	 	 $nested_tags_level);
      	 	 // Same content pattern for the group "a" alternative, whose tag name is capture $3.
 395  	 	 $content2 = str_replace('\2', '\3', $content);
 396  
 397  	 	 /**
 398  	 	  * First, look for nested blocks, e.g.:
 399  	 	  * 	 <div>
 400  	 	  * 	 	 <div>
 401  	 	  * 	 	 tags for inner block must be indented.
 402  	 	  * 	 	 </div>
 403  	 	  * 	 </div>
 404  	 	  *
 405  	 	  * The outermost tags must start at the left margin for this to match,
 406  	 	  * and the inner nested divs must be indented.
 407  	 	  * We need to do this before the next, more liberal match, because the
 408  	 	  * next match will start at the first `<div>` and stop at the
 409  	 	  * first `</div>`.
 410  	 	  */
 411  	 	 $text = preg_replace_callback('{(?>
 412  	 	 	 (?>
 413  	 	 	 	 (?<=\n)	 	 	 # Starting on its own line
 414  	 	 	 	 |	 	 	 	 # or
 415  	 	 	 	 \A\n?	 	 	 # the at beginning of the doc
 416  	 	 	 )
 417  	 	 	 (	 	 	 	 	 	 # save in $1
 418  
 419  	 	 	   # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 420  	 	 	   # in between.
 421  
 422  	 	 	 	 	 	 [ ]{0,'.$less_than_tab.'}
 423  	 	 	 	 	 	 <('.$block_tags_b_re.')# start tag = $2
 424  	 	 	 	 	 	 '.$attr.'>	 	 	 # attributes followed by > and \n
 425  	 	 	 	 	 	 '.$content.'	 	 # content, support nesting
 426  	 	 	 	 	 	 </\2>	 	 	 	 # the matching end tag
 427  	 	 	 	 	 	 [ ]*	 	 	 	 # trailing spaces/tabs
 428  	 	 	 	 	 	 (?=\n+|\Z)	 # followed by a newline or end of document
 429  
 430  	 	 	 | # Special version for tags of group a.
 431  
 432  	 	 	 	 	 	 [ ]{0,'.$less_than_tab.'}
 433  	 	 	 	 	 	 <('.$block_tags_a_re.')# start tag = $3
 434  	 	 	 	 	 	 '.$attr.'>[ ]*\n	 # attributes followed by >
 435  	 	 	 	 	 	 '.$content2.'	 	 # content, support nesting
 436  	 	 	 	 	 	 </\3>	 	 	 	 # the matching end tag
 437  	 	 	 	 	 	 [ ]*	 	 	 	 # trailing spaces/tabs
 438  	 	 	 	 	 	 (?=\n+|\Z)	 # followed by a newline or end of document
 439  
 440  	 	 	 | # Special case just for <hr />. It was easier to make a special
 441  	 	 	   # case than to make the other regex more complicated.
 442  
 443  	 	 	 	 	 	 [ ]{0,'.$less_than_tab.'}
 444  	 	 	 	 	 	 <(hr)	 	 	 	 # start tag = $2
 445  	 	 	 	 	 	 '.$attr.'	 	 	 # attributes
 446  	 	 	 	 	 	 /?>	 	 	 	 	 # the matching end tag
 447  	 	 	 	 	 	 [ ]*
 448  	 	 	 	 	 	 (?=\n{2,}|\Z)	 	 # followed by a blank line or end of document
 449  
 450  	 	 	 | # Special case for standalone HTML comments:
 451  
 452  	 	 	 	 	 [ ]{0,'.$less_than_tab.'}
 453  	 	 	 	 	 (?s:
 454  	 	 	 	 	 	 <!-- .*? -->
 455  	 	 	 	 	 )
 456  	 	 	 	 	 [ ]*
 457  	 	 	 	 	 (?=\n{2,}|\Z)	 	 # followed by a blank line or end of document
 458  
 459  	 	 	 | # PHP and ASP-style processor instructions (<? and <%)
 460  
 461  	 	 	 	 	 [ ]{0,'.$less_than_tab.'}
 462  	 	 	 	 	 (?s:
 463  	 	 	 	 	 	 <([?%])	 	 	 # $2
 464  	 	 	 	 	 	 .*?
 465  	 	 	 	 	 	 \2>
 466  	 	 	 	 	 )
 467  	 	 	 	 	 [ ]*
 468  	 	 	 	 	 (?=\n{2,}|\Z)	 	 # followed by a blank line or end of document
 469  
 470  	 	 	 )
 471  	 	 	 )}Sxmi',
 472  	 	 	 array($this, '_hashHTMLBlocks_callback'),
 473  	 	 	 $text
 474  	 	 );
 475  
 476  	 	 return $text;
 477  	 }
 478  
 479  	 /**
 480  	  * The callback for hashing HTML blocks
 481  	  * @param  array $matches
 482  	  * @return string
 483  	  */
	protected function _hashHTMLBlocks_callback($matches) {
		// Capture group 1 holds the whole HTML block; swap it for a
		// block-level token.
		$token = $this->hashBlock($matches[1]);

		// Surround the token with blank lines.
		return "\n\n" . $token . "\n\n";
	}
 489  
 490  	 /**
 491  	  * Called whenever a tag must be hashed when a function inserts an atomic
 492  	  * element in the text stream. Passing $text through this function gives
 493  	  * a unique text-token which will be reverted back when calling unhash.
 494  	  *
 495  	  * The $boundary argument specifies what character should be used to surround
 496  	  * the token. By convention, "B" is used for block elements that need not
 497  	  * be wrapped into paragraph tags at the end, ":" is used for elements
 498  	  * that are word separators and "X" is used in the general case.
 499  	  *
 500  	  * @param  string $text
 501  	  * @param  string $boundary
 502  	  * @return string
 503  	  */
	protected function hashPart($text, $boundary = 'X') {
		// Function-static counter: each call receives the next number.
		static $i = 0;

		// Expand any token already present in $text first, so that the
		// stored value never needs more than one unhash pass later on.
		$expanded = $this->unhash($text);

		// Token layout: boundary, \x1A marker, sequence number, boundary.
		$i++;
		$token = $boundary . "\x1A" . $i . $boundary;

		$this->html_hashes[$token] = $expanded;

		// The token stands in for $text in the text stream.
		return $token;
	}
 515  
 516  	 /**
 517  	  * Shortcut function for hashPart with block-level boundaries.
 518  	  * @param  string $text
 519  	  * @return string
 520  	  */
	protected function hashBlock($text) {
		// "B" is the boundary character reserved for block-level elements.
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
 524  
 525  	 /**
 526  	  * Define the block gamut - these are all the transformations that form
 527  	  * block-level tags like paragraphs, headers, and list items.
 528  	  * @var array
 529  	  */
 530  	 protected $block_gamut = array(
 531  	 	 "doHeaders"         => 10,
 532  	 	 "doHorizontalRules" => 20,
 533  	 	 "doLists"           => 40,
 534  	 	 "doCodeBlocks"      => 50,
 535  	 	 "doBlockQuotes"     => 60,
 536  	 );
 537  
 538  	 /**
 539  	  * Run block gamut transformations.
 540  	  *
 541  	  * We need to escape raw HTML in Markdown source before doing anything
 542  	  * else. This needs to be done for each block, and not only at the
 543  	  * beginning in the Markdown function since hashed blocks can be part of
 544  	  * list items and could have been indented. Indented blocks would have
 545  	  * been seen as a code block in a previous pass of hashHTMLBlocks.
 546  	  *
 547  	  * @param  string $text
 548  	  * @return string
 549  	  */
	protected function runBlockGamut($text) {
		// Hash raw HTML blocks first, then run the regular block steps on
		// the result.
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
 554  
 555  	 /**
 556  	  * Run block gamut transformations, without hashing HTML blocks. This is
 557  	  * useful when HTML blocks are known to be already hashed, like in the first
 558  	  * whole-document pass.
 559  	  *
 560  	  * @param  string $text
 561  	  * @return string
 562  	  */
	protected function runBasicBlockGamut($text) {
		// Apply every block-level step, in the order of the gamut table.
		foreach (array_keys($this->block_gamut) as $step) {
			$text = $this->$step($text);
		}

		// Last step: form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
 574  
 575  	 /**
 576  	  * Convert horizontal rules
 577  	  * @param  string $text
 578  	  * @return string
 579  	  */
 580  	protected function doHorizontalRules($text) {
      	 	 // The replacement argument is evaluated once, before any matching takes
      	 	 // place, so hashBlock() runs a single time and every rule found in $text
      	 	 // is replaced by the same token.
 581  	 	 return preg_replace(
 582  	 	 	 '{
 583  	 	 	 	 ^[ ]{0,3}	 # Leading space
 584  	 	 	 	 ([-*_])	 	 # $1: First marker
 585  	 	 	 	 (?>	 	 	 # Repeated marker group
 586  	 	 	 	 	 [ ]{0,2}	 # Zero, one, or two spaces.
 587  	 	 	 	 	 \1	 	 	 # Marker character
 588  	 	 	 	 ){2,}	 	 # Group repeated at least twice
 589  	 	 	 	 [ ]*	 	 # Tailing spaces
 590  	 	 	 	 $	 	 	 # End of line.
 591  	 	 	 }mx',
 592  	 	 	 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 593  	 	 	 $text
 594  	 	 );
 595  	 }
 596  
 597  	 /**
 598  	  * These are all the transformations that occur *within* block-level
 599  	  * tags like paragraphs, headers, and list items.
 600  	  * @var array
 601  	  */
 602  	 protected $span_gamut = array(
 603  	 	 // Process character escapes, code spans, and inline HTML
 604  	 	 // in one shot.
 605  	 	 "parseSpan"           => -30,
 606  	 	 // Process anchor and image tags. Images must come first,
 607  	 	 // because ![foo][f] looks like an anchor.
 608  	 	 "doImages"            =>  10,
 609  	 	 "doAnchors"           =>  20,
 610  	 	 // Make links out of things like `<https://example.com/>`
 611  	 	 // Must come after doAnchors, because you can use < and >
 612  	 	 // delimiters in inline links like [this](<url>).
 613  	 	 "doAutoLinks"         =>  30,
 614  	 	 "encodeAmpsAndAngles" =>  40,
 615  	 	 "doItalicsAndBold"    =>  50,
 616  	 	 "doHardBreaks"        =>  60,
 617  	 );
 618  
 619  	 /**
 620  	  * Run span gamut transformations
 621  	  * @param  string $text
 622  	  * @return string
 623  	  */
	protected function runSpanGamut($text) {
		// The gamut maps method name => priority; only the names are needed
		// here because the table is already in execution order.
		foreach (array_keys($this->span_gamut) as $step) {
			$text = $this->$step($text);
		}

		return $text;
	}
 631  
 632  	 /**
 633  	  * Do hard breaks
 634  	  * @param  string $text
 635  	  * @return string
 636  	  */
	protected function doHardBreaks($text) {
		// With hard_wrap enabled every newline (and the spaces before it)
		// becomes a break; otherwise at least two trailing spaces are
		// required before the newline.
		$pattern = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$pattern,
			array($this, '_doHardBreaks_callback'),
			$text
		);
	}
 646  
 647  	 /**
 648  	  * Trigger part hashing for the hard break (callback method)
 649  	  * @param  array $matches
 650  	  * @return string
 651  	  */
	protected function _doHardBreaks_callback($matches) {
		// The matched text is discarded; it is replaced by a hashed <br>
		// tag followed by a newline.
		$break_tag = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($break_tag);
	}
 655  
 656  	 /**
 657  	  * Turn Markdown link shortcuts into XHTML <a> tags.
 658  	  * @param  string $text
 659  	  * @return string
 660  	  */
 661  	protected function doAnchors($text) {
      	 	 // Guard against nesting: the anchor callbacks run the span gamut (which
      	 	 // includes doAnchors) on the link text, and that inner call must be a no-op.
 662  	 	 if ($this->in_anchor) {
 663  	 	 	 return $text;
 664  	 	 }
 665  	 	 $this->in_anchor = true;
 666  
 667  	 	 // First, handle reference-style links: [link text] [id]
 668  	 	 $text = preg_replace_callback('{
 669  	 	 	 (	 	 	 	 	 # wrap whole match in $1
 670  	 	 	   \[
 671  	 	 	 	 ('.$this->nested_brackets_re.')	 # link text = $2
 672  	 	 	   \]
 673  
 674  	 	 	   [ ]?	 	 	 	 # one optional space
 675  	 	 	   (?:\n[ ]*)?	 	 # one optional newline followed by spaces
 676  
 677  	 	 	   \[
 678  	 	 	 	 (.*?)	 	 # id = $3
 679  	 	 	   \]
 680  	 	 	 )
 681  	 	 	 }xs',
 682  	 	 	 array($this, '_doAnchors_reference_callback'), $text);
 683  
 684  	 	 // Next, inline-style links: [link text](url "optional title")
 685  	 	 $text = preg_replace_callback('{
 686  	 	 	 (	 	 	 	 # wrap whole match in $1
 687  	 	 	   \[
 688  	 	 	 	 ('.$this->nested_brackets_re.')	 # link text = $2
 689  	 	 	   \]
 690  	 	 	   \(	 	 	 # literal paren
 691  	 	 	 	 [ \n]*
 692  	 	 	 	 (?:
 693  	 	 	 	 	 <(.+?)>	 # href = $3
 694  	 	 	 	 |
 695  	 	 	 	 	 ('.$this->nested_url_parenthesis_re.')	 # href = $4
 696  	 	 	 	 )
 697  	 	 	 	 [ \n]*
 698  	 	 	 	 (	 	 	 # $5
 699  	 	 	 	   ([\'"])	 # quote char = $6
 700  	 	 	 	   (.*?)	 	 # Title = $7
 701  	 	 	 	   \6	 	 # matching quote
 702  	 	 	 	   [ \n]*	 # ignore any spaces/tabs between closing quote and )
 703  	 	 	 	 )?	 	 	 # title is optional
 704  	 	 	   \)
 705  	 	 	 )
 706  	 	 	 }xs',
 707  	 	 	 array($this, '_doAnchors_inline_callback'), $text);
 708  
 709  	 	 // Last, handle reference-style shortcuts: [link text]
 710  	 	 // These must come last in case you've also got [link text][1]
 711  	 	 // or [link text](/foo)
 712  	 	 $text = preg_replace_callback('{
 713  	 	 	 (	 	 	 	 	 # wrap whole match in $1
 714  	 	 	   \[
 715  	 	 	 	 ([^\[\]]+)	 	 # link text = $2; can\'t contain [ or ]
 716  	 	 	   \]
 717  	 	 	 )
 718  	 	 	 }xs',
 719  	 	 	 array($this, '_doAnchors_reference_callback'), $text);
 720  
      	 	 // Anchor processing is finished; allow it again for later calls.
 721  	 	 $this->in_anchor = false;
 722  	 	 return $text;
 723  	 }
 724  
 725  	 /**
 726  	  * Callback method to parse referenced anchors
 727  	  * @param  array $matches
 728  	  * @return string
 729  	  */
	protected function _doAnchors_reference_callback($matches) {
		$label = $matches[2];

		// Group 3 does not exist for the shortcut form [text]; binding by
		// reference yields null there instead of an undefined-index notice.
		$ref =& $matches[3];
		if ($ref == "") {
			// Shortcut links like [this][] or [this]: the text is the id.
			$ref = $label;
		}

		// Ids are matched lower-cased, with embedded newlines (and one
		// optional preceding space) folded into a single space.
		$ref = preg_replace('{[ ]?\n}', ' ', strtolower($ref));

		// Unknown id: leave the source text exactly as it was written.
		if (!isset($this->urls[$ref])) {
			return $matches[1];
		}

		$href = $this->encodeURLAttribute($this->urls[$ref]);
		$tag  = '<a href="' . $href . '"';

		// The title attribute is only emitted when the definition has one.
		if (isset($this->titles[$ref])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$ref]) . '"';
		}

		// Span-level Markdown inside the link text is processed here.
		$tag .= '>' . $this->runSpanGamut($label) . '</a>';

		return $this->hashPart($tag);
	}
 763  
 764  	 /**
 765  	  * Callback method to parse inline anchors
 766  	  * @param  array $matches
 767  	  * @return string
 768  	  */
	protected function _doAnchors_inline_callback($matches) {
		// The href was captured either between angle brackets ($3) or bare ($4).
		$url = $matches[3] == '' ? $matches[4] : $matches[3];

		// The title group ($7) is optional and may be missing from $matches;
		// binding by reference leaves $title null in that case.
		$title =& $matches[7];

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using
		// the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed !== $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url = $this->encodeURLAttribute($url);

		$result = "<a href=\"$url\"";
		if (isset($title)) {
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}

		// Run the span gamut on the link text exactly once, as the
		// reference-style callback does. Earlier code ran it twice, feeding
		// already-converted text through the span steps a second time.
		$link_text = $this->runSpanGamut($matches[2]);
		$result .= ">$link_text</a>";

		return $this->hashPart($result);
	}
 795  
 796  	 /**
 797  	  * Turn Markdown image shortcuts into <img> tags.
 798  	  * @param  string $text
 799  	  * @return string
 800  	  */
 801  	protected function doImages($text) {
      	 	 // Two passes over the whole text; each match is replaced by whatever
      	 	 // its callback returns.
 802  	 	 // First, handle reference-style labeled images: ![alt text][id]
 803  	 	 $text = preg_replace_callback('{
 804  	 	 	 (	 	 	 	 # wrap whole match in $1
 805  	 	 	   !\[
 806  	 	 	 	 ('.$this->nested_brackets_re.')	 	 # alt text = $2
 807  	 	 	   \]
 808  
 809  	 	 	   [ ]?	 	 	 	 # one optional space
 810  	 	 	   (?:\n[ ]*)?	 	 # one optional newline followed by spaces
 811  
 812  	 	 	   \[
 813  	 	 	 	 (.*?)	 	 # id = $3
 814  	 	 	   \]
 815  
 816  	 	 	 )
 817  	 	 	 }xs',
 818  	 	 	 array($this, '_doImages_reference_callback'), $text);
 819  
 820  	 	 // Next, handle inline images:  ![alt text](url "optional title")
 821  	 	 // Don't forget: encode * and _
 822  	 	 $text = preg_replace_callback('{
 823  	 	 	 (	 	 	 	 # wrap whole match in $1
 824  	 	 	   !\[
 825  	 	 	 	 ('.$this->nested_brackets_re.')	 	 # alt text = $2
 826  	 	 	   \]
 827  	 	 	   \s?	 	 	 # One optional whitespace character
 828  	 	 	   \(	 	 	 # literal paren
 829  	 	 	 	 [ \n]*
 830  	 	 	 	 (?:
 831  	 	 	 	 	 <(\S*)>	 # src url = $3
 832  	 	 	 	 |
 833  	 	 	 	 	 ('.$this->nested_url_parenthesis_re.')	 # src url = $4
 834  	 	 	 	 )
 835  	 	 	 	 [ \n]*
 836  	 	 	 	 (	 	 	 # $5
 837  	 	 	 	   ([\'"])	 # quote char = $6
 838  	 	 	 	   (.*?)	 	 # title = $7
 839  	 	 	 	   \6	 	 # matching quote
 840  	 	 	 	   [ \n]*
 841  	 	 	 	 )?	 	 	 # title is optional
 842  	 	 	   \)
 843  	 	 	 )
 844  	 	 	 }xs',
 845  	 	 	 array($this, '_doImages_inline_callback'), $text);
 846  
 847  	 	 return $text;
 848  	 }
 849  
 850  	 /**
 851  	  * Callback to parse reference-style image tags
 852  	  * @param  array $matches
 853  	  * @return string
 854  	  */
	protected function _doImages_reference_callback($matches) {
		$alt = $matches[2];

		// Ids are looked up lower-cased; an empty id, as in ![this][],
		// falls back to the alt text.
		$ref = strtolower($matches[3]);
		if ($ref == "") {
			$ref = strtolower($alt);
		}

		$alt = $this->encodeAttribute($alt);

		// If there's no such link ID, leave the source text intact.
		if (!isset($this->urls[$ref])) {
			return $matches[1];
		}

		$src = $this->encodeURLAttribute($this->urls[$ref]);
		$tag = '<img src="' . $src . '" alt="' . $alt . '"';

		// The title attribute is only emitted when the definition has one.
		if (isset($this->titles[$ref])) {
			$tag .= ' title="' . $this->encodeAttribute($this->titles[$ref]) . '"';
		}

		// Close the tag with the configured empty-element suffix and hash it.
		return $this->hashPart($tag . $this->empty_element_suffix);
	}
 882  
 883  	 /**
 884  	  * Callback to parse inline image tags
 885  	  * @param  array $matches
 886  	  * @return string
 887  	  */
	protected function _doImages_inline_callback($matches) {
		// The source URL was captured either between angle brackets ($3)
		// or bare ($4).
		if ($matches[3] == '') {
			$src = $matches[4];
		} else {
			$src = $matches[3];
		}

		// The title group ($7) is optional and may be missing from $matches;
		// binding by reference leaves $caption null in that case.
		$caption =& $matches[7];

		$alt = $this->encodeAttribute($matches[2]);
		$src = $this->encodeURLAttribute($src);

		$tag = '<img src="' . $src . '" alt="' . $alt . '"';
		if (isset($caption)) {
			$caption = $this->encodeAttribute($caption);
			$tag .= ' title="' . $caption . '"';
		}
		$tag .= $this->empty_element_suffix;

		return $this->hashPart($tag);
	}
 905  
 906  	 /**
 907  	  * Parse Markdown heading elements to HTML
 908  	  * @param  string $text
 909  	  * @return string
 910  	  */
 911  	protected function doHeaders($text) {
 912  	 	 /**
 913  	 	  * Setext-style headers (underlined with "=" for level 1, "-" for level 2):
 914  	 	  *	   Header 1
 915  	 	  *	   ========
 916  	 	  *
 917  	 	  *	   Header 2
 918  	 	  *	   --------
 919  	 	  */
 920  	 	 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 921  	 	 	 array($this, '_doHeaders_callback_setext'), $text);
 922  
 923  	 	 /**
 924  	 	  * atx-style headers (one to six leading "#" characters give the level):
 925  	 	  *   # Header 1
 926  	 	  *   ## Header 2
 927  	 	  *   ## Header 2 with closing hashes ##
 928  	 	  *   ...
 929  	 	  *   ###### Header 6
 930  	 	  */
 931  	 	 $text = preg_replace_callback('{
 932  	 	 	 	 ^(\#{1,6})	 # $1 = string of #\'s
 933  	 	 	 	 [ ]*
 934  	 	 	 	 (.+?)	 	 # $2 = Header text
 935  	 	 	 	 [ ]*
 936  	 	 	 	 \#*	 	 	 # optional closing #\'s (not counted)
 937  	 	 	 	 \n+
 938  	 	 	 }xm',
 939  	 	 	 array($this, '_doHeaders_callback_atx'), $text);
 940  
 941  	 	 return $text;
 942  	 }
 943  
 /**
  * Turn one setext-style header match into an `<h1>` or `<h2>` block.
  *
  * Group 1 is the heading text and group 2 the underline ("=" gives level 1,
  * anything else level 2).
  * @param  array $matches
  * @return string
  */
 protected function _doHeaders_callback_setext($matches) {
     $heading   = $matches[1];
     $underline = $matches[2];

     // Hack: a single "-" underline below a line that is itself a bare "-"
     // marker is an empty list item, not a header. Leave the text untouched.
     $is_empty_list_item = $underline == '-' && preg_match('{^-(?: |$)}', $heading);
     if ($is_empty_list_item) {
         return $matches[0];
     }

     $tag = $underline[0] == '=' ? 'h1' : 'h2';

     // Optional id attribute, generated before the span gamut runs.
     $id_attr = $this->_generateIdFromHeaderValue($heading);

     $html = '<' . $tag . $id_attr . '>' . $this->runSpanGamut($heading) . '</' . $tag . '>';
     return "\n" . $this->hashBlock($html) . "\n\n";
 }
 963  
 /**
  * Turn one atx-style header match into an `<hN>` block, where N is the
  * number of leading "#" characters captured in group 1.
  * @param  array $matches
  * @return string
  */
 protected function _doHeaders_callback_atx($matches) {
     $hashes  = $matches[1];
     $heading = $matches[2];

     // Optional id attribute, generated before the span gamut runs.
     $id_attr = $this->_generateIdFromHeaderValue($heading);

     $tag  = 'h' . strlen($hashes);
     $html = '<' . $tag . $id_attr . '>' . $this->runSpanGamut($heading) . '</' . $tag . '>';

     return "\n" . $this->hashBlock($html) . "\n\n";
 }
 977  
 /**
  * Generate an id attribute for a header through the header_id_func
  * property, when that property holds a callable.
  *
  * Returns a string of the form ` id="foo"` (with a leading space), or an
  * empty string when no callable is set or the callable returns a falsy
  * value.
  * @param  string $headerValue
  * @return string
  */
 protected function _generateIdFromHeaderValue($headerValue) {
     if (is_callable($this->header_id_func)) {
         $id = call_user_func($this->header_id_func, $headerValue);
         if ($id) {
             return ' id="' . $this->encodeAttribute($id) . '"';
         }
     }

     return "";
 }
 999  
/**
 * Form HTML ordered (numbered) and unordered (bulleted) lists.
 *
 * Two passes are made over the text, one per list kind. Each match is
 * handed to _doLists_callback().
 * @param  string $text
 * @return string
 */
protected function doLists($text) {
    $less_than_tab = $this->tab_width - 1;

    // Patterns matching list item bullets and number markers.
    $bullet_re = '[*+-]';
    $number_re = '\d+[\.]';

    // Each pass pairs the marker being matched with the marker of the
    // other list kind, which is needed to detect where one list stops
    // because a list of the other kind starts.
    $passes = array(
        array($bullet_re, $number_re),
        array($number_re, $bullet_re),
    );

    foreach ($passes as $pass) {
        list($marker_re, $other_marker_re) = $pass;

        // Pattern matching one entire ul or ol list:
        $whole_list_re = '
            (                               # $1 = whole list
              (                             # $2
                ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                ('.$marker_re.')            # $4 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $5
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    '.$marker_re.'[ ]+
                  )
                |
                  (?=                       # Lookahead for another kind of list
                    \n
                    \3                      # Must have the same indentation
                    '.$other_marker_re.'[ ]+
                  )
              )
            )
        '; // mx

        // A different prefix is used before nested lists than before
        // top-level lists; see the extended comment in processListItems().
        if ($this->list_level) {
            $prefix_re = '^';
        } else {
            // Top level: the pattern must eat the preceding newline.
            $prefix_re = '(?:(?<=\n)\n|\A\n?)';
        }

        $text = preg_replace_callback(
            '{' . $prefix_re . $whole_list_re . '}mx',
            array($this, '_doLists_callback'),
            $text
        );
    }

    return $text;
}
1066  
/**
 * Convert one whole-list match into a hashed `<ul>` or `<ol>` block.
 *
 * Group 1 is the whole list and group 4 its first item marker. When the
 * enhanced_ordered_list property is set, an ordered list whose first
 * number is greater than 1 gets a matching `start` attribute.
 * @param  array $matches
 * @return string
 */
protected function _doLists_callback($matches) {
    // Patterns matching list item bullets and number markers.
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[\.]';

    $first_marker = $matches[4];
    if (preg_match("/$marker_ul_re/", $first_marker)) {
        $list_type = "ul";
        $item_marker_re = $marker_ul_re;
    } else {
        $list_type = "ol";
        $item_marker_re = $marker_ol_re;
    }

    $items = $this->processListItems($matches[1] . "\n", $item_marker_re);

    // Work out the optional start number for ordered lists.
    $attributes = '';
    if ($list_type == 'ol' && $this->enhanced_ordered_list) {
        $number = array();
        if (preg_match('/[0-9]+/', $first_marker, $number) && $number[0] > 1) {
            $attributes = ' start="' . $number[0] . '"';
        }
    }

    $html = $this->hashBlock("<$list_type$attributes>\n" . $items . "</$list_type>");
    return "\n" . $html . "\n\n";
}
1106  
/**
 * Nesting tracker for list levels
 * @var integer
 */
protected $list_level = 0;

/**
 * Process the contents of a single ordered or unordered list, splitting it
 * into individual list items.
 *
 * $this->list_level is incremented for the duration of the call and is
 * always decremented again, even when processing an item throws.
 * @param  string $list_str
 * @param  string $marker_any_re
 * @return string
 */
protected function processListItems($list_str, $marker_any_re) {
    /**
     * $this->list_level keeps track of when we're inside a list.
     * Each time we enter a list, we increment it; when we leave a list,
     * we decrement. If it's zero, we're not in a list anymore.
     *
     * We do this because when we're not inside a list, we want to treat
     * something like this:
     *
     *     I recommend upgrading to version
     *     8. Oops, now this line is treated
     *     as a sub-list.
     *
     * As a single paragraph, despite the fact that the second line starts
     * with a digit-period-space sequence.
     *
     * Whereas when we're inside a list (or sub-list), that line will be
     * treated as the start of a sub-list. What a kludge, huh? This is
     * an aspect of Markdown's syntax that's hard to parse perfectly
     * without resorting to mind-reading. Perhaps the solution is to
     * change the syntax rules such that sub-lists must start with a
     * starting cardinal number; e.g. "1." or "a.".
     */
    $this->list_level++;

    try {
        // Trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        return preg_replace_callback('{
            (\n)?                           # leading line = $1
            (^[ ]*)                         # leading whitespace = $2
            ('.$marker_any_re.'             # list marker and space = $3
                (?:[ ]+|(?=\n))     # space only required if item is not empty
            )
            ((?s:.*?))                      # list item text   = $4
            (?:(\n+(?=\n))|\n)              # tailing blank line = $5
            (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
            }xm',
            array($this, '_processListItems_callback'), $list_str);
    } finally {
        // Restore the nesting level on every exit path. Without this, an
        // exception raised while processing an item would leave the
        // counter raised for any later use of this parser instance.
        $this->list_level--;
    }
}
1163  
/**
 * Convert one list item match into an `<li>` element.
 *
 * An item that is preceded by a blank line (group 1), followed by one
 * (group 5), or that contains one goes through the full block gamut.
 * Any other item is only checked for sub-lists and then passed to
 * formParagraphs() without `<p>` wrapping.
 * @param  array $matches
 * @return string
 */
protected function _processListItems_callback($matches) {
    $item = $matches[4];

    $has_blank_line = !empty($matches[1])
        || !empty($matches[5])
        || preg_match('/\n{2,}/', $item);

    if (!$has_blank_line) {
        // Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = $this->formParagraphs($item, false);
    } else {
        // Replace the marker with the equivalent amount of whitespace
        // so the item text keeps its indentation before outdenting.
        $padding = str_repeat(' ', strlen($matches[3]));
        $item = $matches[2] . $padding . $item;
        $item = $this->runBlockGamut($this->outdent($item) . "\n");
    }

    return "<li>" . $item . "</li>\n";
}
1190  
/**
 * Process Markdown `<pre><code>` blocks.
 *
 * Matches runs of lines indented by tab_width spaces and hands each run
 * to _doCodeBlocks_callback().
 * @param  string $text
 * @return string
 */
protected function doCodeBlocks($text) {
    $width = $this->tab_width;

    $code_block_re = '{
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?>
                [ ]{'.$width.'}  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$width.'}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback(
        $code_block_re,
        array($this, '_doCodeBlocks_callback'),
        $text
    );
}
1211  
/**
 * Convert one indented code block match into a hashed `<pre><code>` block.
 *
 * The content is outdented, then either passed to code_block_content_func
 * (with an empty second argument) or HTML-escaped without touching quotes.
 * @param  array $matches
 * @return string
 */
protected function _doCodeBlocks_callback($matches) {
    $code = $this->outdent($matches[1]);

    $code = $this->code_block_content_func
        ? call_user_func($this->code_block_content_func, $code, "")
        : htmlspecialchars($code, ENT_NOQUOTES);

    // Trim leading newlines and trailing newlines.
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = '<pre><code>' . $code . "\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
1233  
/**
 * Create the code span markup for $code. Called from handleSpanToken.
 *
 * The content comes from code_span_content_func when that is set;
 * otherwise $code is trimmed and HTML-escaped without touching quotes.
 * @param  string $code
 * @return string Hashed placeholder for the `<code>` element
 */
protected function makeCodeSpan($code) {
    $content = $this->code_span_content_func
        ? call_user_func($this->code_span_content_func, $code)
        : htmlspecialchars(trim($code), ENT_NOQUOTES);

    return $this->hashPart('<code>' . $content . '</code>');
}
1247  
/**
 * Emphasis operators and their regular expressions, keyed by the
 * emphasis marker currently open ('' when none is open).
 * @var array
 */
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?!_)',
);

/**
 * Strong operators and their regular expressions, keyed by the strong
 * marker currently open ('' when none is open).
 * @var array
 */
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?!_)',
);

/**
 * Combined emphasis + strong operators and their regular expressions.
 * @var array
 */
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?!_)',
);

/**
 * Container for the regular expressions built by prepareItalicsAndBold(),
 * keyed by the concatenation of the open emphasis and strong markers.
 * @var array
 */
protected $em_strong_prepared_relist;

/**
 * Prepare regular expressions for searching emphasis tokens in any
 * context.
 *
 * One expression is stored for every combination of an $em_relist key and
 * a $strong_relist key. The three-character alternative is tried first
 * when $em_strong_relist has an entry for the combined key.
 * @return void
 */
protected function prepareItalicsAndBold() {
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $context = "$em$strong";

            // List of allowed token expressions, in matching priority.
            $alternatives = array($em_re, $strong_re);
            if (isset($this->em_strong_relist[$context])) {
                array_unshift($alternatives, $this->em_strong_relist[$context]);
            }

            // Master expression for this context.
            $this->em_strong_prepared_relist[$context] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
1306  
/**
 * Convert Markdown italics (emphasis) and bold (strong) to HTML.
 *
 * The text is scanned token by token using the expressions prepared in
 * $em_strong_prepared_relist. Open markers are kept on $token_stack with
 * the text collected since each marker on $text_stack (index 0 is the
 * innermost level). Markers still open at the end of the text are put
 * back as literal text.
 *
 * The in_emphasis_processing flag makes nested calls return $text
 * unchanged. It is cleared on every exit path, including when span
 * processing throws.
 * @param  string $text
 * @return string
 */
protected function doItalicsAndBold($text) {
    if ($this->in_emphasis_processing) {
        return $text; // avoid reentrancy
    }
    $this->in_emphasis_processing = true;

    try {
        $token_stack = array('');
        $text_stack = array('');
        $em = '';
        $strong = '';
        $tree_char_em = false;

        while (1) {
            // Get prepared regular expression for searching emphasis tokens
            // in current context.
            $token_re = $this->em_strong_prepared_relist["$em$strong"];

            // Each loop iteration searches for the next emphasis token.
            $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
            $text_stack[0] .= $parts[0];
            $token =& $parts[1];
            $text =& $parts[2];

            if (empty($token)) {
                // Reached end of text span: empty stack without emitting
                // any more emphasis.
                while ($token_stack[0]) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                break;
            }

            $token_len = strlen($token);
            if ($tree_char_em) {
                // Reached closing marker while inside a three-char emphasis.
                if ($token_len == 3) {
                    // Three-char closing marker, close em and strong.
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<strong><em>$span</em></strong>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                    $strong = '';
                } else {
                    // Other closing marker: close one em or strong and
                    // change current token state to match the other
                    $token_stack[0] = str_repeat($token[0], 3-$token_len);
                    $tag = $token_len == 2 ? "strong" : "em";
                    $span = $text_stack[0];
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] = $this->hashPart($span);
                    $$tag = ''; // $$tag stands for $em or $strong
                }
                $tree_char_em = false;
            } else if ($token_len == 3) {
                if ($em) {
                    // Reached closing marker for both em and strong.
                    // Close the two innermost levels, innermost first:
                    for ($i = 0; $i < 2; ++$i) {
                        $shifted_token = array_shift($token_stack);
                        $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                        $span = array_shift($text_stack);
                        $span = $this->runSpanGamut($span);
                        $span = "<$tag>$span</$tag>";
                        $text_stack[0] .= $this->hashPart($span);
                        $$tag = ''; // $$tag stands for $em or $strong
                    }
                } else {
                    // Reached opening three-char emphasis marker. Push on token
                    // stack; will be handled by the special condition above.
                    $em = $token[0];
                    $strong = "$em$em";
                    array_unshift($token_stack, $token);
                    array_unshift($text_stack, '');
                    $tree_char_em = true;
                }
            } else if ($token_len == 2) {
                if ($strong) {
                    // Unwind any dangling emphasis marker:
                    if (strlen($token_stack[0]) == 1) {
                        $text_stack[1] .= array_shift($token_stack);
                        $text_stack[0] .= array_shift($text_stack);
                        $em = '';
                    }
                    // Closing strong marker:
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<strong>$span</strong>";
                    $text_stack[0] .= $this->hashPart($span);
                    $strong = '';
                } else {
                    // Opening strong marker.
                    array_unshift($token_stack, $token);
                    array_unshift($text_stack, '');
                    $strong = $token;
                }
            } else {
                // Here $token_len == 1
                if ($em) {
                    if (strlen($token_stack[0]) == 1) {
                        // Closing emphasis marker:
                        array_shift($token_stack);
                        $span = array_shift($text_stack);
                        $span = $this->runSpanGamut($span);
                        $span = "<em>$span</em>";
                        $text_stack[0] .= $this->hashPart($span);
                        $em = '';
                    } else {
                        // Innermost open marker is not an emphasis marker:
                        // keep the token as literal text.
                        $text_stack[0] .= $token;
                    }
                } else {
                    // Opening emphasis marker.
                    array_unshift($token_stack, $token);
                    array_unshift($text_stack, '');
                    $em = $token;
                }
            }
        }

        return $text_stack[0];
    } finally {
        // Always release the reentrancy guard, otherwise an exception
        // raised during span processing would leave emphasis handling
        // disabled for as long as the flag stays set.
        $this->in_emphasis_processing = false;
    }
}
1436  
/**
 * Parse Markdown blockquotes to HTML.
 *
 * Each run of ">"-prefixed paragraphs is passed to
 * _doBlockQuotes_callback().
 * @param  string $text
 * @return string
 */
protected function doBlockQuotes($text) {
    $blockquote_re = '/
          (                     # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?        # ">" at the start of a line
                .+\n            # rest of the first line
              (.+\n)*           # subsequent consecutive lines
              \n*               # blanks
            )+
          )
        /xm';

    return preg_replace_callback(
        $blockquote_re,
        array($this, '_doBlockQuotes_callback'),
        $text
    );
}
1457  
/**
 * Convert one blockquote match into a hashed `<blockquote>` block.
 * @param  array $matches
 * @return string
 */
protected function _doBlockQuotes_callback($matches) {
    // Trim one level of quoting and empty out whitespace-only lines.
    $quoted = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

    // Recurse: the quoted text is itself block-level Markdown.
    $inner = $this->runBlockGamut($quoted);

    // Indent every line by two spaces...
    $inner = preg_replace('/^/m', "  ", $inner);
    // ...but these leading spaces cause problems with <pre> content,
    // so they are removed again inside <pre> elements.
    $inner = preg_replace_callback(
        '{(\s*<pre>.+?</pre>)}sx',
        array($this, '_doBlockQuotes_callback2'),
        $inner
    );

    return "\n" . $this->hashBlock("<blockquote>\n$inner\n</blockquote>") . "\n\n";
}
1477  
/**
 * Remove the two-space line indentation from a `<pre>` element matched
 * inside a blockquote.
 * @param  array $matches
 * @return string
 */
protected function _doBlockQuotes_callback2($matches) {
    return preg_replace('/^  /m', '', $matches[1]);
}
1488  
/**
 * Parse paragraphs
 *
 * The text is split on runs of two or more newlines. A chunk that consists
 * solely of a block hash key is replaced by the matching entry of
 * $this->html_hashes; every other chunk is run through the span gamut,
 * optionally wrapped in `<p>` tags, and unhashed.
 *
 * @param  string $text String to process in paragraphs
 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
 * @return string
 */
protected function formParagraphs($text, $wrap_in_p = true) {
    // Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    // Wrap <p> tags and unhashify HTML blocks
    foreach ($grafs as $key => $value) {
        if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
            // Is a block: substitute the stored HTML for its hash key.
            $grafs[$key] = $this->html_hashes[$value];
            continue;
        }

        // Is a paragraph.
        $value = $this->runSpanGamut($value);
        if ($wrap_in_p) {
            // The opening tag replaces any leading spaces.
            $value = preg_replace('/^([ ]*)/', "<p>", $value);
            $value .= "</p>";
        }
        $grafs[$key] = $this->unhash($value);
    }

    return implode("\n\n", $grafs);
}
1559  
/**
 * Encode text for use inside a double-quoted HTML attribute: ampersands
 * and angle brackets first, then double quotes. The result is *not*
 * suitable for attributes enclosed in single quotes.
 * @param  string $text
 * @return string
 */
protected function encodeAttribute($text) {
    return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
}
1571  
/**
 * Encode a URL for use inside a double-quoted HTML attribute, applying
 * url_filter_func first when it is set. The textual representation of the
 * URL is stored in $text: the encoded URL itself, minus its first 7
 * characters for "mailto:" URLs or its first 4 for "tel:" URLs.
 * The result is *not* suitable for attributes enclosed in single quotes.
 *
 * @param  string $url
 * @param  string &$text Passed by reference
 * @return string        URL
 */
protected function encodeURLAttribute($url, &$text = null) {
    if ($this->url_filter_func) {
        $url = call_user_func($this->url_filter_func, $url);
    }

    // Email links are entity-obfuscated; that helper also fills $text.
    if (preg_match('{^mailto:}i', $url)) {
        return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
    }

    // The scheme is tested on the raw URL, before encoding.
    $is_tel = preg_match('{^tel:}i', $url);

    $url  = $this->encodeAttribute($url);
    $text = $is_tel ? substr($url, 4) : $url;

    return $url;
}
1599  
/**
 * Smart processing for ampersands and angle brackets that need to
 * be encoded. Ampersands that already start something shaped like a
 * character entity are left alone, unless the no_entities property is
 * set, in which case every ampersand is encoded.
 * @param  string $text
 * @return string
 */
protected function encodeAmpsAndAngles($text) {
    // Ampersand-encoding based entirely on Nat Irons's Amputator
    // MT plugin: <http://bumppo.net/projects/amputator/>
    $text = $this->no_entities
        ? str_replace('&', '&amp;', $text)
        : preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&amp;', $text);

    // Encode remaining <'s
    return str_replace('<', '&lt;', $text);
}
1621  
/**
 * Parse Markdown automatic links (`<url>` and `<address@domain>`) to
 * anchor HTML tags.
 * @param  string $text
 * @return string
 */
protected function doAutoLinks($text) {
    // URLs with one of the recognised schemes: <http://example.com/>
    $url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';
    $text = preg_replace_callback(
        $url_re,
        array($this, '_doAutoLinks_url_callback'),
        $text
    );

    // Email addresses: <address@domain.foo>
    $email_re = '{
        <
        (?:mailto:)?
        (
            (?:
                [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
            |
                ".*?"
            )
            \@
            (?:
                [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
            |
                \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
            )
        )
        >
        }xi';

    return preg_replace_callback(
        $email_re,
        array($this, '_doAutoLinks_email_callback'),
        $text
    );
}
1654  
/**
 * Build the anchor tag for one automatic URL link.
 * @param  array $matches
 * @return string Hashed placeholder for the anchor
 */
protected function _doAutoLinks_url_callback($matches) {
    // encodeURLAttribute() fills $label with the link text.
    $label = null;
    $href = $this->encodeURLAttribute($matches[1], $label);

    return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
}
1665  
/**
 * Build the anchor tag for one automatic email link. The address from
 * group 1 is prefixed with "mailto:" before being encoded.
 * @param  array $matches
 * @return string Hashed placeholder for the anchor
 */
protected function _doAutoLinks_email_callback($matches) {
    // encodeURLAttribute() fills $label with the link text.
    $label = null;
    $href = $this->encodeURLAttribute('mailto:' . $matches[1], $label);

    return $this->hashPart('<a href="' . $href . '">' . $label . '</a>');
}
1677  
/**
 * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
 *
 * Output: the same text but with most characters encoded as either a
 *         decimal or hex entity, in the hopes of foiling most address
 *         harvesting spam bots. E.g.:
 *
 *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
 *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
 *        &#x6d;
 *
 * Note: the additional output $tail is assigned the same value as the
 * output, minus the number of characters specified by $head_length.
 *
 * The text is processed byte by byte. Bytes of 128 and above are passed
 * through untouched.
 *
 * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
 * With some optimizations by Milian Wolff. Forced encoding of HTML
 * attribute special characters by Allan Odgaard.
 *
 * @param  string  $text
 * @param  string  &$tail
 * @param  integer $head_length
 * @return string
 */
protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
    if ($text == "") {
        return $tail = "";
    }

    // Split into single bytes. str_split() is used rather than a regex
    // split on '(?!$)', because "$" also matches just before a trailing
    // newline, which would glue that newline to the preceding byte and
    // lose it once that chunk is entity-encoded.
    $chars = str_split($text);
    $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.

    foreach ($chars as $key => $char) {
        $ord = ord($char);
        // Ignore non-ascii chars.
        if ($ord < 128) {
            $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
            // roughly 10% raw, 45% hex, 45% dec
            // '@' *must* be encoded. I insist.
            // '"', '&' and '>' have to be encoded inside the attribute
            if ($r > 90 && strpos('@"&>', $char) === false) {
                /* do nothing */
            } else if ($r < 45) {
                $chars[$key] = '&#x'.dechex($ord).';';
            } else {
                $chars[$key] = '&#'.$ord.';';
            }
        }
    }

    $text = implode('', $chars);
    // $tail skips the first $head_length source characters, whatever
    // the encoded length of each one turned out to be.
    $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;

    return $text;
}
1732  
/**
 * Take the string $str and parse it into tokens, hashing embedded HTML,
 * escaped characters and handling code spans.
 *
 * HTML constructs are only tokenized when the no_markup property is not
 * set.
 * @param  string $str
 * @return string
 */
protected function parseSpan($str) {
    // Alternatives for HTML constructs, left out in no-markup mode.
    $markup_re = $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[!$]?[-a-zA-Z0-9:_]+   # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
            |
                <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
            |
                </[-a-zA-Z0-9:_]+\s*> # closing tag
        ';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.$markup_re.'
            )
            }xs';

    $output = '';
    for (;;) {
        // Each iteration searches for either the next tag, the next
        // opening code span marker, or the next escaped character.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        // Text preceding the token is copied through as is.
        $output .= $parts[0];

        // No token found: the end of the string has been reached.
        if (!isset($parts[1])) {
            break;
        }

        // handleSpanToken() may consume part of the remaining text,
        // which it receives by reference.
        $output .= $this->handleSpanToken($parts[1], $parts[2]);
        $str = $parts[2];
    }

    return $output;
}
1790  
/**
 * Handle $token provided by parseSpan by determining its nature and
 * returning the corresponding value that should replace it.
 *
 * For a code span marker, the text up to the matching closing marker is
 * removed from $str.
 * @param  string $token
 * @param  string &$str
 * @return string
 */
protected function handleSpanToken($token, &$str) {
    $first = $token[0];

    if ($first === "\\") {
        // Escaped character: emit it as a hashed numeric entity.
        return $this->hashPart("&#". ord($token[1]). ";");
    }

    if ($first === "`") {
        // Search for end marker in remaining text.
        $closing_re = '/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm';
        if (!preg_match($closing_re, $str, $matches)) {
            return $token; // Return as text since no ending marker found.
        }

        $str = $matches[2];
        return $this->hashPart($this->makeCodeSpan($matches[1]));
    }

    // Anything else is hashed as is.
    return $this->hashPart($token);
}
1816  
1817  	 /**
1818  	  * Remove one level of line-leading tabs or spaces
1819  	  * @param  string $text
1820  	  * @return string
1821  	  */
1822  	protected function outdent($text) {
1823  	 	 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1824  	 }
1825  
1826  
	/**
	 * String length function used by the detab callback to measure how many
	 * characters a line already holds. Defaults to the name of the
	 * `mb_strlen` function; `_initDetab` replaces it with a closure that
	 * loosely counts UTF-8 characters when the named function does not exist.
	 * @var string|callable
	 */
	protected $utf8_strlen = 'mb_strlen';
1833  
1834  	 /**
1835  	  * Replace tabs with the appropriate amount of spaces.
1836  	  *
	 * For each line we separate the line in blocks delimited by tab characters.
	 * Then we reconstruct every line by adding the appropriate number of spaces
	 * between each block.
1840  	  *
1841  	  * @param  string $text
1842  	  * @return string
1843  	  */
1844  	protected function detab($text) {
1845  	 	 $text = preg_replace_callback('/^.*\t.*$/m',
1846  	 	 	 array($this, '_detab_callback'), $text);
1847  
1848  	 	 return $text;
1849  	 }
1850  
1851  	 /**
1852  	  * Replace tabs callback
	 * @param  array $matches
1854  	  * @return string
1855  	  */
1856  	protected function _detab_callback($matches) {
1857  	 	 $line = $matches[0];
1858  	 	 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1859  
1860  	 	 // Split in blocks.
1861  	 	 $blocks = explode("\t", $line);
1862  	 	 // Add each blocks to the line.
1863  	 	 $line = $blocks[0];
1864  	 	 unset($blocks[0]); // Do not add first block twice.
1865  	 	 foreach ($blocks as $block) {
1866  	 	 	 // Calculate amount of space, insert spaces, insert block.
1867  	 	 	 $amount = $this->tab_width -
1868  	 	 	 	 $strlen($line, 'UTF-8') % $this->tab_width;
1869  	 	 	 $line .= str_repeat(" ", $amount) . $block;
1870  	 	 }
1871  	 	 return $line;
1872  	 }
1873  
1874  	 /**
1875  	  * Check for the availability of the function in the `utf8_strlen` property
1876  	  * (initially `mb_strlen`). If the function is not available, create a
1877  	  * function that will loosely count the number of UTF-8 characters with a
1878  	  * regular expression.
1879  	  * @return void
1880  	  */
1881  	protected function _initDetab() {
1882  
1883  	 	 if (function_exists($this->utf8_strlen)) {
1884  	 	 	 return;
1885  	 	 }
1886  
1887  	 	 $this->utf8_strlen = function($text) {
1888  	 	 	 return preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
1889  	 	 };
1890  	 }
1891  
1892  	 /**
1893  	  * Swap back in all the tags hashed by _HashHTMLBlocks.
1894  	  * @param  string $text
1895  	  * @return string
1896  	  */
1897  	protected function unhash($text) {
1898  	 	 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1899  	 	 	 array($this, '_unhash_callback'), $text);
1900  	 }
1901  
1902  	 /**
1903  	  * Unhashing callback
1904  	  * @param  array $matches
1905  	  * @return string
1906  	  */
1907  	protected function _unhash_callback($matches) {
1908  	 	 return $this->html_hashes[$matches[0]];
1909  	 }
1910  }