Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is supported too.

Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403]

   1  <?php
   2  /**
   3   * Markdown  -  A text-to-HTML conversion tool for web writers
   4   *
   5   * @package   php-markdown
   6   * @author    Michel Fortin <michel.fortin@michelf.com>
   7   * @copyright 2004-2022 Michel Fortin <https://michelf.com/projects/php-markdown/>
   8   * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
   9   */
  10  
  11  namespace Michelf;
  12  
  13  /**
  14   * Markdown Parser Class
  15   */
  16  class Markdown implements MarkdownInterface {
  17  	 /**
  18  	  * Define the package version
  19  	  * @var string
  20  	  */
  21  	 const MARKDOWNLIB_VERSION = "2.0.0";
  22  
  23  	 /**
  24  	  * Simple function interface - Initialize the parser and return the result
  25  	  * of its transform method. This will work fine for derived classes too.
  26  	  *
  27  	  * @api
  28  	  *
  29  	  * @param  string $text
  30  	  * @return string
  31  	  */
  32  	public static function defaultTransform(string $text): string {
  33  	 	 // Take parser class on which this function was called.
  34  	 	 $parser_class = static::class;
  35  
  36  	 	 // Try to take parser from the static parser list
  37  	 	 static $parser_list;
  38  	 	 $parser =& $parser_list[$parser_class];
  39  
  40  	 	 // Create the parser it not already set
  41  	 	 if (!$parser) {
  42  	 	 	 $parser = new $parser_class;
  43  	 	 }
  44  
  45  	 	 // Transform text using parser.
  46  	 	 return $parser->transform($text);
  47  	 }
  48  
	/**
	 * Configuration variables
	 */
	/**
	 * Suffix appended to the empty elements generated by this class
	 * (`<hr`, `<br`, `<img`). Change to ">" for HTML output.
	 */
	public string $empty_element_suffix = " />";

	/**
	 * The width of indentation of the output markup.
	 * Within this class it is also used to derive the maximum number of
	 * leading spaces (tab_width - 1) accepted in front of link definitions
	 * and HTML blocks.
	 */
	public int $tab_width = 4;

	/**
	 * Change to `true` to disallow markup or entities.
	 * When $no_markup is true, hashHTMLBlocks() returns its input untouched.
	 */
	public bool $no_markup   = false;
	public bool $no_entities = false;


	/**
	 * Change to `true` to enable line breaks on \n without two trailing spaces
	 * @var boolean
	 */
	public bool $hard_wrap = false;

	/**
	 * Predefined URLs and titles for reference links and images.
	 * setup() copies them into the working tables at the start of each
	 * transformation.
	 */
	public array $predef_urls   = array();
	public array $predef_titles = array();

	/**
	 * Optional filter function for URLs
	 * @var callable|null
	 */
	public $url_filter_func = null;

	/**
	 * Optional header id="" generation callback function.
	 * It is called with the raw header text; a falsy return value means
	 * that no id attribute is emitted.
	 * @var callable|null
	 */
	public $header_id_func = null;

	/**
	 * Optional function for converting code block content to HTML
	 * @var callable|null
	 */
	public $code_block_content_func = null;

	/**
	 * Optional function for converting code span content to HTML.
	 * @var callable|null
	 */
	public $code_span_content_func = null;

	/**
	 * Class attribute to toggle "enhanced ordered list" behaviour
	 * setting this to true will allow ordered lists to start from the index
	 * number that is defined first.
	 *
	 * For example:
	 * 2. List item two
	 * 3. List item three
	 *
	 * Becomes:
	 * <ol start="2">
	 * <li>List item two</li>
	 * <li>List item three</li>
	 * </ol>
	 */
	public bool $enhanced_ordered_list = false;

	/**
	 * Parser implementation
	 */
	/**
	 * Regex to match balanced [brackets].
	 * Needed to insert a maximum bracket depth while converting to PHP.
	 * The *_re strings are built by the constructor from the matching
	 * *_depth values.
	 */
	protected int $nested_brackets_depth = 6;
	protected string $nested_brackets_re;

	protected int $nested_url_parenthesis_depth = 4;
	protected string $nested_url_parenthesis_re;

	/**
	 * Table of hash values for escaped characters:
	 * $escape_chars_re is the character class built from $escape_chars by
	 * the constructor.
	 */
	protected string $escape_chars = '\`*_{}[]()>#+-.!';
	protected string $escape_chars_re;
 140  
	/**
	 * Constructor function. Initialize appropriate member variables.
	 *
	 * Builds the nested-bracket, nested-parenthesis and escape-character
	 * regex fragments from their configured depth / character list, then
	 * sorts every gamut by priority.
	 * @return void
	 */
	public function __construct() {
		$this->_initDetab();
		$this->prepareItalicsAndBold();

		// Each nesting level contributes one opening and one closing fragment.
		$bracket_levels = $this->nested_brackets_depth;
		$this->nested_brackets_re = str_repeat('(?>[^\[\]]+|\[', $bracket_levels)
			. str_repeat('\])*', $bracket_levels);

		$paren_levels = $this->nested_url_parenthesis_depth;
		$this->nested_url_parenthesis_re = str_repeat('(?>[^()\s]+|\(', $paren_levels)
			. str_repeat('(?>\)))*', $paren_levels);

		$this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

		// Sort document, block, and span gamut in ascending priority order.
		foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
			asort($this->$gamut);
		}
	}
 164  
 165  
	/**
	 * Internal hashes used during transformation.
	 * $urls and $titles are indexed by lower-cased link id; $html_hashes maps
	 * the tokens produced by hashPart() to the text they stand for.
	 */
	protected array $urls        = array();
	protected array $titles      = array();
	protected array $html_hashes = array();

	/**
	 * Status flag to avoid invalid nesting.
	 * Set while doAnchors() is running so that nested calls return early.
	 */
	protected bool $in_anchor = false;

	/**
	 * Status flag to avoid invalid nesting.
	 */
	protected bool $in_emphasis_processing = false;
 182  
	/**
	 * Called before the transformation process starts to set up parser
	 * states: nesting flags are cleared, the hash table is emptied and the
	 * URL / title tables are seeded from the predefined ones.
	 * @return void
	 */
	protected function setup() {
		// Reset nesting guards.
		$this->in_emphasis_processing = false;
		$this->in_anchor              = false;

		// Reset working tables.
		$this->html_hashes = array();
		$this->titles      = $this->predef_titles;
		$this->urls        = $this->predef_urls;
	}
 195  
	/**
	 * Called after the transformation process to clear any variable which may
	 * be taking up memory unnecessarily.
	 * @return void
	 */
	protected function teardown() {
		$this->urls = $this->titles = $this->html_hashes = array();
	}
 206  
	/**
	 * Main function. Performs some preprocessing on the input text and passes
	 * it through the document gamut.
	 *
	 * Preprocessing, in order: strip a leading UTF-8 BOM and any \x1A marker
	 * character, normalise line endings to \n, append two newlines, convert
	 * tabs to spaces, hash block-level HTML, and blank out lines that contain
	 * only spaces.
	 *
	 * @api
	 *
	 * @param  string $text
	 * @return string
	 */
	public function transform(string $text): string {
		$this->setup();

		# Remove UTF-8 BOM and marker character in input, if present.
		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

		# Standardize line endings (DOS to Unix and Mac to Unix), then make
		# sure the text ends with a couple of newlines.
		$text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

		# Convert all tabs to spaces, then turn block-level HTML blocks into
		# hash entries.
		$text = $this->hashHTMLBlocks($this->detab($text));

		# Strip any lines consisting only of spaces. Subsequent regexes can
		# then match consecutive blank lines with /\n+/ instead of something
		# contorted like /[ ]*\n+/ .
		$text = preg_replace('/^[ ]+$/m', '', $text);

		# Run document gamut methods (already sorted by priority).
		foreach (array_keys($this->document_gamut) as $method) {
			$text = $this->$method($text);
		}

		$this->teardown();

		return $text . "\n";
	}
 250  
	/**
	 * Define the document gamut: method name => priority. The constructor
	 * sorts it with asort() and transform() calls each method in that order.
	 */
	protected array $document_gamut = array(
		// Strip link definitions, store in hashes.
		"stripLinkDefinitions" => 20,
		"runBasicBlockGamut"   => 30,
	);
 259  
	/**
	 * Strips link definitions from text, stores the URLs and titles in
	 * hash references (through _stripLinkDefinitions_callback).
	 * @param  string $text
	 * @return string text with the link definitions removed
	 */
	protected function stripLinkDefinitions($text) {
		// A definition may be indented by fewer spaces than one tab stop.
		$max_indent = $this->tab_width - 1;

		// Link defs are in the form: ^[id]: url "optional title"
		$definition_re = '{
							^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
							  [ ]*
							  \n?				# maybe *one* newline
							  [ ]*
							(?:
							  <(.+?)>			# url = $2
							|
							  (\S+?)			# url = $3
							)
							  [ ]*
							  \n?				# maybe one newline
							  [ ]*
							(?:
								(?<=\s)			# lookbehind for whitespace
								["(]
								(.*?)			# title = $4
								[")]
								[ ]*
							)?	# title is optional
							(?:\n+|\Z)
			}xm';

		return preg_replace_callback(
			$definition_re,
			array($this, '_stripLinkDefinitions_callback'),
			$text
		);
	}
 298  
	/**
	 * The callback to strip link definitions: records the URL and the
	 * optional title under the lower-cased link id.
	 * @param  array $matches 1 = id, 2 = <url>, 3 = bare url, 4 = title (optional)
	 * @return string always '' so that the definition disappears from the text
	 */
	protected function _stripLinkDefinitions_callback($matches) {
		$link_id = strtolower($matches[1]);

		// Prefer the angle-bracketed URL; fall back to the bare one.
		$this->urls[$link_id] = ($matches[2] != '') ? $matches[2] : $matches[3];

		// The title group is absent from $matches when no title was given.
		$this->titles[$link_id] = $matches[4] ?? null;

		return ''; // String that will replace the block
	}
 311  
	/**
	 * Hashify HTML blocks: every block-level HTML construct matched by the
	 * pattern below is handed to _hashHTMLBlocks_callback, which swaps it for
	 * a hash token. The text is returned untouched when $no_markup is set.
	 * @param  string $text
	 * @return string
	 */
	protected function hashHTMLBlocks($text) {
		if ($this->no_markup) {
			return $text;
		}

		// A block may be indented by fewer spaces than one tab stop.
		$less_than_tab = $this->tab_width - 1;

		/**
		 * Hashify HTML blocks:
		 *
		 * We only want to do this for block-level HTML tags, such as headers,
		 * lists, and tables. That's because we still want to wrap <p>s around
		 * "paragraphs" that are wrapped in non-block-level tags, such as
		 * anchors, phrase emphasis, and spans. The list of tags we're looking
		 * for is hard-coded:
		 *
		 * *  List "a" is made of tags which can be both inline or block-level.
		 *    These will be treated block-level when the start tag is alone on
		 *    its line, otherwise they're not matched here and will be taken as
		 *    inline later.
		 * *  List "b" is made of tags which are always block-level;
		 */
		$block_tags_a_re = 'ins|del';
		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
						   'script|noscript|style|form|fieldset|iframe|math|svg|'.
						   'article|section|nav|aside|hgroup|header|footer|'.
						   'figure|details|summary';

		// Regular expression for the content of a block tag.
		// $nested_tags_level is how many times the nested-tag fragment is
		// repeated when $content is assembled.
		$nested_tags_level = 4;
		$attr = '
			(?>				# optional tag attributes
			  \s			# starts with whitespace
			  (?>
				[^>"/]+		# text outside quotes
			  |
				/+(?!>)		# slash not followed by ">"
			  |
				"[^"]*"		# text inside double quotes (tolerate ">")
			  |
				\'[^\']*\'	# text inside single quotes (tolerate ">")
			  )*
			)?
			';
		// $content refers to the tag name through backreference \2 (group "b").
		$content =
			str_repeat('
				(?>
				  [^<]+			# content without tag
				|
				  <\2			# nested opening tag
					'.$attr.'	# attributes
					(?>
					  />
					|
					  >', $nested_tags_level).	// end of opening tag
					  '.*?'.					// last level nested tag content
			str_repeat('
					  </\2\s*>	# closing nested tag
					)
				  |
					<(?!/\2\s*>	# other tags with a different name
				  )
				)*',
				$nested_tags_level);
		// Same fragment for group "a" tags, whose name is captured as \3.
		$content2 = str_replace('\2', '\3', $content);

		/**
		 * First, look for nested blocks, e.g.:
		 * 	<div>
		 * 		<div>
		 * 		tags for inner block must be indented.
		 * 		</div>
		 * 	</div>
		 *
		 * The outermost tags must start at the left margin for this to match,
		 * and the inner nested divs must be indented.
		 * We need to do this before the next, more liberal match, because the
		 * next match will start at the first `<div>` and stop at the
		 * first `</div>`.
		 */
		$text = preg_replace_callback('{(?>
			(?>
				(?<=\n)			# Starting on its own line
				|				# or
				\A\n?			# the at beginning of the doc
			)
			(						# save in $1

			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
			  # in between.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_b_re.')# start tag = $2
						'.$attr.'>			# attributes followed by > and \n
						'.$content.'		# content, support nesting
						</\2>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special version for tags of group a.

						[ ]{0,'.$less_than_tab.'}
						<('.$block_tags_a_re.')# start tag = $3
						'.$attr.'>[ ]*\n	# attributes followed by >
						'.$content2.'		# content, support nesting
						</\3>				# the matching end tag
						[ ]*				# trailing spaces/tabs
						(?=\n+|\Z)	# followed by a newline or end of document

			| # Special case just for <hr />. It was easier to make a special
			  # case than to make the other regex more complicated.

						[ ]{0,'.$less_than_tab.'}
						<(hr)				# start tag = $2
						'.$attr.'			# attributes
						/?>					# the matching end tag
						[ ]*
						(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # Special case for standalone HTML comments:

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<!-- .*? -->
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			| # PHP and ASP-style processor instructions (<? and <%)

					[ ]{0,'.$less_than_tab.'}
					(?s:
						<([?%])			# $2
						.*?
						\2>
					)
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

			)
			)}Sxmi',
			array($this, '_hashHTMLBlocks_callback'),
			$text
		);

		return $text;
	}
 464  
	/**
	 * The callback for hashing HTML blocks: replaces the captured block with
	 * its block-level hash token, set apart by blank lines.
	 * @param  array $matches regex matches; index 1 holds the HTML block
	 * @return string
	 */
	protected function _hashHTMLBlocks_callback($matches) {
		return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
	}
 475  
	/**
	 * Called whenever a tag must be hashed when a function inserts an atomic
	 * element in the text stream. Passing $text through this function gives
	 * a unique text-token which will be reverted back when calling unhash.
	 *
	 * The $boundary argument specifies what character should be used to
	 * surround the token. By convention, "B" is used for block elements that
	 * need not be wrapped into paragraph tags at the end, ":" is used for
	 * elements that are word separators and "X" is used in the general case.
	 *
	 * @param  string $text
	 * @param  string $boundary
	 * @return string token of the form <boundary>\x1A<counter><boundary>
	 */
	protected function hashPart($text, $boundary = 'X') {
		// Swap back any tag hash found in $text so we do not have to `unhash`
		// multiple times at the end.
		$unhashed = $this->unhash($text);

		// Then hash the block under the next counter value.
		static $counter = 0;
		$token = sprintf("%s\x1A%d%s", $boundary, ++$counter, $boundary);
		$this->html_hashes[$token] = $unhashed;

		return $token; // String that will replace the tag.
	}
 501  
	/**
	 * Shortcut function for hashPart with block-level boundaries ("B").
	 * @param  string $text
	 * @return string
	 */
	protected function hashBlock($text) {
		$block_boundary = 'B';
		return $this->hashPart($text, $block_boundary);
	}
 510  
	/**
	 * Define the block gamut - these are all the transformations that form
	 * block-level tags like paragraphs, headers, and list items.
	 * Keys are method names, values are priorities; the constructor sorts the
	 * table with asort() and runBasicBlockGamut() runs it in that order.
	 */
	protected array $block_gamut = array(
		"doHeaders"         => 10,
		"doHorizontalRules" => 20,
		"doLists"           => 40,
		"doCodeBlocks"      => 50,
		"doBlockQuotes"     => 60,
	);
 522  
	/**
	 * Run block gamut transformations.
	 *
	 * We need to escape raw HTML in Markdown source before doing anything
	 * else. This needs to be done for each block, and not only at the
	 * beginning in the Markdown function since hashed blocks can be part of
	 * list items and could have been indented. Indented blocks would have
	 * been seen as a code block in a previous pass of hashHTMLBlocks.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBlockGamut($text) {
		return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
	}
 539  
	/**
	 * Run block gamut transformations, without hashing HTML blocks. This is
	 * useful when HTML blocks are known to be already hashed, like in the first
	 * whole-document pass.
	 *
	 * @param  string $text
	 * @return string
	 */
	protected function runBasicBlockGamut($text) {
		// Apply every block-level method in gamut order.
		foreach (array_keys($this->block_gamut) as $method) {
			$text = $this->$method($text);
		}

		// Finally form paragraphs and restore hashed blocks.
		return $this->formParagraphs($text);
	}
 559  
	/**
	 * Convert horizontal rules: a line made of three or more identical
	 * markers (-, * or _), optionally separated by up to two spaces.
	 * @param  string $text
	 * @return string
	 */
	protected function doHorizontalRules($text) {
		// The <hr> token is hashed once up front, whether or not a rule is
		// actually present in $text.
		$rule_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

		$rule_re = '{
				^[ ]{0,3}	# Leading space
				([-*_])		# $1: First marker
				(?>			# Repeated marker group
					[ ]{0,2}	# Zero, one, or two spaces.
					\1			# Marker character
				){2,}		# Group repeated at least twice
				[ ]*		# Tailing spaces
				$			# End of line.
			}mx';

		return preg_replace($rule_re, "\n" . $rule_token . "\n", $text);
	}
 581  
	/**
	 * These are all the transformations that occur *within* block-level
	 * tags like paragraphs, headers, and list items.
	 * Keys are method names, values are priorities; the constructor sorts the
	 * table with asort() and runSpanGamut() runs it in that order.
	 */
	protected array $span_gamut = array(
		// Process character escapes, code spans, and inline HTML
		// in one shot.
		"parseSpan"           => -30,
		// Process anchor and image tags. Images must come first,
		// because ![foo][f] looks like an anchor.
		"doImages"            =>  10,
		"doAnchors"           =>  20,
		// Make links out of things like `<https://example.com/>`
		// Must come after doAnchors, because you can use < and >
		// delimiters in inline links like [this](<url>).
		"doAutoLinks"         =>  30,
		"encodeAmpsAndAngles" =>  40,
		"doItalicsAndBold"    =>  50,
		"doHardBreaks"        =>  60,
	);
 602  
	/**
	 * Run span gamut transformations: every method listed in $span_gamut is
	 * applied to the text in turn.
	 * @param  string $text
	 * @return string
	 */
	protected function runSpanGamut($text) {
		foreach (array_keys($this->span_gamut) as $method) {
			$text = $this->$method($text);
		}
		return $text;
	}
 615  
	/**
	 * Do hard breaks. With $hard_wrap enabled every newline (plus any spaces
	 * before it) becomes a break; otherwise only newlines preceded by two or
	 * more spaces do.
	 * @param  string $text
	 * @return string
	 */
	protected function doHardBreaks($text) {
		$break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

		return preg_replace_callback(
			$break_re,
			array($this, '_doHardBreaks_callback'),
			$text
		);
	}
 630  
	/**
	 * Trigger part hashing for the hard break (callback method)
	 * @param  array $matches unused
	 * @return string hash token standing for the <br> element and its newline
	 */
	protected function _doHardBreaks_callback($matches) {
		$break = '<br' . $this->empty_element_suffix . "\n";
		return $this->hashPart($break);
	}
 639  
	/**
	 * Turn Markdown link shortcuts into XHTML <a> tags.
	 *
	 * Three passes are made, in this order: reference-style links,
	 * inline-style links, then reference-style shortcuts. The $in_anchor flag
	 * makes nested calls return the text untouched.
	 * @param  string $text
	 * @return string
	 */
	protected function doAnchors($text) {
		if ($this->in_anchor) {
			return $text;
		}
		$this->in_anchor = true;

		// Reference-style links: [link text] [id]
		$reference_re = '{
			(					# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]
			)
			}xs';

		// Inline-style links: [link text](url "optional title")
		$inline_re = '{
			(				# wrap whole match in $1
			  \[
				('.$this->nested_brackets_re.')	# link text = $2
			  \]
			  \(			# literal paren
				[ \n]*
				(?:
					<(.+?)>	# href = $3
				|
					('.$this->nested_url_parenthesis_re.')	# href = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# Title = $7
				  \6		# matching quote
				  [ \n]*	# ignore any spaces/tabs between closing quote and )
				)?			# title is optional
			  \)
			)
			}xs';

		// Reference-style shortcuts: [link text]
		$shortcut_re = '{
			(					# wrap whole match in $1
			  \[
				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
			  \]
			)
			}xs';

		// First, handle reference-style links.
		$text = preg_replace_callback($reference_re,
			array($this, '_doAnchors_reference_callback'), $text);

		// Next, inline-style links.
		$text = preg_replace_callback($inline_re,
			array($this, '_doAnchors_inline_callback'), $text);

		// Last, handle reference-style shortcuts. These must come last in
		// case you've also got [link text][1] or [link text](/foo)
		$text = preg_replace_callback($shortcut_re,
			array($this, '_doAnchors_reference_callback'), $text);

		$this->in_anchor = false;
		return $text;
	}
 708  
	/**
	 * Callback method to parse referenced anchors
	 * @param  array $matches 1 = whole match, 2 = link text, 3 = id (absent
	 *                        for shortcut links)
	 * @return string hashed <a> element, or the original text when the id is
	 *                not a known link definition
	 */
	protected function _doAnchors_reference_callback($matches) {
		$link_text = $matches[2];
		$link_id   = $matches[3] ?? '';

		if ($link_id == '') {
			// for shortcut links like [this][] or [this].
			$link_id = $link_text;
		}

		// lower-case and turn embedded newlines into spaces
		$link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

		// Unknown id: leave the text exactly as it was written.
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$url    = $this->encodeURLAttribute($this->urls[$link_id]);
		$anchor = "<a href=\"$url\"";

		if (isset($this->titles[$link_id])) {
			$title   = $this->encodeAttribute($this->titles[$link_id]);
			$anchor .= " title=\"$title\"";
		}

		$link_text = $this->runSpanGamut($link_text);
		$anchor   .= ">$link_text</a>";

		return $this->hashPart($anchor);
	}
 747  
	/**
	 * Callback method to parse inline anchors
	 * @param  array $matches 2 = link text, 3 = <url>, 4 = bare url,
	 *                        7 = title (optional)
	 * @return string hashed <a> element
	 */
	protected function _doAnchors_inline_callback($matches) {
		$link_text = $this->runSpanGamut($matches[2]);
		$url       = ($matches[3] === '') ? $matches[4] : $matches[3];
		$title     = $matches[7] ?? null;

		// If the URL was of the form <s p a c e s> it got caught by the HTML
		// tag parser and hashed. Need to reverse the process before using
		// the URL.
		$unhashed = $this->unhash($url);
		if ($unhashed !== $url) {
			$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
		}

		$url    = $this->encodeURLAttribute($url);
		$anchor = "<a href=\"$url\"";

		if ($title) {
			$title   = $this->encodeAttribute($title);
			$anchor .= " title=\"$title\"";
		}

		// NOTE(review): the link text goes through runSpanGamut a second time
		// here; kept as-is to preserve the existing output.
		$link_text = $this->runSpanGamut($link_text);
		$anchor   .= ">$link_text</a>";

		return $this->hashPart($anchor);
	}
 778  
	/**
	 * Turn Markdown image shortcuts into <img> tags.
	 *
	 * Reference-style images are handled first, inline images second.
	 * @param  string $text
	 * @return string
	 */
	protected function doImages($text) {
		// Reference-style labeled images: ![alt text][id]
		$reference_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]

			  [ ]?				# one optional space
			  (?:\n[ ]*)?		# one optional newline followed by spaces

			  \[
				(.*?)		# id = $3
			  \]

			)
			}xs';

		// Inline images: ![alt text](url "optional title")
		// Don't forget: encode * and _
		$inline_re = '{
			(				# wrap whole match in $1
			  !\[
				('.$this->nested_brackets_re.')		# alt text = $2
			  \]
			  \s?			# One optional whitespace character
			  \(			# literal paren
				[ \n]*
				(?:
					<(\S*)>	# src url = $3
				|
					('.$this->nested_url_parenthesis_re.')	# src url = $4
				)
				[ \n]*
				(			# $5
				  ([\'"])	# quote char = $6
				  (.*?)		# title = $7
				  \6		# matching quote
				  [ \n]*
				)?			# title is optional
			  \)
			)
			}xs';

		$text = preg_replace_callback($reference_re,
			array($this, '_doImages_reference_callback'), $text);

		return preg_replace_callback($inline_re,
			array($this, '_doImages_inline_callback'), $text);
	}
 832  
	/**
	 * Callback to parse reference image tags
	 * @param  array $matches 1 = whole match, 2 = alt text, 3 = id
	 * @return string hashed <img> element, or the original text when the id
	 *                is not a known link definition
	 */
	protected function _doImages_reference_callback($matches) {
		$alt_text = $matches[2];
		$link_id  = strtolower($matches[3]);

		if ($link_id == '') {
			// for shortcut links like ![this][].
			$link_id = strtolower($alt_text);
		}

		$alt_text = $this->encodeAttribute($alt_text);

		// If there's no such link ID, leave intact:
		if (!isset($this->urls[$link_id])) {
			return $matches[1];
		}

		$url = $this->encodeURLAttribute($this->urls[$link_id]);
		$img = "<img src=\"$url\" alt=\"$alt_text\"";

		if (isset($this->titles[$link_id])) {
			$title = $this->encodeAttribute($this->titles[$link_id]);
			$img  .= " title=\"$title\"";
		}

		return $this->hashPart($img . $this->empty_element_suffix);
	}
 865  
	/**
	 * Callback to parse inline image tags
	 * @param  array $matches 2 = alt text, 3 = <url>, 4 = bare url,
	 *                        7 = title (optional)
	 * @return string hashed <img> element
	 */
	protected function _doImages_inline_callback($matches) {
		$alt_text = $this->encodeAttribute($matches[2]);

		// Prefer the angle-bracketed URL; fall back to the bare one.
		$url = ($matches[3] != '') ? $matches[3] : $matches[4];
		$url = $this->encodeURLAttribute($url);

		$img = "<img src=\"$url\" alt=\"$alt_text\"";

		// The title group is absent from $matches when no title was given;
		// an explicitly empty title still produces a title attribute.
		$title = $matches[7] ?? null;
		if ($title !== null) {
			$title = $this->encodeAttribute($title);
			$img  .= " title=\"$title\"";
		}

		return $this->hashPart($img . $this->empty_element_suffix);
	}
 888  
	/**
	 * Parse Markdown heading elements to HTML. Setext-style headers are
	 * converted first, atx-style headers second.
	 * @param  string $text
	 * @return string
	 */
	protected function doHeaders($text) {
		/**
		 * Setext-style headers:
		 *	  Header 1
		 *	  ========
		 *
		 *	  Header 2
		 *	  --------
		 */
		$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';

		/**
		 * atx-style headers:
		 *   # Header 1
		 *   ## Header 2
		 *   ## Header 2 with closing hashes ##
		 *   ...
		 *   ###### Header 6
		 */
		$atx_re = '{
				^(\#{1,6})	# $1 = string of #\'s
				[ ]*
				(.+?)		# $2 = Header text
				[ ]*
				\#*			# optional closing #\'s (not counted)
				\n+
			}xm';

		$text = preg_replace_callback($setext_re,
			array($this, '_doHeaders_callback_setext'), $text);

		return preg_replace_callback($atx_re,
			array($this, '_doHeaders_callback_atx'), $text);
	}
 926  
	/**
	 * Setext header parsing callback
	 * @param  array $matches 1 = header text, 2 = underline (run of = or -)
	 * @return string hashed <h1> (= underline) or <h2> (- underline) block
	 */
	protected function _doHeaders_callback_setext($matches) {
		$header_text = $matches[1];
		$underline   = $matches[2];

		// Terrible hack to check we haven't found an empty list item.
		if ($underline == '-' && preg_match('{^-(?: |$)}', $header_text)) {
			return $matches[0];
		}

		$level = ($underline[0] == '=') ? 1 : 2;

		// ID attribute generation (done before the span gamut runs).
		$idAtt = $this->_generateIdFromHeaderValue($header_text);

		$content = $this->runSpanGamut($header_text);
		$block   = "<h$level$idAtt>" . $content . "</h$level>";

		return "\n" . $this->hashBlock($block) . "\n\n";
	}
 946  
 947  	 /**
 948  	  * ATX header parsing callback
 949  	  * @param  array $matches
 950  	  * @return string
 951  	  */
 952  	protected function _doHeaders_callback_atx($matches) {
 953  	 	 // ID attribute generation
 954  	 	 $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
 955  
 956  	 	 $level = strlen($matches[1]);
 957  	 	 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
 958  	 	 return "\n" . $this->hashBlock($block) . "\n\n";
 959  	 }
 960  
 961  	 /**
 962  	  * If a header_id_func property is set, we can use it to automatically
 963  	  * generate an id attribute.
 964  	  *
 965  	  * This method returns a string in the form id="foo", or an empty string
 966  	  * otherwise.
 967  	  * @param  string $headerValue
 968  	  * @return string
 969  	  */
 970  	protected function _generateIdFromHeaderValue($headerValue) {
 971  	 	 if (!is_callable($this->header_id_func)) {
 972  	 	 	 return "";
 973  	 	 }
 974  
 975  	 	 $idValue = call_user_func($this->header_id_func, $headerValue);
 976  	 	 if (!$idValue) {
 977  	 	 	 return "";
 978  	 	 }
 979  
 980  	 	 return ' id="' . $this->encodeAttribute($idValue) . '"';
 981  	 }
 982  
 983  	 /**
 984  	  * Form HTML ordered (numbered) and unordered (bulleted) lists.
 985  	  * @param  string $text
 986  	  * @return string
 987  	  */
 988  	protected function doLists($text) {
 989  	 	 $less_than_tab = $this->tab_width - 1;
 990  
 991  	 	 // Re-usable patterns to match list item bullets and number markers:
 992  	 	 $marker_ul_re  = '[*+-]';
 993  	 	 $marker_ol_re  = '\d+[\.]';
 994  
 995  	 	 $markers_relist = array(
 996  	 	 	 $marker_ul_re => $marker_ol_re,
 997  	 	 	 $marker_ol_re => $marker_ul_re,
 998  	 	 	 );
 999  
1000  	 	 foreach ($markers_relist as $marker_re => $other_marker_re) {
1001  	 	 	 // Re-usable pattern to match any entirel ul or ol list:
1002  	 	 	 $whole_list_re = '
1003  	 	 	 	 (	 	 	 	 	 	 	 	 # $1 = whole list
1004  	 	 	 	   (	 	 	 	 	 	 	 	 # $2
1005  	 	 	 	 	 ([ ]{0,'.$less_than_tab.'})	 # $3 = number of spaces
1006  	 	 	 	 	 ('.$marker_re.')	 	 	 # $4 = first list item marker
1007  	 	 	 	 	 [ ]+
1008  	 	 	 	   )
1009  	 	 	 	   (?s:.+?)
1010  	 	 	 	   (	 	 	 	 	 	 	 	 # $5
1011  	 	 	 	 	   \z
1012  	 	 	 	 	 |
1013  	 	 	 	 	   \n{2,}
1014  	 	 	 	 	   (?=\S)
1015  	 	 	 	 	   (?!	 	 	 	 	 	 # Negative lookahead for another list item marker
1016  	 	 	 	 	 	 [ ]*
1017  	 	 	 	 	 	 '.$marker_re.'[ ]+
1018  	 	 	 	 	   )
1019  	 	 	 	 	 |
1020  	 	 	 	 	   (?=	 	 	 	 	 	 # Lookahead for another kind of list
1021  	 	 	 	 	     \n
1022  	 	 	 	 	 	 \3	 	 	 	 	 	 # Must have the same indentation
1023  	 	 	 	 	 	 '.$other_marker_re.'[ ]+
1024  	 	 	 	 	   )
1025  	 	 	 	   )
1026  	 	 	 	 )
1027  	 	 	 '; // mx
1028  
1029  	 	 	 // We use a different prefix before nested lists than top-level lists.
1030  	 	 	 //See extended comment in _ProcessListItems().
1031  
1032  	 	 	 if ($this->list_level) {
1033  	 	 	 	 $text = preg_replace_callback('{
1034  	 	 	 	 	 	 ^
1035  	 	 	 	 	 	 '.$whole_list_re.'
1036  	 	 	 	 	 }mx',
1037  	 	 	 	 	 array($this, '_doLists_callback'), $text);
1038  	 	 	 } else {
1039  	 	 	 	 $text = preg_replace_callback('{
1040  	 	 	 	 	 	 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1041  	 	 	 	 	 	 '.$whole_list_re.'
1042  	 	 	 	 	 }mx',
1043  	 	 	 	 	 array($this, '_doLists_callback'), $text);
1044  	 	 	 }
1045  	 	 }
1046  
1047  	 	 return $text;
1048  	 }
1049  
1050  	 /**
1051  	  * List parsing callback
1052  	  * @param  array $matches
1053  	  * @return string
1054  	  */
1055  	protected function _doLists_callback($matches) {
1056  	 	 // Re-usable patterns to match list item bullets and number markers:
1057  	 	 $marker_ul_re  = '[*+-]';
1058  	 	 $marker_ol_re  = '\d+[\.]';
1059  	 	 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1060  	 	 $marker_ol_start_re = '[0-9]+';
1061  
1062  	 	 $list = $matches[1];
1063  	 	 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1064  
1065  	 	 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1066  
1067  	 	 $list .= "\n";
1068  	 	 $result = $this->processListItems($list, $marker_any_re);
1069  
1070  	 	 $ol_start = 1;
1071  	 	 if ($this->enhanced_ordered_list) {
1072  	 	 	 // Get the start number for ordered list.
1073  	 	 	 if ($list_type == 'ol') {
1074  	 	 	 	 $ol_start_array = array();
1075  	 	 	 	 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
1076  	 	 	 	 if ($ol_start_check){
1077  	 	 	 	 	 $ol_start = $ol_start_array[0];
1078  	 	 	 	 }
1079  	 	 	 }
1080  	 	 }
1081  
1082  	 	 if ($ol_start > 1 && $list_type == 'ol'){
1083  	 	 	 $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
1084  	 	 } else {
1085  	 	 	 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1086  	 	 }
1087  	 	 return "\n". $result ."\n\n";
1088  	 }
1089  
	 /**
	  * Nesting tracker for list levels.
	  *
	  * processListItems() increments it on entry and decrements it on exit,
	  * so zero means no list is currently being processed. doLists() reads it
	  * to choose between the nested-list and the top-level list pattern.
	  */
	 protected int $list_level = 0;
1094  
1095  	 /**
1096  	  * Process the contents of a single ordered or unordered list, splitting it
1097  	  * into individual list items.
1098  	  * @param  string $list_str
1099  	  * @param  string $marker_any_re
1100  	  * @return string
1101  	  */
1102  	protected function processListItems($list_str, $marker_any_re) {
1103  	 	 /**
1104  	 	  * The $this->list_level global keeps track of when we're inside a list.
1105  	 	  * Each time we enter a list, we increment it; when we leave a list,
1106  	 	  * we decrement. If it's zero, we're not in a list anymore.
1107  	 	  *
1108  	 	  * We do this because when we're not inside a list, we want to treat
1109  	 	  * something like this:
1110  	 	  *
1111  	 	  *	 	 I recommend upgrading to version
1112  	 	  *	 	 8. Oops, now this line is treated
1113  	 	  *	 	 as a sub-list.
1114  	 	  *
1115  	 	  * As a single paragraph, despite the fact that the second line starts
1116  	 	  * with a digit-period-space sequence.
1117  	 	  *
1118  	 	  * Whereas when we're inside a list (or sub-list), that line will be
1119  	 	  * treated as the start of a sub-list. What a kludge, huh? This is
1120  	 	  * an aspect of Markdown's syntax that's hard to parse perfectly
1121  	 	  * without resorting to mind-reading. Perhaps the solution is to
1122  	 	  * change the syntax rules such that sub-lists must start with a
1123  	 	  * starting cardinal number; e.g. "1." or "a.".
1124  	 	  */
1125  	 	 $this->list_level++;
1126  
1127  	 	 // Trim trailing blank lines:
1128  	 	 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1129  
1130  	 	 $list_str = preg_replace_callback('{
1131  	 	 	 (\n)?	 	 	 	 	 	 	 # leading line = $1
1132  	 	 	 (^[ ]*)	 	 	 	 	 	 	 # leading whitespace = $2
1133  	 	 	 ('.$marker_any_re.'	 	 	 	 # list marker and space = $3
1134  	 	 	 	 (?:[ ]+|(?=\n))	 # space only required if item is not empty
1135  	 	 	 )
1136  	 	 	 ((?s:.*?))	 	 	 	 	 	 # list item text   = $4
1137  	 	 	 (?:(\n+(?=\n))|\n)	 	 	 	 # tailing blank line = $5
1138  	 	 	 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1139  	 	 	 }xm',
1140  	 	 	 array($this, '_processListItems_callback'), $list_str);
1141  
1142  	 	 $this->list_level--;
1143  	 	 return $list_str;
1144  	 }
1145  
1146  	 /**
1147  	  * List item parsing callback
1148  	  * @param  array $matches
1149  	  * @return string
1150  	  */
1151  	protected function _processListItems_callback($matches) {
1152  	 	 $item = $matches[4];
1153  	 	 $leading_line =& $matches[1];
1154  	 	 $leading_space =& $matches[2];
1155  	 	 $marker_space = $matches[3];
1156  	 	 $tailing_blank_line =& $matches[5];
1157  
1158  	 	 if ($leading_line || $tailing_blank_line ||
1159  	 	 	 preg_match('/\n{2,}/', $item))
1160  	 	 {
1161  	 	 	 // Replace marker with the appropriate whitespace indentation
1162  	 	 	 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1163  	 	 	 $item = $this->runBlockGamut($this->outdent($item)."\n");
1164  	 	 } else {
1165  	 	 	 // Recursion for sub-lists:
1166  	 	 	 $item = $this->doLists($this->outdent($item));
1167  	 	 	 $item = $this->formParagraphs($item, false);
1168  	 	 }
1169  
1170  	 	 return "<li>" . $item . "</li>\n";
1171  	 }
1172  
1173  	 /**
1174  	  * Process Markdown `<pre><code>` blocks.
1175  	  * @param  string $text
1176  	  * @return string
1177  	  */
1178  	protected function doCodeBlocks($text) {
1179  	 	 $text = preg_replace_callback('{
1180  	 	 	 	 (?:\n\n|\A\n?)
1181  	 	 	 	 (	             # $1 = the code block -- one or more lines, starting with a space/tab
1182  	 	 	 	   (?>
1183  	 	 	 	 	 [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1184  	 	 	 	 	 .*\n+
1185  	 	 	 	   )+
1186  	 	 	 	 )
1187  	 	 	 	 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	 # Lookahead for non-space at line-start, or end of doc
1188  	 	 	 }xm',
1189  	 	 	 array($this, '_doCodeBlocks_callback'), $text);
1190  
1191  	 	 return $text;
1192  	 }
1193  
1194  	 /**
1195  	  * Code block parsing callback
1196  	  * @param  array $matches
1197  	  * @return string
1198  	  */
1199  	protected function _doCodeBlocks_callback($matches) {
1200  	 	 $codeblock = $matches[1];
1201  
1202  	 	 $codeblock = $this->outdent($codeblock);
1203  	 	 if (is_callable($this->code_block_content_func)) {
1204  	 	 	 $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1205  	 	 } else {
1206  	 	 	 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1207  	 	 }
1208  
1209  	 	 # trim leading newlines and trailing newlines
1210  	 	 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1211  
1212  	 	 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1213  	 	 return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
1214  	 }
1215  
1216  	 /**
1217  	  * Create a code span markup for $code. Called from handleSpanToken.
1218  	  * @param  string $code
1219  	  * @return string
1220  	  */
1221  	protected function makeCodeSpan($code) {
1222  	 	 if (is_callable($this->code_span_content_func)) {
1223  	 	 	 $code = call_user_func($this->code_span_content_func, $code);
1224  	 	 } else {
1225  	 	 	 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1226  	 	 }
1227  	 	 return $this->hashPart("<code>$code</code>");
1228  	 }
1229  
1230  	 /**
1231  	  * Define the emphasis operators with their regex matches
1232  	  * @var array
1233  	  */
1234  	 protected array $em_relist = array(
1235  	 	 ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
1236  	 	 '*' => '(?<![\s*])\*(?!\*)',
1237  	 	 '_' => '(?<![\s_])_(?!_)',
1238  	 );
1239  
1240  	 /**
1241  	  * Define the strong operators with their regex matches
1242  	  * @var array
1243  	  */
1244  	 protected array $strong_relist = array(
1245  	 	 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
1246  	 	 '**' => '(?<![\s*])\*\*(?!\*)',
1247  	 	 '__' => '(?<![\s_])__(?!_)',
1248  	 );
1249  
1250  	 /**
1251  	  * Define the emphasis + strong operators with their regex matches
1252  	  * @var array
1253  	  */
1254  	 protected array $em_strong_relist = array(
1255  	 	 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1256  	 	 '***' => '(?<![\s*])\*\*\*(?!\*)',
1257  	 	 '___' => '(?<![\s_])___(?!_)',
1258  	 );
1259  
1260  	 /**
1261  	  * Container for prepared regular expressions
1262  	  */
1263  	 protected ?array $em_strong_prepared_relist = null;
1264  
1265  	 /**
1266  	  * Prepare regular expressions for searching emphasis tokens in any
1267  	  * context.
1268  	  * @return void
1269  	  */
1270  	protected function prepareItalicsAndBold() {
1271  	 	 foreach ($this->em_relist as $em => $em_re) {
1272  	 	 	 foreach ($this->strong_relist as $strong => $strong_re) {
1273  	 	 	 	 // Construct list of allowed token expressions.
1274  	 	 	 	 $token_relist = array();
1275  	 	 	 	 if (isset($this->em_strong_relist["$em$strong"])) {
1276  	 	 	 	 	 $token_relist[] = $this->em_strong_relist["$em$strong"];
1277  	 	 	 	 }
1278  	 	 	 	 $token_relist[] = $em_re;
1279  	 	 	 	 $token_relist[] = $strong_re;
1280  
1281  	 	 	 	 // Construct master expression from list.
1282  	 	 	 	 $token_re = '{(' . implode('|', $token_relist) . ')}';
1283  	 	 	 	 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1284  	 	 	 }
1285  	 	 }
1286  	 }
1287  
1288  	 /**
1289  	  * Convert Markdown italics (emphasis) and bold (strong) to HTML
1290  	  * @param  string $text
1291  	  * @return string
1292  	  */
1293  	protected function doItalicsAndBold($text) {
1294  	 	 if ($this->in_emphasis_processing) {
1295  	 	 	 return $text; // avoid reentrency
1296  	 	 }
1297  	 	 $this->in_emphasis_processing = true;
1298  
1299  	 	 $token_stack = array('');
1300  	 	 $text_stack = array('');
1301  	 	 $em = '';
1302  	 	 $strong = '';
1303  	 	 $tree_char_em = false;
1304  
1305  	 	 while (1) {
1306  	 	 	 // Get prepared regular expression for seraching emphasis tokens
1307  	 	 	 // in current context.
1308  	 	 	 $token_re = $this->em_strong_prepared_relist["$em$strong"];
1309  
1310  	 	 	 // Each loop iteration search for the next emphasis token.
1311  	 	 	 // Each token is then passed to handleSpanToken.
1312  	 	 	 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1313  	 	 	 $text_stack[0] .= $parts[0];
1314  	 	 	 $token =& $parts[1];
1315  	 	 	 $text =& $parts[2];
1316  
1317  	 	 	 if (empty($token)) {
1318  	 	 	 	 // Reached end of text span: empty stack without emitting.
1319  	 	 	 	 // any more emphasis.
1320  	 	 	 	 while ($token_stack[0]) {
1321  	 	 	 	 	 $text_stack[1] .= array_shift($token_stack);
1322  	 	 	 	 	 $text_stack[0] .= array_shift($text_stack);
1323  	 	 	 	 }
1324  	 	 	 	 break;
1325  	 	 	 }
1326  
1327  	 	 	 $token_len = strlen($token);
1328  	 	 	 if ($tree_char_em) {
1329  	 	 	 	 // Reached closing marker while inside a three-char emphasis.
1330  	 	 	 	 if ($token_len == 3) {
1331  	 	 	 	 	 // Three-char closing marker, close em and strong.
1332  	 	 	 	 	 array_shift($token_stack);
1333  	 	 	 	 	 $span = array_shift($text_stack);
1334  	 	 	 	 	 $span = $this->runSpanGamut($span);
1335  	 	 	 	 	 $span = "<strong><em>$span</em></strong>";
1336  	 	 	 	 	 $text_stack[0] .= $this->hashPart($span);
1337  	 	 	 	 	 $em = '';
1338  	 	 	 	 	 $strong = '';
1339  	 	 	 	 } else {
1340  	 	 	 	 	 // Other closing marker: close one em or strong and
1341  	 	 	 	 	 // change current token state to match the other
1342  	 	 	 	 	 $token_stack[0] = str_repeat($token[0], 3-$token_len);
1343  	 	 	 	 	 $tag = $token_len == 2 ? "strong" : "em";
1344  	 	 	 	 	 $span = $text_stack[0];
1345  	 	 	 	 	 $span = $this->runSpanGamut($span);
1346  	 	 	 	 	 $span = "<$tag>$span</$tag>";
1347  	 	 	 	 	 $text_stack[0] = $this->hashPart($span);
1348  	 	 	 	 	 $$tag = ''; // $$tag stands for $em or $strong
1349  	 	 	 	 }
1350  	 	 	 	 $tree_char_em = false;
1351  	 	 	 } else if ($token_len == 3) {
1352  	 	 	 	 if ($em) {
1353  	 	 	 	 	 // Reached closing marker for both em and strong.
1354  	 	 	 	 	 // Closing strong marker:
1355  	 	 	 	 	 for ($i = 0; $i < 2; ++$i) {
1356  	 	 	 	 	 	 $shifted_token = array_shift($token_stack);
1357  	 	 	 	 	 	 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1358  	 	 	 	 	 	 $span = array_shift($text_stack);
1359  	 	 	 	 	 	 $span = $this->runSpanGamut($span);
1360  	 	 	 	 	 	 $span = "<$tag>$span</$tag>";
1361  	 	 	 	 	 	 $text_stack[0] .= $this->hashPart($span);
1362  	 	 	 	 	 	 $$tag = ''; // $$tag stands for $em or $strong
1363  	 	 	 	 	 }
1364  	 	 	 	 } else {
1365  	 	 	 	 	 // Reached opening three-char emphasis marker. Push on token
1366  	 	 	 	 	 // stack; will be handled by the special condition above.
1367  	 	 	 	 	 $em = $token[0];
1368  	 	 	 	 	 $strong = "$em$em";
1369  	 	 	 	 	 array_unshift($token_stack, $token);
1370  	 	 	 	 	 array_unshift($text_stack, '');
1371  	 	 	 	 	 $tree_char_em = true;
1372  	 	 	 	 }
1373  	 	 	 } else if ($token_len == 2) {
1374  	 	 	 	 if ($strong) {
1375  	 	 	 	 	 // Unwind any dangling emphasis marker:
1376  	 	 	 	 	 if (strlen($token_stack[0]) == 1) {
1377  	 	 	 	 	 	 $text_stack[1] .= array_shift($token_stack);
1378  	 	 	 	 	 	 $text_stack[0] .= array_shift($text_stack);
1379  	 	 	 	 	 	 $em = '';
1380  	 	 	 	 	 }
1381  	 	 	 	 	 // Closing strong marker:
1382  	 	 	 	 	 array_shift($token_stack);
1383  	 	 	 	 	 $span = array_shift($text_stack);
1384  	 	 	 	 	 $span = $this->runSpanGamut($span);
1385  	 	 	 	 	 $span = "<strong>$span</strong>";
1386  	 	 	 	 	 $text_stack[0] .= $this->hashPart($span);
1387  	 	 	 	 	 $strong = '';
1388  	 	 	 	 } else {
1389  	 	 	 	 	 array_unshift($token_stack, $token);
1390  	 	 	 	 	 array_unshift($text_stack, '');
1391  	 	 	 	 	 $strong = $token;
1392  	 	 	 	 }
1393  	 	 	 } else {
1394  	 	 	 	 // Here $token_len == 1
1395  	 	 	 	 if ($em) {
1396  	 	 	 	 	 if (strlen($token_stack[0]) == 1) {
1397  	 	 	 	 	 	 // Closing emphasis marker:
1398  	 	 	 	 	 	 array_shift($token_stack);
1399  	 	 	 	 	 	 $span = array_shift($text_stack);
1400  	 	 	 	 	 	 $span = $this->runSpanGamut($span);
1401  	 	 	 	 	 	 $span = "<em>$span</em>";
1402  	 	 	 	 	 	 $text_stack[0] .= $this->hashPart($span);
1403  	 	 	 	 	 	 $em = '';
1404  	 	 	 	 	 } else {
1405  	 	 	 	 	 	 $text_stack[0] .= $token;
1406  	 	 	 	 	 }
1407  	 	 	 	 } else {
1408  	 	 	 	 	 array_unshift($token_stack, $token);
1409  	 	 	 	 	 array_unshift($text_stack, '');
1410  	 	 	 	 	 $em = $token;
1411  	 	 	 	 }
1412  	 	 	 }
1413  	 	 }
1414  	 	 $this->in_emphasis_processing = false;
1415  	 	 return $text_stack[0];
1416  	 }
1417  
1418  	 /**
1419  	  * Parse Markdown blockquotes to HTML
1420  	  * @param  string $text
1421  	  * @return string
1422  	  */
1423  	protected function doBlockQuotes($text) {
1424  	 	 $text = preg_replace_callback('/
1425  	 	 	   (	 	 	 	 	 	 	 	 # Wrap whole match in $1
1426  	 	 	 	 (?>
1427  	 	 	 	   ^[ ]*>[ ]?	 	 	 # ">" at the start of a line
1428  	 	 	 	 	 .+\n	 	 	 	 	 # rest of the first line
1429  	 	 	 	   (.+\n)*	 	 	 	 	 # subsequent consecutive lines
1430  	 	 	 	   \n*	 	 	 	 	 	 # blanks
1431  	 	 	 	 )+
1432  	 	 	   )
1433  	 	 	 /xm',
1434  	 	 	 array($this, '_doBlockQuotes_callback'), $text);
1435  
1436  	 	 return $text;
1437  	 }
1438  
1439  	 /**
1440  	  * Blockquote parsing callback
1441  	  * @param  array $matches
1442  	  * @return string
1443  	  */
1444  	protected function _doBlockQuotes_callback($matches) {
1445  	 	 $bq = $matches[1];
1446  	 	 // trim one level of quoting - trim whitespace-only lines
1447  	 	 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1448  	 	 $bq = $this->runBlockGamut($bq); // recurse
1449  
1450  	 	 $bq = preg_replace('/^/m', "  ", $bq);
1451  	 	 // These leading spaces cause problem with <pre> content,
1452  	 	 // so we need to fix that:
1453  	 	 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1454  	 	 	 array($this, '_doBlockQuotes_callback2'), $bq);
1455  
1456  	 	 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
1457  	 }
1458  
1459  	 /**
1460  	  * Blockquote parsing callback
1461  	  * @param  array $matches
1462  	  * @return string
1463  	  */
1464  	protected function _doBlockQuotes_callback2($matches) {
1465  	 	 $pre = $matches[1];
1466  	 	 $pre = preg_replace('/^  /m', '', $pre);
1467  	 	 return $pre;
1468  	 }
1469  
1470  	 /**
1471  	  * Parse paragraphs
1472  	  *
1473  	  * @param  string $text String to process in paragraphs
1474  	  * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1475  	  * @return string
1476  	  */
1477  	protected function formParagraphs($text, $wrap_in_p = true) {
1478  	 	 // Strip leading and trailing lines:
1479  	 	 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1480  
1481  	 	 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1482  
1483  	 	 // Wrap <p> tags and unhashify HTML blocks
1484  	 	 foreach ($grafs as $key => $value) {
1485  	 	 	 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1486  	 	 	 	 // Is a paragraph.
1487  	 	 	 	 $value = $this->runSpanGamut($value);
1488  	 	 	 	 if ($wrap_in_p) {
1489  	 	 	 	 	 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1490  	 	 	 	 	 $value .= "</p>";
1491  	 	 	 	 }
1492  	 	 	 	 $grafs[$key] = $this->unhash($value);
1493  	 	 	 } else {
1494  	 	 	 	 // Is a block.
1495  	 	 	 	 // Modify elements of @grafs in-place...
1496  	 	 	 	 $graf = $value;
1497  	 	 	 	 $block = $this->html_hashes[$graf];
1498  	 	 	 	 $graf = $block;
1499  //	 	 	 	 if (preg_match('{
1500  //	 	 	 	 	 \A
1501  //	 	 	 	 	 (	 	 	 	 	 	 	 # $1 = <div> tag
1502  //	 	 	 	 	   <div  \s+
1503  //	 	 	 	 	   [^>]*
1504  //	 	 	 	 	   \b
1505  //	 	 	 	 	   markdown\s*=\s*  ([\'"])	 #	 $2 = attr quote char
1506  //	 	 	 	 	   1
1507  //	 	 	 	 	   \2
1508  //	 	 	 	 	   [^>]*
1509  //	 	 	 	 	   >
1510  //	 	 	 	 	 )
1511  //	 	 	 	 	 (	 	 	 	 	 	 	 # $3 = contents
1512  //	 	 	 	 	 .*
1513  //	 	 	 	 	 )
1514  //	 	 	 	 	 (</div>)	 	 	 	 	 # $4 = closing tag
1515  //	 	 	 	 	 \z
1516  //	 	 	 	 	 }xs', $block, $matches))
1517  //	 	 	 	 {
1518  //	 	 	 	 	 list(, $div_open, , $div_content, $div_close) = $matches;
1519  //
1520  //	 	 	 	 	 // We can't call Markdown(), because that resets the hash;
1521  //	 	 	 	 	 // that initialization code should be pulled into its own sub, though.
1522  //	 	 	 	 	 $div_content = $this->hashHTMLBlocks($div_content);
1523  //
1524  //	 	 	 	 	 // Run document gamut methods on the content.
1525  //	 	 	 	 	 foreach ($this->document_gamut as $method => $priority) {
1526  //	 	 	 	 	 	 $div_content = $this->$method($div_content);
1527  //	 	 	 	 	 }
1528  //
1529  //	 	 	 	 	 $div_open = preg_replace(
1530  //	 	 	 	 	 	 '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1531  //
1532  //	 	 	 	 	 $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1533  //	 	 	 	 }
1534  	 	 	 	 $grafs[$key] = $graf;
1535  	 	 	 }
1536  	 	 }
1537  
1538  	 	 return implode("\n\n", $grafs);
1539  	 }
1540  
1541  	 /**
1542  	  * Encode text for a double-quoted HTML attribute. This function
1543  	  * is *not* suitable for attributes enclosed in single quotes.
1544  	  * @param  string $text
1545  	  * @return string
1546  	  */
1547  	protected function encodeAttribute($text) {
1548  	 	 $text = $this->encodeAmpsAndAngles($text);
1549  	 	 $text = str_replace('"', '&quot;', $text);
1550  	 	 return $text;
1551  	 }
1552  
1553  	 /**
1554  	  * Encode text for a double-quoted HTML attribute containing a URL,
1555  	  * applying the URL filter if set. Also generates the textual
1556  	  * representation for the URL (removing mailto: or tel:) storing it in $text.
1557  	  * This function is *not* suitable for attributes enclosed in single quotes.
1558  	  *
1559  	  * @param  string $url
1560  	  * @param  string $text Passed by reference
1561  	  * @return string        URL
1562  	  */
1563  	protected function encodeURLAttribute($url, &$text = null) {
1564  	 	 if (is_callable($this->url_filter_func)) {
1565  	 	 	 $url = call_user_func($this->url_filter_func, $url);
1566  	 	 }
1567  
1568  	 	 if (preg_match('{^mailto:}i', $url)) {
1569  	 	 	 $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1570  	 	 } else if (preg_match('{^tel:}i', $url)) {
1571  	 	 	 $url = $this->encodeAttribute($url);
1572  	 	 	 $text = substr($url, 4);
1573  	 	 } else {
1574  	 	 	 $url = $this->encodeAttribute($url);
1575  	 	 	 $text = $url;
1576  	 	 }
1577  
1578  	 	 return $url;
1579  	 }
1580  
1581  	 /**
1582  	  * Smart processing for ampersands and angle brackets that need to
1583  	  * be encoded. Valid character entities are left alone unless the
1584  	  * no-entities mode is set.
1585  	  * @param  string $text
1586  	  * @return string
1587  	  */
1588  	protected function encodeAmpsAndAngles($text) {
1589  	 	 if ($this->no_entities) {
1590  	 	 	 $text = str_replace('&', '&amp;', $text);
1591  	 	 } else {
1592  	 	 	 // Ampersand-encoding based entirely on Nat Irons's Amputator
1593  	 	 	 // MT plugin: <http://bumppo.net/projects/amputator/>
1594  	 	 	 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1595  	 	 	 	 	 	 	 	 '&amp;', $text);
1596  	 	 }
1597  	 	 // Encode remaining <'s
1598  	 	 $text = str_replace('<', '&lt;', $text);
1599  
1600  	 	 return $text;
1601  	 }
1602  
1603  	 /**
1604  	  * Parse Markdown automatic links to anchor HTML tags
1605  	  * @param  string $text
1606  	  * @return string
1607  	  */
1608  	protected function doAutoLinks($text) {
1609  	 	 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1610  	 	 	 array($this, '_doAutoLinks_url_callback'), $text);
1611  
1612  	 	 // Email addresses: <address@domain.foo>
1613  	 	 $text = preg_replace_callback('{
1614  	 	 	 <
1615  	 	 	 (?:mailto:)?
1616  	 	 	 (
1617  	 	 	 	 (?:
1618  	 	 	 	 	 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1619  	 	 	 	 |
1620  	 	 	 	 	 ".*?"
1621  	 	 	 	 )
1622  	 	 	 	 \@
1623  	 	 	 	 (?:
1624  	 	 	 	 	 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1625  	 	 	 	 |
1626  	 	 	 	 	 \[[\d.a-fA-F:]+\]	 # IPv4 & IPv6
1627  	 	 	 	 )
1628  	 	 	 )
1629  	 	 	 >
1630  	 	 	 }xi',
1631  	 	 	 array($this, '_doAutoLinks_email_callback'), $text);
1632  
1633  	 	 return $text;
1634  	 }
1635  
1636  	 /**
1637  	  * Parse URL callback
1638  	  * @param  array $matches
1639  	  * @return string
1640  	  */
1641  	protected function _doAutoLinks_url_callback($matches) {
1642  	 	 $url = $this->encodeURLAttribute($matches[1], $text);
1643  	 	 $link = "<a href=\"$url\">$text</a>";
1644  	 	 return $this->hashPart($link);
1645  	 }
1646  
1647  	 /**
1648  	  * Parse email address callback
1649  	  * @param  array $matches
1650  	  * @return string
1651  	  */
1652  	protected function _doAutoLinks_email_callback($matches) {
1653  	 	 $addr = $matches[1];
1654  	 	 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1655  	 	 $link = "<a href=\"$url\">$text</a>";
1656  	 	 return $this->hashPart($link);
1657  	 }
1658  
1659  	 /**
1660  	  * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1661  	  *
1662  	  * Output: the same text but with most characters encoded as either a
1663  	  *         decimal or hex entity, in the hopes of foiling most address
1664  	  *         harvesting spam bots. E.g.:
1665  	  *
1666  	  *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1667  	  *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1668  	  *        &#x6d;
1669  	  *
1670  	  * Note: the additional output $tail is assigned the same value as the
1671  	  * ouput, minus the number of characters specified by $head_length.
1672  	  *
1673  	  * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1674  	  * With some optimizations by Milian Wolff. Forced encoding of HTML
1675  	  * attribute special characters by Allan Odgaard.
1676  	  *
1677  	  * @param  string  $text
1678  	  * @param  string  $tail Passed by reference
1679  	  * @param  integer $head_length
1680  	  * @return string
1681  	  */
1682  	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1683  	 	 if ($text == "") {
1684  	 	 	 return $tail = "";
1685  	 	 }
1686  
1687  	 	 $chars = preg_split('/(?<!^)(?!$)/', $text);
1688  	 	 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.
1689  
1690  	 	 foreach ($chars as $key => $char) {
1691  	 	 	 $ord = ord($char);
1692  	 	 	 // Ignore non-ascii chars.
1693  	 	 	 if ($ord < 128) {
1694  	 	 	 	 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
1695  	 	 	 	 // roughly 10% raw, 45% hex, 45% dec
1696  	 	 	 	 // '@' *must* be encoded. I insist.
1697  	 	 	 	 // '"' and '>' have to be encoded inside the attribute
1698  	 	 	 	 if ($r > 90 && strpos('@"&>', $char) === false) {
1699  	 	 	 	 	 /* do nothing */
1700  	 	 	 	 } else if ($r < 45) {
1701  	 	 	 	 	 $chars[$key] = '&#x'.dechex($ord).';';
1702  	 	 	 	 } else {
1703  	 	 	 	 	 $chars[$key] = '&#'.$ord.';';
1704  	 	 	 	 }
1705  	 	 	 }
1706  	 	 }
1707  
1708  	 	 $text = implode('', $chars);
1709  	 	 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1710  
1711  	 	 return $text;
1712  	 }
1713  
1714  	 /**
1715  	  * Take the string $str and parse it into tokens, hashing embeded HTML,
1716  	  * escaped characters and handling code spans.
1717  	  * @param  string $str
1718  	  * @return string
1719  	  */
1720  	protected function parseSpan($str) {
1721  	 	 $output = '';
1722  
1723  	 	 $span_re = '{
1724  	 	 	 	 (
1725  	 	 	 	 	 \\\\'.$this->escape_chars_re.'
1726  	 	 	 	 |
1727  	 	 	 	 	 (?<![`\\\\])
1728  	 	 	 	 	 `+	 	 	 	 	 	 # code span marker
1729  	 	 	 '.( $this->no_markup ? '' : '
1730  	 	 	 	 |
1731  	 	 	 	 	 <!--    .*?     -->	 	 # comment
1732  	 	 	 	 |
1733  	 	 	 	 	 <\?.*?\?> | <%.*?%>	 	 # processing instruction
1734  	 	 	 	 |
1735  	 	 	 	 	 <[!$]?[-a-zA-Z0-9:_]+	 # regular tags
1736  	 	 	 	 	 (?>
1737  	 	 	 	 	 	 \s
1738  	 	 	 	 	 	 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1739  	 	 	 	 	 )?
1740  	 	 	 	 	 >
1741  	 	 	 	 |
1742  	 	 	 	 	 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1743  	 	 	 	 |
1744  	 	 	 	 	 </[-a-zA-Z0-9:_]+\s*> # closing tag
1745  	 	 	 ').'
1746  	 	 	 	 )
1747  	 	 	 	 }xs';
1748  
1749  	 	 while (1) {
1750  	 	 	 // Each loop iteration seach for either the next tag, the next
1751  	 	 	 // openning code span marker, or the next escaped character.
1752  	 	 	 // Each token is then passed to handleSpanToken.
1753  	 	 	 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1754  
1755  	 	 	 // Create token from text preceding tag.
1756  	 	 	 if ($parts[0] != "") {
1757  	 	 	 	 $output .= $parts[0];
1758  	 	 	 }
1759  
1760  	 	 	 // Check if we reach the end.
1761  	 	 	 if (isset($parts[1])) {
1762  	 	 	 	 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1763  	 	 	 	 $str = $parts[2];
1764  	 	 	 } else {
1765  	 	 	 	 break;
1766  	 	 	 }
1767  	 	 }
1768  
1769  	 	 return $output;
1770  	 }
1771  
1772  	 /**
1773  	  * Handle $token provided by parseSpan by determining its nature and
1774  	  * returning the corresponding value that should replace it.
1775  	  * @param  string $token
1776  	  * @param  string $str Passed by reference
1777  	  * @return string
1778  	  */
1779  	protected function handleSpanToken($token, &$str) {
1780  	 	 switch ($token[0]) {
1781  	 	 	 case "\\":
1782  	 	 	 	 return $this->hashPart("&#". ord($token[1]). ";");
1783  	 	 	 case "`":
1784  	 	 	 	 // Search for end marker in remaining text.
1785  	 	 	 	 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1786  	 	 	 	 	 $str, $matches))
1787  	 	 	 	 {
1788  	 	 	 	 	 $str = $matches[2];
1789  	 	 	 	 	 $codespan = $this->makeCodeSpan($matches[1]);
1790  	 	 	 	 	 return $this->hashPart($codespan);
1791  	 	 	 	 }
1792  	 	 	 	 return $token; // Return as text since no ending marker found.
1793  	 	 	 default:
1794  	 	 	 	 return $this->hashPart($token);
1795  	 	 }
1796  	 }
1797  
1798  	 /**
1799  	  * Remove one level of line-leading tabs or spaces
1800  	  * @param  string $text
1801  	  * @return string
1802  	  */
1803  	protected function outdent($text) {
1804  	 	 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1805  	 }
1806  
1807  
	/**
	 * String length function for detab. `_initDetab` will create a function to
	 * handle UTF-8 if the default function does not exist.
	 * Holds either a function name (string) or a callable.
	 * @var string|callable
	 */
1813  	 protected $utf8_strlen = 'mb_strlen';
1814  
1815  	 /**
1816  	  * Replace tabs with the appropriate amount of spaces.
1817  	  *
	 * For each line we separate the line in blocks delimited by tab characters.
	 * Then we reconstruct every line by adding the appropriate number of spaces
	 * between each block.
1821  	  *
1822  	  * @param  string $text
1823  	  * @return string
1824  	  */
1825  	protected function detab($text) {
1826  	 	 $text = preg_replace_callback('/^.*\t.*$/m',
1827  	 	 	 array($this, '_detab_callback'), $text);
1828  
1829  	 	 return $text;
1830  	 }
1831  
1832  	 /**
1833  	  * Replace tabs callback
	 * @param  array $matches
1835  	  * @return string
1836  	  */
1837  	protected function _detab_callback($matches) {
1838  	 	 $line = $matches[0];
1839  	 	 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1840  
1841  	 	 // Split in blocks.
1842  	 	 $blocks = explode("\t", $line);
1843  	 	 // Add each blocks to the line.
1844  	 	 $line = $blocks[0];
1845  	 	 unset($blocks[0]); // Do not add first block twice.
1846  	 	 foreach ($blocks as $block) {
1847  	 	 	 // Calculate amount of space, insert spaces, insert block.
1848  	 	 	 $amount = $this->tab_width -
1849  	 	 	 	 $strlen($line, 'UTF-8') % $this->tab_width;
1850  	 	 	 $line .= str_repeat(" ", $amount) . $block;
1851  	 	 }
1852  	 	 return $line;
1853  	 }
1854  
1855  	 /**
1856  	  * Check for the availability of the function in the `utf8_strlen` property
1857  	  * (initially `mb_strlen`). If the function is not available, create a
1858  	  * function that will loosely count the number of UTF-8 characters with a
1859  	  * regular expression.
1860  	  * @return void
1861  	  */
1862  	protected function _initDetab() {
1863  
1864  	 	 if (function_exists($this->utf8_strlen)) {
1865  	 	 	 return;
1866  	 	 }
1867  
1868  	 	 $this->utf8_strlen = fn($text) => preg_match_all('/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/', $text, $m);
1869  	 }
1870  
1871  	 /**
1872  	  * Swap back in all the tags hashed by _HashHTMLBlocks.
1873  	  * @param  string $text
1874  	  * @return string
1875  	  */
1876  	protected function unhash($text) {
1877  	 	 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1878  	 	 	 array($this, '_unhash_callback'), $text);
1879  	 }
1880  
1881  	 /**
1882  	  * Unhashing callback
1883  	  * @param  array $matches
1884  	  * @return string
1885  	  */
1886  	protected function _unhash_callback($matches) {
1887  	 	 return $this->html_hashes[$matches[0]];
1888  	 }
1889  }