<?php

/**
 * PHP lexer code snarfed from the CVS tree for the lamplib project at
 * http://sourceforge.net/projects/lamplib
 * This project is administered by Markus Baker, Harry Fuecks and Matt
 * Mitchell, and the project code is in the public domain.
 *
 * Thanks, guys!
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */

/** LEXER_ENTER = 1: the token opened a new (nested) mode. */
define("LEXER_ENTER", 1);
/** LEXER_MATCHED = 2: the token matched a pattern without changing mode. */
define("LEXER_MATCHED", 2);
/** LEXER_UNMATCHED = 3: the text was not recognised by any pattern. */
define("LEXER_UNMATCHED", 3);
/** LEXER_EXIT = 4: the token closes the current mode. */
define("LEXER_EXIT", 4);
/** LEXER_SPECIAL = 5: the token is handled in a mode entered for that token only. */
define("LEXER_SPECIAL", 5);

/**
 * Compounded regular expression. Any of
 * the contained patterns could match and
 * when one does its label is returned.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class ParallelRegex {
    /** @var array Raw patterns exactly as passed to addPattern(). */
    public $_patterns;
    /** @var array Labels, index-aligned with $_patterns. */
    public $_labels;
    /** @var string|null Cached compound regex, null when it must be rebuilt. */
    public $_regex;
    /** @var bool True for case sensitive matching. */
    public $_case;

    /**
     * Constructor. Starts with no patterns.
     * @param bool $case    True for case sensitive, false
     *                      for insensitive.
     * @access public
     */
    public function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function ParallelRegex($case) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($case);
    }

    /**
     * Adds a pattern with an optional label.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string|bool $label   Label of regex to be returned
     *                             on a match.
     * @access public
     */
    public function addPattern($pattern, $label = true) {
        $this->_patterns[] = $pattern;
        $this->_labels[] = $label;
        // Invalidate the cache so the next match() recompiles the compound regex.
        $this->_regex = null;
    }

    /**
     * Attempts to match all patterns at once against
     * a string.
     * @param string $subject      String to match against.
     * @param string $match        Receives the first matched portion of
     *                             the subject, or "" when nothing matched.
     * @return bool|string         False when there is no match (or no
     *                             patterns); otherwise the label of the
     *                             pattern that matched, which is true for
     *                             patterns added without a label.
     * @access public
     */
    public function match($subject, &$match) {
        $match = "";
        if (count($this->_patterns) === 0) {
            return false;
        }
        if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            return false;
        }
        $match = $matches[0];
        // Each pattern owns exactly one capture group, so the first non-empty
        // group identifies the pattern. Compare against "" rather than relying
        // on truthiness: a token consisting of "0" is a valid match.
        $groups = count($matches);
        for ($i = 1; $i < $groups; $i++) {
            if ($matches[$i] !== "") {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The stored patterns are left untouched: the escaped groups are built
     * in a local array so that recompiling after a later addPattern() does
     * not escape the earlier patterns a second time.
     *
     * @return string      The compound regex including delimiters and flags.
     * @access private
     */
    public function _getCompoundedRegex() {
        if ($this->_regex === null) {
            $groups = array();
            foreach ($this->_patterns as $pattern) {
                $groups[] = '(' . str_replace(
                    array('/', '(', ')'),
                    array('\/', '\(', '\)'),
                    $pattern
                ) . ')';
            }
            $this->_regex = "/" . implode("|", $groups) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Flags as string.
     * @access private
     */
    public function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}

/**
 * States for a stack machine.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class StateStack {
    /** @var array State names, the last element being the current state. */
    public $_stack;

    /**
     * Constructor. Starts in named state.
     * @param string $start        Starting state name.
     * @access public
     */
    public function __construct($start) {
        $this->_stack = array($start);
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function StateStack($start) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($start);
    }

    /**
     * Accessor for current state.
     * @return string       State as string.
     * @access public
     */
    public function getCurrent() {
        return $this->_stack[count($this->_stack) - 1];
    }

    /**
     * Adds a state to the stack and sets it
     * to be the current state.
     * @param string $state        New state.
     * @access public
     */
    public function enter($state) {
        array_push($this->_stack, $state);
    }

    /**
     * Leaves the current state and reverts
     * to the previous one.
     * @return bool    False if we drop off
     *                 the bottom of the list.
     * @access public
     */
    public function leave() {
        // The starting state is never popped.
        if (count($this->_stack) == 1) {
            return false;
        }
        array_pop($this->_stack);
        return true;
    }
}

/**
 * Accepts text and breaks it into tokens.
 * Some optimisation to make sure the
 * content is only scanned by the PHP regex
 * parser once. Lexer modes must not start
 * with leading underscores.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class Lexer {
    /** @var array ParallelRegex instances keyed by mode name. */
    public $_regexes;
    /** @var object Parser whose methods receive the tokens. */
    public $_parser;
    /** @var StateStack Stack of modes, the top being the current one. */
    public $_mode;
    /** @var array Map of mode name to the parser method handling it. */
    public $_mode_handlers;
    /** @var bool True for case sensitive matching. */
    public $_case;

    /**
     * Sets up the lexer in case insensitive matching
     * by default.
     * @param object $parser     Handling strategy by
     *                           reference.
     * @param string $start      Starting handler.
     * @param bool $case         True for case sensitive.
     * @access public
     */
    public function __construct(&$parser, $start = "accept", $case = false) {
        $this->_case = $case;
        $this->_regexes = array();
        $this->_parser = &$parser;
        $this->_mode = new StateStack($start);
        $this->_mode_handlers = array();
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function Lexer(&$parser, $start = "accept", $case = false) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($parser, $start, $case);
    }

    /**
     * Adds a token search pattern for a particular
     * parsing mode. The pattern does not change the
     * current mode.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @access public
     */
    public function addPattern($pattern, $mode = "accept") {
        $this->regexForMode($mode)->addPattern($pattern);
    }

    /**
     * Adds a pattern that will enter a new parsing
     * mode. Useful for entering parenthesis, strings,
     * tags, etc.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @param string $new_mode     Change parsing to this new
     *                             nested mode.
     * @access public
     */
    public function addEntryPattern($pattern, $mode, $new_mode) {
        $this->regexForMode($mode)->addPattern($pattern, $new_mode);
    }

    /**
     * Adds a pattern that will exit the current mode
     * and re-enter the previous one.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Mode to leave.
     * @access public
     */
    public function addExitPattern($pattern, $mode) {
        $this->regexForMode($mode)->addPattern($pattern, "__exit");
    }

    /**
     * Adds a pattern that has a special mode.
     * Acts as an entry and exit pattern in one go.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @param string $special      Use this mode for this one token.
     * @access public
     */
    public function addSpecialPattern($pattern, $mode, $special) {
        // The leading underscore marks the label as a one-token mode.
        $this->regexForMode($mode)->addPattern($pattern, "_$special");
    }

    /**
     * Adds a mapping from a mode to another handler.
     * @param string $mode        Mode to be remapped.
     * @param string $handler     New target handler.
     * @access public
     */
    public function mapHandler($mode, $handler) {
        $this->_mode_handlers[$mode] = $handler;
    }

    /**
     * Splits the page text into tokens. Will fail
     * if the handlers report an error or if no
     * content is consumed. If successful then each
     * unparsed and parsed token invokes a call to the
     * held listener.
     * @param string $raw        Raw HTML text.
     * @return bool              True on success, else false.
     * @access public
     */
    public function parse($raw) {
        if (!isset($this->_parser)) {
            return false;
        }
        $length = strlen($raw);
        while (is_array($parsed = $this->_reduce($raw))) {
            list($unmatched, $matched, $mode) = $parsed;
            if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
                return false;
            }
            // A match that consumed nothing would loop forever, so give up.
            $remaining = strlen($raw);
            if ($remaining == $length) {
                return false;
            }
            $length = $remaining;
        }
        // _reduce() returned false: the current mode has no patterns at all.
        if (!$parsed) {
            return false;
        }
        // Whatever is left over is handed to the parser as unmatched text.
        return $this->_invokeParser($raw, LEXER_UNMATCHED);
    }

    /**
     * Sends the matched token and any leading unmatched
     * text to the parser changing the lexer to a new
     * mode if one is listed.
     * @param string $unmatched      Unmatched leading portion.
     * @param string $matched        Actual token match.
     * @param string|bool $mode      Mode after match. The "__exit"
     *                               mode causes a stack pop. Any other
     *                               mode starting with "_" is entered for
     *                               this token only. A non-string
     *                               mode causes no change.
     * @return bool                  False if there was any error
     *                               from the parser.
     * @access private
     */
    public function _dispatchTokens($unmatched, $matched, $mode = false) {
        if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
            return false;
        }
        // Must be tested before the generic "_" prefix, which it also matches.
        if ($mode === "__exit") {
            if (!$this->_invokeParser($matched, LEXER_EXIT)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if (is_string($mode) && strncmp($mode, "_", 1) === 0) {
            // Special pattern: enter the mode, deliver the token, leave again.
            $this->_mode->enter(substr($mode, 1));
            if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if (is_string($mode)) {
            $this->_mode->enter($mode);
            return $this->_invokeParser($matched, LEXER_ENTER);
        }
        return $this->_invokeParser($matched, LEXER_MATCHED);
    }

    /**
     * Calls the parser method named after the current
     * mode. Empty content will be ignored.
     * @param string $content        Text parsed.
     * @param int $is_match          One of the LEXER_* constants describing
     *                               how the content was recognised.
     * @return bool                  True for empty content, otherwise
     *                               whatever the parser method returns.
     * @access private
     */
    public function _invokeParser($content, $is_match) {
        if (($content === "") || ($content === false)) {
            return true;
        }
        $handler = $this->_mode->getCurrent();
        if (isset($this->_mode_handlers[$handler])) {
            $handler = $this->_mode_handlers[$handler];
        }
        return $this->_parser->$handler($content, $is_match);
    }

    /**
     * Tries to match a chunk of text and if successful
     * removes the recognised chunk and any leading
     * unparsed data. Empty strings will not be matched.
     * @param string $raw  The subject to parse. This is the
     *                     content that will be eaten.
     * @return bool|array  Three item list of unparsed
     *                     content followed by the
     *                     recognised token and finally the
     *                     action the parser is to take.
     *                     True if no match, false if there
     *                     is a parsing error.
     * @access private
     */
    public function _reduce(&$raw) {
        $current = $this->_mode->getCurrent();
        if (!isset($this->_regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }
        $action = $this->_regexes[$current]->match($raw, $match);
        if (!$action) {
            return true;
        }
        // The token is located with strpos(), so the first textual occurrence
        // of the matched string is treated as the position of the token.
        $count = strpos($raw, $match);
        $unparsed = substr($raw, 0, $count);
        $raw = substr($raw, $count + strlen($match));
        return array($unparsed, $match, $action);
    }

    /**
     * Returns the compound regex for a mode, creating it on first use.
     * @param string $mode     Mode name.
     * @return ParallelRegex   Regex collecting the patterns of that mode.
     */
    private function regexForMode($mode) {
        if (!isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        return $this->_regexes[$mode];
    }
}
?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body