Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
/lib/ -> lexer.php (source)
   1  <?php
   2  
   3  /**
   4   * PHP lexer code snarfed from the CVS tree for the lamplib project at
   5   * http://sourceforge.net/projects/lamplib
   6   * This project is administered by Markus Baker, Harry Fuecks and Matt
   7   * Mitchell, and the project  code is in the public domain.
   8   *
   9   * Thanks, guys!
  10   *
  11   * @package   moodlecore
  12   * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
  13   * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
  14   */
  15  
  16      /** LEXER_ENTER = 1 */
  17      define("LEXER_ENTER", 1);
  18      /** LEXER_MATCHED = 2 */
  19      define("LEXER_MATCHED", 2);
  20      /** LEXER_UNMATCHED = 3 */
  21      define("LEXER_UNMATCHED", 3);
  22      /** LEXER_EXIT = 4 */
  23      define("LEXER_EXIT", 4);
  24      /** LEXER_SPECIAL = 5 */
  25      define("LEXER_SPECIAL", 5);
  26  
  27      /**
  28       * Compounded regular expression. Any of
  29       * the contained patterns could match and
  30       * when one does it's label is returned.
  31       * @package   moodlecore
  32       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
  33       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
  34       */
  35      class ParallelRegex {
  36          var $_patterns;
  37          var $_labels;
  38          var $_regex;
  39          var $_case;
  40  
  41          /**
  42           *    Constructor. Starts with no patterns.
  43           *    @param bool $case    True for case sensitive, false
  44           *                    for insensitive.
  45           *    @access public
  46           */
  47          public function __construct($case) {
  48              $this->_case = $case;
  49              $this->_patterns = array();
  50              $this->_labels = array();
  51              $this->_regex = null;
  52          }
  53  
  54          /**
  55           * Old syntax of class constructor. Deprecated in PHP7.
  56           *
  57           * @deprecated since Moodle 3.1
  58           */
  59          public function ParallelRegex($case) {
  60              debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
  61              self::__construct($case);
  62          }
  63  
  64          /**
  65           *    Adds a pattern with an optional label.
  66           *    @param string $pattern      Perl style regex, but ( and )
  67           *                         lose the usual meaning.
  68           *    @param string $label        Label of regex to be returned
  69           *                         on a match.
  70           *    @access public
  71           */
  72          function addPattern($pattern, $label = true) {
  73              $count = count($this->_patterns);
  74              $this->_patterns[$count] = $pattern;
  75              $this->_labels[$count] = $label;
  76              $this->_regex = null;
  77          }
  78  
  79          /**
  80           *    Attempts to match all patterns at once against
  81           *    a string.
  82           *    @param string $subject      String to match against.
  83           *    @param string $match        First matched portion of
  84           *                         subject.
  85           *    @return bool             True on success.
  86           *    @access public
  87           */
  88          function match($subject, &$match) {
  89              if (count($this->_patterns) == 0) {
  90                  return false;
  91              }
  92              if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
  93                  $match = "";
  94                  return false;
  95              }
  96              $match = $matches[0];
  97              for ($i = 1; $i < count($matches); $i++) {
  98                  if ($matches[$i]) {
  99                      return $this->_labels[$i - 1];
 100                  }
 101              }
 102              return true;
 103          }
 104  
 105          /**
 106           *    Compounds the patterns into a single
 107           *    regular expression separated with the
 108           *    "or" operator. Caches the regex.
 109           *    Will automatically escape (, ) and / tokens.
 110           *    @access private
 111           */
 112          function _getCompoundedRegex() {
 113              if ($this->_regex == null) {
 114                  for ($i = 0; $i < count($this->_patterns); $i++) {
 115                      $this->_patterns[$i] = '(' . str_replace(
 116                              array('/', '(', ')'),
 117                              array('\/', '\(', '\)'),
 118                              $this->_patterns[$i]) . ')';
 119                  }
 120                  $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags();
 121              }
 122              return $this->_regex;
 123          }
 124  
 125          /**
 126           *    Accessor for perl regex mode flags to use.
 127           *    @return string       Flags as string.
 128           *    @access private
 129           */
 130          function _getPerlMatchingFlags() {
 131              return ($this->_case ? "msS" : "msSi");
 132          }
 133      }
 134  
 135      /**
 136       * States for a stack machine.
 137       *
 138       * @package   moodlecore
 139       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 140       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 141       */
 142      class StateStack {
 143          var $_stack;
 144  
 145          /**
 146           *    Constructor. Starts in named state.
 147           *    @param string $start        Starting state name.
 148           *    @access public
 149           */
 150          public function __construct($start) {
 151              $this->_stack = array($start);
 152          }
 153  
 154          /**
 155           * Old syntax of class constructor. Deprecated in PHP7.
 156           *
 157           * @deprecated since Moodle 3.1
 158           */
 159          public function StateStack($start) {
 160              debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
 161              self::__construct($start);
 162          }
 163  
 164          /**
 165           *    Accessor for current state.
 166           *    @return string State as string.
 167           *    @access public
 168           */
 169          function getCurrent() {
 170              return $this->_stack[count($this->_stack) - 1];
 171          }
 172  
 173          /**
 174           *    Adds a state to the stack and sets it
 175           *    to be the current state.
 176           *    @param string $state        New state.
 177           *    @access public
 178           */
 179          function enter($state) {
 180              array_push($this->_stack, $state);
 181          }
 182  
 183          /**
 184           *    Leaves the current state and reverts
 185           *    to the previous one.
 186           *    @return bool     False if we drop off
 187           *                the bottom of the list.
 188           *    @access public
 189           */
 190          function leave() {
 191              if (count($this->_stack) == 1) {
 192                  return false;
 193              }
 194              array_pop($this->_stack);
 195              return true;
 196          }
 197      }
 198  
 199      /**
 200       * Accepts text and breaks it into tokens.
 201       * Some optimisation to make the sure the
 202       * content is only scanned by the PHP regex
 203       * parser once. Lexer modes must not start
 204       * with leading underscores.
 205       *
 206       * @package   moodlecore
 207       * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 208       * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 209       */
 210      class Lexer {
 211          var $_regexes;
 212          var $_parser;
 213          var $_mode;
 214          var $_mode_handlers;
 215          var $_case;
 216  
 217          /**
 218           *    Sets up the lexer in case insensitive matching
 219           *    by default.
 220           *    @param object $parser     Handling strategy by
 221           *                       reference.
 222           *    @param string $start      Starting handler.
 223           *    @param bool $case       True for case sensitive.
 224           *    @access public
 225           */
 226          public function __construct(&$parser, $start = "accept", $case = false) {
 227              $this->_case = $case;
 228              $this->_regexes = array();
 229              $this->_parser = &$parser;
 230              $this->_mode = new StateStack($start);
 231              $this->_mode_handlers = array();
 232          }
 233  
 234          /**
 235           * Old syntax of class constructor. Deprecated in PHP7.
 236           *
 237           * @deprecated since Moodle 3.1
 238           */
 239          public function Lexer(&$parser, $start = "accept", $case = false) {
 240              debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
 241              self::__construct($parser, $start, $case);
 242          }
 243  
 244          /**
 245           *    Adds a token search pattern for a particular
 246           *    parsing mode. The pattern does not change the
 247           *    current mode.
 248           *    @param string $pattern      Perl style regex, but ( and )
 249           *                         lose the usual meaning.
 250           *    @param string $mode         Should only apply this
 251           *                         pattern when dealing with
 252           *                         this type of input.
 253           *    @access public
 254           */
 255          function addPattern($pattern, $mode = "accept") {
 256              if (!isset($this->_regexes[$mode])) {
 257                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 258              }
 259              $this->_regexes[$mode]->addPattern($pattern);
 260          }
 261  
 262          /**
 263           *    Adds a pattern that will enter a new parsing
 264           *    mode. Useful for entering parenthesis, strings,
 265           *    tags, etc.
 266           *    @param string $pattern      Perl style regex, but ( and )
 267           *                         lose the usual meaning.
 268           *    @param string $mode         Should only apply this
 269           *                         pattern when dealing with
 270           *                         this type of input.
 271           *    @param string $new_mode     Change parsing to this new
 272           *                         nested mode.
 273           *    @access public
 274           */
 275          function addEntryPattern($pattern, $mode, $new_mode) {
 276              if (!isset($this->_regexes[$mode])) {
 277                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 278              }
 279              $this->_regexes[$mode]->addPattern($pattern, $new_mode);
 280          }
 281  
 282          /**
 283           *    Adds a pattern that will exit the current mode
 284           *    and re-enter the previous one.
 285           *    @param string $pattern      Perl style regex, but ( and )
 286           *                         lose the usual meaning.
 287           *    @param string $mode         Mode to leave.
 288           *    @access public
 289           */
 290          function addExitPattern($pattern, $mode) {
 291              if (!isset($this->_regexes[$mode])) {
 292                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 293              }
 294              $this->_regexes[$mode]->addPattern($pattern, "__exit");
 295          }
 296  
 297          /**
 298           *    Adds a pattern that has a special mode.
 299           *    Acts as an entry and exit pattern in one go.
 300           *    @param string $pattern      Perl style regex, but ( and )
 301           *                         lose the usual meaning.
 302           *    @param string $mode         Should only apply this
 303           *                         pattern when dealing with
 304           *                         this type of input.
 305           *    @param string $special      Use this mode for this one token.
 306           *    @access public
 307           */
 308          function addSpecialPattern($pattern, $mode, $special) {
 309              if (!isset($this->_regexes[$mode])) {
 310                  $this->_regexes[$mode] = new ParallelRegex($this->_case);
 311              }
 312              $this->_regexes[$mode]->addPattern($pattern, "_$special");
 313          }
 314  
 315          /**
 316           *    Adds a mapping from a mode to another handler.
 317           *    @param string $mode        Mode to be remapped.
 318           *    @param string $handler     New target handler.
 319           *    @access public
 320           */
 321          function mapHandler($mode, $handler) {
 322              $this->_mode_handlers[$mode] = $handler;
 323          }
 324  
 325          /**
 326           *    Splits the page text into tokens. Will fail
 327           *    if the handlers report an error or if no
 328           *    content is consumed. If successful then each
 329           *    unparsed and parsed token invokes a call to the
 330           *    held listener.
 331           *    @param string $raw        Raw HTML text.
 332           *    @return bool           True on success, else false.
 333           *    @access public
 334           */
 335          function parse($raw) {
 336              if (!isset($this->_parser)) {
 337                  return false;
 338              }
 339              $length = strlen($raw);
 340              while (is_array($parsed = $this->_reduce($raw))) {
 341                  list($unmatched, $matched, $mode) = $parsed;
 342                  if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
 343                      return false;
 344                  }
 345                  if (strlen($raw) == $length) {
 346                      return false;
 347                  }
 348                  $length = strlen($raw);
 349              }
 350              if (!$parsed) {
 351                  return false;
 352              }
 353              return $this->_invokeParser($raw, LEXER_UNMATCHED);
 354          }
 355  
 356          /**
 357           *    Sends the matched token and any leading unmatched
 358           *    text to the parser changing the lexer to a new
 359           *    mode if one is listed.
 360           *    @param string $unmatched    Unmatched leading portion.
 361           *    @param string $matched      Actual token match.
 362           *    @param string $mode         Mode after match. The "_exit"
 363           *                         mode causes a stack pop. An
 364           *                         false mode causes no change.
 365           *    @return bool              False if there was any error
 366           *                         from the parser.
 367           *    @access private
 368           */
 369          function _dispatchTokens($unmatched, $matched, $mode = false) {
 370              if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
 371                  return false;
 372              }
 373              if ($mode === "__exit") {
 374                  if (!$this->_invokeParser($matched, LEXER_EXIT)) {
 375                      return false;
 376                  }
 377                  return $this->_mode->leave();
 378              }
 379              if (strncmp($mode, "_", 1) == 0) {
 380                  $mode = substr($mode, 1);
 381                  $this->_mode->enter($mode);
 382                  if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
 383                      return false;
 384                  }
 385                  return $this->_mode->leave();
 386              }
 387              if (is_string($mode)) {
 388                  $this->_mode->enter($mode);
 389                  return $this->_invokeParser($matched, LEXER_ENTER);
 390              }
 391              return $this->_invokeParser($matched, LEXER_MATCHED);
 392          }
 393  
 394          /**
 395           *    Calls the parser method named after the current
 396           *    mode. Empty content will be ignored.
 397           *    @param string $content        Text parsed.
 398           *    @param string $is_match       Token is recognised rather
 399           *                           than unparsed data.
 400           *    @access private
 401           */
 402          function _invokeParser($content, $is_match) {
 403              if (($content === "") || ($content === false)) {
 404                  return true;
 405              }
 406              $handler = $this->_mode->getCurrent();
 407              if (isset($this->_mode_handlers[$handler])) {
 408                  $handler = $this->_mode_handlers[$handler];
 409              }
 410              return $this->_parser->$handler($content, $is_match);
 411          }
 412  
 413          /**
 414           *    Tries to match a chunk of text and if successful
 415           *    removes the recognised chunk and any leading
 416           *    unparsed data. Empty strings will not be matched.
 417           *    @param string $raw  The subject to parse. This is the
 418           *                        content that will be eaten.
 419           *    @return bool|array  Three item list of unparsed
 420           *                        content followed by the
 421           *                        recognised token and finally the
 422           *                        action the parser is to take.
 423           *                        True if no match, false if there
 424           *                        is a parsing error.
 425           *    @access private
 426           */
 427          function _reduce(&$raw) {
 428              if (!isset($this->_regexes[$this->_mode->getCurrent()])) {
 429                  return false;
 430              }
 431              if ($raw === "") {
 432                  return true;
 433              }
 434              if ($action = $this->_regexes[$this->_mode->getCurrent()]->match($raw, $match)) {
 435                  $count = strpos($raw, $match);
 436                  $unparsed = substr($raw, 0, $count);
 437                  $raw = substr($raw, $count + strlen($match));
 438                  return array($unparsed, $match, $action);
 439              }
 440              return true;
 441          }
 442      }
 443  ?>