Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0 Note: minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.
/lib/ -> searchlib.php (source)

Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402]

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * @package    core
  20   * @subpackage search
  21   * @copyright  1999 onwards Martin Dougiamas  {@link http://moodle.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  defined('MOODLE_INTERNAL') || die();
  26  
  27  /** @see lexer.php */
  28  require_once($CFG->libdir.'/lexer.php');
  29  
  30  /** Constants for the various types of tokens */
  31  
  32  define("TOKEN_USER","0");
  33  define("TOKEN_META","1");
  34  define("TOKEN_EXACT","2");
  35  define("TOKEN_NEGATE","3");
  36  define("TOKEN_STRING","4");
  37  define("TOKEN_USERID","5");
  38  define("TOKEN_DATEFROM","6");
  39  define("TOKEN_DATETO","7");
  40  define("TOKEN_INSTANCE","8");
  41  define("TOKEN_TAGS","9");
  42  
  43  /**
  44   * Class to hold token/value pairs after they're parsed.
  45   *
  46   * @package   moodlecore
  47   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  48   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  49   */
  50  class search_token {
  51    private $value;
  52    private $type;
  53  
  54    public function __construct($type,$value){
  55      $this->type = $type;
  56      $this->value = $this->sanitize($value);
  57  
  58    }
  59  
  60    /**
  61     * Old syntax of class constructor. Deprecated in PHP7.
  62     *
  63     * @deprecated since Moodle 3.1
  64     */
  65    public function search_token($type, $value) {
  66      debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
  67      self::__construct($type, $value);
  68    }
  69  
  70    // Try to clean up user input to avoid potential security issues.
  71    // Need to think about this some more.
  72  
  73    function sanitize($userstring){
  74      return htmlspecialchars($userstring, ENT_COMPAT);
  75    }
  76    function getValue(){
  77      return $this->value;
  78    }
  79    function getType(){
  80      return $this->type;
  81    }
  82  }
  83  
  84  
  85  /**
  86   * This class does the heavy lifting of lexing the search string into tokens.
  87   * Using a full-blown lexer is probably overkill for this application, but
  88   * might be useful for other tasks.
  89   *
  90   * @package   moodlecore
  91   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  92   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  93   */
  94  class search_lexer extends Lexer{
  95  
  96    public function __construct(&$parser){
  97  
  98      // Call parent constructor.
  99      parent::__construct($parser);
 100  
 101      //Set up the state machine and pattern matches for transitions.
 102  
 103      // Patterns to handle strings  of the form datefrom:foo
 104  
 105      // If we see the string datefrom: while in the base accept state, start
 106      // parsing a username and go to the indatefrom state.
 107      $this->addEntryPattern("datefrom:\S+","accept","indatefrom");
 108  
 109      // Snarf everything into the username until we see whitespace, then exit
 110      // back to the base accept state.
 111      $this->addExitPattern("\s","indatefrom");
 112  
 113  
 114      // If we see the string tags: while in the base accept state, start
 115      // parsing tags and go to the intags state.
 116      $this->addEntryPattern("tags:\S+","accept","intags");
 117  
 118      // Snarf everything into the tags until we see whitespace, then exit
 119      // back to the base accept state.
 120      $this->addExitPattern("\s","intags");
 121  
 122      // Patterns to handle strings  of the form dateto:foo
 123  
 124      // If we see the string dateto: while in the base accept state, start
 125      // parsing a username and go to the indateto state.
 126      $this->addEntryPattern("dateto:\S+","accept","indateto");
 127  
 128      // Snarf everything into the username until we see whitespace, then exit
 129      // back to the base accept state.
 130      $this->addExitPattern("\s","indateto");
 131  
 132  
 133      // Patterns to handle strings  of the form instance:foo
 134  
 135      // If we see the string instance: while in the base accept state, start
 136      // parsing for instance number and go to the ininstance state.
 137      $this->addEntryPattern("instance:\S+","accept","ininstance");
 138  
 139      // Snarf everything into the username until we see whitespace, then exit
 140      // back to the base accept state.
 141      $this->addExitPattern("\s","ininstance");
 142  
 143  
 144      // Patterns to handle strings  of the form userid:foo
 145  
 146      // If we see the string userid: while in the base accept state, start
 147      // parsing a username and go to the inuserid state.
 148      $this->addEntryPattern("userid:\S+","accept","inuserid");
 149  
 150      // Snarf everything into the username until we see whitespace, then exit
 151      // back to the base accept state.
 152      $this->addExitPattern("\s","inuserid");
 153  
 154  
 155      // Patterns to handle strings  of the form user:foo
 156  
 157      // If we see the string user: while in the base accept state, start
 158      // parsing a username and go to the inusername state.
 159      $this->addEntryPattern("user:\S+","accept","inusername");
 160  
 161      // Snarf everything into the username until we see whitespace, then exit
 162      // back to the base accept state.
 163      $this->addExitPattern("\s","inusername");
 164  
 165  
 166      // Patterns to handle strings  of the form meta:foo
 167  
 168     // If we see the string meta: while in the base accept state, start
 169      // parsing a username and go to the inmeta state.
 170      $this->addEntryPattern("subject:\S+","accept","inmeta");
 171  
 172      // Snarf everything into the meta token until we see whitespace, then exit
 173      // back to the base accept state.
 174      $this->addExitPattern("\s","inmeta");
 175  
 176  
 177      // Patterns to handle required exact match strings (+foo) .
 178  
 179      // If we see a + sign  while in the base accept state, start
 180      // parsing an exact match string and enter the inrequired state
 181      $this->addEntryPattern("\+\S+","accept","inrequired");
 182      // When we see white space, exit back to accept state.
 183      $this->addExitPattern("\s","inrequired");
 184  
 185      // Handle excluded strings (-foo)
 186  
 187     // If we see a - sign  while in the base accept state, start
 188      // parsing an excluded string and enter the inexcluded state
 189      $this->addEntryPattern("\-\S+","accept","inexcluded");
 190      // When we see white space, exit back to accept state.
 191      $this->addExitPattern("\s","inexcluded");
 192  
 193  
 194      // Patterns to handle quoted strings.
 195  
 196      // If we see a quote  while in the base accept state, start
 197      // parsing a quoted string and enter the inquotedstring state.
 198      // Grab everything until we see the closing quote.
 199  
 200      $this->addEntryPattern("\"[^\"]+","accept","inquotedstring");
 201  
 202      // When we see a closing quote, reenter the base accept state.
 203      $this->addExitPattern("\"","inquotedstring");
 204  
 205      // Patterns to handle ordinary, nonquoted words.
 206  
 207      // When we see non-whitespace, snarf everything into the nonquoted word
 208      // until we see whitespace again.
 209      $this->addEntryPattern("\S+","accept","plainstring");
 210  
 211      // Once we see whitespace, reenter the base accept state.
 212      $this->addExitPattern("\s","plainstring");
 213  
 214    }
 215  
 216    /**
 217     * Old syntax of class constructor. Deprecated in PHP7.
 218     *
 219     * @deprecated since Moodle 3.1
 220     */
 221    public function search_lexer(&$parser) {
 222      debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
 223      self::__construct($parser);
 224    }
 225  
 226  }
 227  
 228  
 229  
 230  /**
 231   * This class takes care of sticking the proper token type/value pairs into
 232   * the parsed token  array.
 233   * Most functions in this class should only be called by the lexer, the
 234   * one exception being getParseArray() which returns the result.
 235   *
 236   * @package   moodlecore
 237   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 238   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 239   */
 240  class search_parser {
 241      private $tokens;
 242  
 243      // This function is called by the code that's interested in the result of the parse operation.
 244      function get_parsed_array(){
 245          return $this->tokens;
 246      }
 247  
 248      /*
 249       * Functions below this are part of the state machine for the parse
 250       * operation and should not be called directly.
 251       */
 252  
 253      // Base state. No output emitted.
 254      function accept() {
 255          return true;
 256      }
 257  
 258      // State for handling datefrom:foo constructs. Potentially emits a token.
 259      function indatefrom($content){
 260          if (strlen($content) < 10) { // State exit or missing parameter.
 261              return true;
 262          }
 263          // Strip off the datefrom: part and add the reminder to the parsed token array
 264          $param = trim(substr($content,9));
 265          $this->tokens[] = new search_token(TOKEN_DATEFROM,$param);
 266          return true;
 267      }
 268  
 269      // State for handling dateto:foo constructs. Potentially emits a token.
 270      function indateto($content){
 271          if (strlen($content) < 8) { // State exit or missing parameter.
 272              return true;
 273          }
 274          // Strip off the dateto: part and add the reminder to the parsed token array
 275          $param = trim(substr($content,7));
 276          $this->tokens[] = new search_token(TOKEN_DATETO,$param);
 277          return true;
 278      }
 279  
 280      // State for handling tags:tagname,tagname constructs. Potentially emits a token.
 281      function intags($content){
 282          if (strlen($content) < 5) { // State exit or missing parameter.
 283              return true;
 284          }
 285          // Strip off the tags: part and add the reminder to the parsed token array
 286          $param = trim(substr($content,5));
 287          $this->tokens[] = new search_token(TOKEN_TAGS,$param);
 288          return true;
 289      }
 290  
 291      // State for handling instance:foo constructs. Potentially emits a token.
 292      function ininstance($content){
 293          if (strlen($content) < 10) { // State exit or missing parameter.
 294              return true;
 295          }
 296          // Strip off the instance: part and add the reminder to the parsed token array
 297          $param = trim(substr($content,9));
 298          $this->tokens[] = new search_token(TOKEN_INSTANCE,$param);
 299          return true;
 300      }
 301  
 302  
 303      // State for handling userid:foo constructs. Potentially emits a token.
 304      function inuserid($content){
 305          if (strlen($content) < 8) { // State exit or missing parameter.
 306              return true;
 307          }
 308          // Strip off the userid: part and add the reminder to the parsed token array
 309          $param = trim(substr($content,7));
 310          $this->tokens[] = new search_token(TOKEN_USERID,$param);
 311          return true;
 312      }
 313  
 314  
 315      // State for handling user:foo constructs. Potentially emits a token.
 316      function inusername($content){
 317          if (strlen($content) < 6) { // State exit or missing parameter.
 318              return true;
 319          }
 320          // Strip off the user: part and add the reminder to the parsed token array
 321          $param = trim(substr($content,5));
 322          $this->tokens[] = new search_token(TOKEN_USER,$param);
 323          return true;
 324      }
 325  
 326  
 327      // State for handling meta:foo constructs. Potentially emits a token.
 328      function inmeta($content){
 329          if (strlen($content) < 9) { // Missing parameter.
 330              return true;
 331          }
 332          // Strip off the meta: part and add the reminder to the parsed token array.
 333          $param = trim(substr($content,8));
 334          $this->tokens[] = new search_token(TOKEN_META,$param);
 335          return true;
 336      }
 337  
 338  
 339      // State entered when we've seen a required string (+foo). Potentially
 340      // emits a token.
 341      function inrequired($content){
 342          if (strlen($content) < 2) { // State exit or missing parameter, don't emit.
 343              return true;
 344          }
 345          // Strip off the + sign and add the reminder to the parsed token array.
 346          $this->tokens[] = new search_token(TOKEN_EXACT,substr($content,1));
 347          return true;
 348      }
 349  
 350      // State entered when we've seen an excluded string (-foo). Potentially
 351      // emits a token.
 352      function inexcluded($content){
 353          if (strlen($content) < 2) { // State exit or missing parameter.
 354              return true;
 355          }
 356          // Strip off the -sign and add the reminder to the parsed token array.
 357          $this->tokens[] = new search_token(TOKEN_NEGATE,substr($content,1));
 358          return true;
 359      }
 360  
 361  
 362      // State entered when we've seen a quoted string. Potentially emits a token.
 363      function inquotedstring($content){
 364          if (strlen($content) < 2) { // State exit or missing parameter.
 365              return true;
 366          }
 367          // Strip off the opening quote and add the reminder to the parsed token array.
 368          $this->tokens[] = new search_token(TOKEN_STRING,substr($content,1));
 369          return true;
 370      }
 371  
 372      // State entered when we've seen an ordinary, non-quoted word. Potentially
 373      // emits a token.
 374      function plainstring($content){
 375          if (trim($content) === '') { // State exit
 376              return true;
 377          }
 378          // Add the string to the parsed token array.
 379          $this->tokens[] = new search_token(TOKEN_STRING,$content);
 380          return true;
 381      }
 382  }
 383  
 384  /**
 385   * Primitive function to generate a SQL string from a parse tree
 386   * using TEXT indexes. If searches aren't suitable to use TEXT
 387   * this function calls the default search_generate_SQL() one.
 388   *
 389   * @deprecated since Moodle 2.9 MDL-48939
 390   * @todo MDL-48940 This will be deleted in Moodle 3.2
 391   * @see search_generate_SQL()
 392   */
 393  function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 394                               $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
 395      debugging('search_generate_text_SQL() is deprecated, please use search_generate_SQL() instead.', DEBUG_DEVELOPER);
 396  
 397      return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 398                                 $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
 399  }
 400  
 401  /**
 402   * Primitive function to generate a SQL string from a parse tree.
 403   * Parameters:
 404   *
 405   * $parsetree should be a parse tree generated by a
 406   * search_lexer/search_parser combination.
 407   * Other fields are database table names to search.
 408   *
 409   * @global object
 410   * @global object
 411   */
 412  function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
 413                               $userfirstnamefield, $userlastnamefield, $timefield, $instancefield,
 414                               $tagfields = []) {
 415      global $CFG, $DB;
 416      static $p = 0;
 417  
 418      if ($DB->sql_regex_supported()) {
 419          $REGEXP    = $DB->sql_regex(true);
 420          $NOTREGEXP = $DB->sql_regex(false);
 421          $regexwordbegin = $DB->sql_regex_get_word_beginning_boundary_marker();
 422          $regexwordend = $DB->sql_regex_get_word_end_boundary_marker();
 423      }
 424  
 425      $params = array();
 426  
 427      $ntokens = count($parsetree);
 428      if ($ntokens == 0) {
 429          return "";
 430      }
 431  
 432      $SQLString = '';
 433      $nexttagfield = 0;
 434      for ($i=0; $i<$ntokens; $i++){
 435          if ($i > 0) {// We have more than one clause, need to tack on AND
 436              $SQLString .= ' AND ';
 437          }
 438  
 439          $type = $parsetree[$i]->getType();
 440          $value = $parsetree[$i]->getValue();
 441  
 442      /// Under Oracle and MSSQL, transform TOKEN searches into STRING searches and trim +- chars
 443          if (!$DB->sql_regex_supported()) {
 444              $value = trim($value, '+-');
 445              if ($type == TOKEN_EXACT) {
 446                  $type = TOKEN_STRING;
 447              }
 448          }
 449  
 450          $name1 = 'sq'.$p++;
 451          $name2 = 'sq'.$p++;
 452  
 453          switch($type){
 454              case TOKEN_STRING:
 455                  $SQLString .= "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
 456                  $params[$name1] = "%$value%";
 457                  $params[$name2] = "%$value%";
 458                  break;
 459              case TOKEN_EXACT:
 460                  $SQLString .= "(($datafield $REGEXP :$name1) OR ($metafield $REGEXP :$name2))";
 461                  $params[$name1] = $regexwordbegin.$value.$regexwordend;
 462                  $params[$name2] = $regexwordbegin.$value.$regexwordend;
 463                  break;
 464              case TOKEN_META:
 465                  if ($metafield != '') {
 466                      $SQLString .= "(".$DB->sql_like($metafield, ":$name1", false).")";
 467                      $params[$name1] = "%$value%";
 468                  }
 469                  break;
 470              case TOKEN_USER:
 471                  $SQLString .= "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
 472                  $params[$name1] = "%$value%";
 473                  $params[$name2] = "%$value%";
 474                  break;
 475              case TOKEN_USERID:
 476                  $SQLString .= "($useridfield = :$name1)";
 477                  $params[$name1] = $value;
 478                  break;
 479              case TOKEN_INSTANCE:
 480                  $SQLString .= "($instancefield = :$name1)";
 481                  $params[$name1] = $value;
 482                  break;
 483              case TOKEN_DATETO:
 484                  $SQLString .= "($timefield <= :$name1)";
 485                  $params[$name1] = $value;
 486                  break;
 487              case TOKEN_DATEFROM:
 488                  $SQLString .= "($timefield >= :$name1)";
 489                  $params[$name1] = $value;
 490                  break;
 491              case TOKEN_TAGS:
 492                  $sqlstrings = [];
 493                  foreach (explode(',', $value) as $tag) {
 494                      $paramname = $name1 . '_' . $nexttagfield;
 495                      if (isset($tagfields[$nexttagfield])) {
 496                          $sqlstrings[]       = "($tagfields[$nexttagfield] = :$paramname)";
 497                          $params[$paramname] = $tag;
 498                      } else if (!isset($tagfields[$nexttagfield]) && !isset($stoppedprocessingtags)) {
 499                          // Show a debugging message the first time we hit this.
 500                          $stoppedprocessingtags = true;
 501                          \core\notification::add(get_string('toomanytags'), \core\notification::WARNING);
 502                      }
 503                      $nexttagfield++;
 504                  }
 505                  $SQLString .= implode(' AND ', $sqlstrings);
 506                  break;
 507              case TOKEN_NEGATE:
 508                  $SQLString .= "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
 509                  $params[$name1] = "%$value%";
 510                  $params[$name2] = "%$value%";
 511                  break;
 512              default:
 513                  return '';
 514  
 515          }
 516      }
 517      return array($SQLString, $params);
 518  }