Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 39 and 310] [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401] [Versions 39 and 402] [Versions 39 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Base class for search engines.
  19   *
  20   * All search engines must extend this class.
  21   *
  22   * @package   core_search
  23   * @copyright 2015 Daniel Neis
  24   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  
  27  namespace core_search;
  28  
  29  defined('MOODLE_INTERNAL') || die();
  30  
  31  /**
  32   * Base class for search engines.
  33   *
  34   * All search engines must extend this class.
  35   *
  36   * @package   core_search
  37   * @copyright 2015 Daniel Neis
  38   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  39   */
  40  abstract class engine {
  41  
  42      /**
  43       * The search engine configuration.
  44       *
  45       * @var \stdClass
  46       */
  47      protected $config = null;
  48  
  49      /**
  50       * Last executed query error, if there was any.
  51       * @var string
  52       */
  53      protected $queryerror = null;
  54  
  55      /**
  56       * @var array Internal cache.
  57       */
  58      protected $cachedareas = array();
  59  
  60      /**
  61       * @var array Internal cache.
  62       */
  63      protected $cachedcourses = array();
  64  
  65      /**
  66       * User data required to show their fullnames. Indexed by userid.
  67       *
  68       * @var \stdClass[]
  69       */
  70      protected static $cachedusers = array();
  71  
  72      /**
  73       * @var string Frankenstyle plugin name.
  74       */
  75      protected $pluginname = null;
  76  
  77      /**
  78       * @var bool If true, should skip schema validity check when checking the search engine is ready
  79       */
  80      protected $skipschemacheck = false;
  81  
  82      /**
  83       * Initialises the search engine configuration.
  84       *
  85       * Search engine availability should be checked separately.
  86       *
  87       * @return void
  88       */
  89      public function __construct() {
  90  
  91          $classname = get_class($this);
  92          if (strpos($classname, '\\') === false) {
  93              throw new \coding_exception('"' . $classname . '" class should specify its component namespace and it should be named engine.');
  94          } else if (strpos($classname, '_') === false) {
  95              throw new \coding_exception('"' . $classname . '" class namespace should be its frankenstyle name');
  96          }
  97  
  98          // This is search_xxxx config.
  99          $this->pluginname = substr($classname, 0, strpos($classname, '\\'));
 100          if ($config = get_config($this->pluginname)) {
 101              $this->config = $config;
 102          } else {
 103              $this->config = new stdClass();
 104          }
 105      }
 106  
 107      /**
 108       * Returns a course instance checking internal caching.
 109       *
 110       * @param int $courseid
 111       * @return stdClass
 112       */
 113      protected function get_course($courseid) {
 114          if (!empty($this->cachedcourses[$courseid])) {
 115              return $this->cachedcourses[$courseid];
 116          }
 117  
 118          // No need to clone, only read.
 119          $this->cachedcourses[$courseid] = get_course($courseid, false);
 120  
 121          return $this->cachedcourses[$courseid];
 122      }
 123  
 124      /**
 125       * Returns user data checking the internal static cache.
 126       *
 127       * Including here the minimum required user information as this may grow big.
 128       *
 129       * @param int $userid
 130       * @return stdClass
 131       */
 132      public function get_user($userid) {
 133          global $DB;
 134  
 135          if (empty(self::$cachedusers[$userid])) {
 136              $fields = get_all_user_name_fields(true);
 137              self::$cachedusers[$userid] = $DB->get_record('user', array('id' => $userid), 'id, ' . $fields);
 138          }
 139          return self::$cachedusers[$userid];
 140      }
 141  
 142      /**
 143       * Clears the users cache.
 144       *
 145       * @return null
 146       */
 147      public static function clear_users_cache() {
 148          self::$cachedusers = [];
 149      }
 150  
 151      /**
 152       * Returns a search instance of the specified area checking internal caching.
 153       *
 154       * @param string $areaid Area id
 155       * @return \core_search\base
 156       */
 157      protected function get_search_area($areaid) {
 158  
 159          if (isset($this->cachedareas[$areaid]) && $this->cachedareas[$areaid] === false) {
 160              // We already checked that area and it is not available.
 161              return false;
 162          }
 163  
 164          if (!isset($this->cachedareas[$areaid])) {
 165              // First result that matches this area.
 166  
 167              $this->cachedareas[$areaid] = \core_search\manager::get_search_area($areaid);
 168              if ($this->cachedareas[$areaid] === false) {
 169                  // The area does not exist or it is not available any more.
 170  
 171                  $this->cachedareas[$areaid] = false;
 172                  return false;
 173              }
 174  
 175              if (!$this->cachedareas[$areaid]->is_enabled()) {
 176                  // We skip the area if it is not enabled.
 177  
 178                  // Marking it as false so next time we don' need to check it again.
 179                  $this->cachedareas[$areaid] = false;
 180  
 181                  return false;
 182              }
 183          }
 184  
 185          return $this->cachedareas[$areaid];
 186      }
 187  
 188      /**
 189       * Returns a document instance prepared to be rendered.
 190       *
 191       * @param \core_search\base $searcharea
 192       * @param array $docdata
 193       * @return \core_search\document
 194       */
 195      protected function to_document(\core_search\base $searcharea, $docdata) {
 196  
 197          list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($docdata['areaid']);
 198          $doc = \core_search\document_factory::instance($docdata['itemid'], $componentname, $areaname, $this);
 199          $doc->set_data_from_engine($docdata);
 200          $doc->set_doc_url($searcharea->get_doc_url($doc));
 201          $doc->set_context_url($searcharea->get_context_url($doc));
 202          $doc->set_doc_icon($searcharea->get_doc_icon($doc));
 203  
 204          // Uses the internal caches to get required data needed to render the document later.
 205          $course = $this->get_course($doc->get('courseid'));
 206          $doc->set_extra('coursefullname', $course->fullname);
 207  
 208          if ($doc->is_set('userid')) {
 209              $user = $this->get_user($doc->get('userid'));
 210              $doc->set_extra('userfullname', fullname($user));
 211          }
 212  
 213          return $doc;
 214      }
 215  
 216      /**
 217       * Loop through given iterator of search documents
 218       * and and have the search engine back end add them
 219       * to the index.
 220       *
 221       * @param iterator $iterator the iterator of documents to index
 222       * @param searcharea $searcharea the area for the documents to index
 223       * @param array $options document indexing options
 224       * @return array Processed document counts
 225       */
 226      public function add_documents($iterator, $searcharea, $options) {
 227          $numrecords = 0;
 228          $numdocs = 0;
 229          $numdocsignored = 0;
 230          $lastindexeddoc = 0;
 231          $firstindexeddoc = 0;
 232          $partial = false;
 233          $lastprogress = manager::get_current_time();
 234  
 235          foreach ($iterator as $document) {
 236              // Stop if we have exceeded the time limit (and there are still more items). Always
 237              // do at least one second's worth of documents otherwise it will never make progress.
 238              if ($lastindexeddoc !== $firstindexeddoc &&
 239                      !empty($options['stopat']) && manager::get_current_time() >= $options['stopat']) {
 240                  $partial = true;
 241                  break;
 242              }
 243  
 244              if (!$document instanceof \core_search\document) {
 245                  continue;
 246              }
 247  
 248              if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
 249                  // If we have never indexed this area before, it must be new.
 250                  $document->set_is_new(true);
 251              }
 252  
 253              if ($options['indexfiles']) {
 254                  // Attach files if we are indexing.
 255                  $searcharea->attach_files($document);
 256              }
 257  
 258              if ($this->add_document($document, $options['indexfiles'])) {
 259                  $numdocs++;
 260              } else {
 261                  $numdocsignored++;
 262              }
 263  
 264              $lastindexeddoc = $document->get('modified');
 265              if (!$firstindexeddoc) {
 266                  $firstindexeddoc = $lastindexeddoc;
 267              }
 268              $numrecords++;
 269  
 270              // If indexing the area takes a long time, periodically output progress information.
 271              if (isset($options['progress'])) {
 272                  $now = manager::get_current_time();
 273                  if ($now - $lastprogress >= manager::DISPLAY_INDEXING_PROGRESS_EVERY) {
 274                      $lastprogress = $now;
 275                      // The first date format is the same used in cron_trace_time_and_memory().
 276                      $options['progress']->output(date('H:i:s', $now) . ': Done to ' . userdate(
 277                              $lastindexeddoc, get_string('strftimedatetimeshort', 'langconfig')), 1);
 278                  }
 279              }
 280          }
 281  
 282          return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
 283      }
 284  
 285      /**
 286       * Returns the plugin name.
 287       *
 288       * @return string Frankenstyle plugin name.
 289       */
 290      public function get_plugin_name() {
 291          return $this->pluginname;
 292      }
 293  
 294      /**
 295       * Gets the document class used by this search engine.
 296       *
 297       * Search engines can overwrite \core_search\document with \search_ENGINENAME\document class.
 298       *
 299       * Looks for a document class in the current search engine namespace, falling back to \core_search\document.
 300  
 301       * Publicly available because search areas do not have access to the engine details,
 302       * \core_search\document_factory accesses this function.
 303       *
 304       * @return string
 305       */
 306      public function get_document_classname() {
 307          $classname = $this->pluginname . '\\document';
 308          if (!class_exists($classname)) {
 309              $classname = '\\core_search\\document';
 310          }
 311          return $classname;
 312      }
 313  
 314      /**
 315       * Run any pre-indexing operations.
 316       *
 317       * Should be overwritten if the search engine needs to do any pre index preparation.
 318       *
 319       * @param bool $fullindex True if a full index will be performed
 320       * @return void
 321       */
 322      public function index_starting($fullindex = false) {
 323          // Nothing by default.
 324      }
 325  
 326      /**
 327       * Run any post indexing operations.
 328       *
 329       * Should be overwritten if the search engine needs to do any post index cleanup.
 330       *
 331       * @param int $numdocs The number of documents that were added to the index
 332       * @param bool $fullindex True if a full index was performed
 333       * @return void
 334       */
 335      public function index_complete($numdocs = 0, $fullindex = false) {
 336          // Nothing by default.
 337      }
 338  
 339      /**
 340       * Do anything that may need to be done before an area is indexed.
 341       *
 342       * @param \core_search\base $searcharea The search area that was complete
 343       * @param bool $fullindex True if a full index is being performed
 344       * @return void
 345       */
 346      public function area_index_starting($searcharea, $fullindex = false) {
 347          // Nothing by default.
 348      }
 349  
 350      /**
 351       * Do any area cleanup needed, and do anything to confirm contents.
 352       *
 353       * Return false to prevent the search area completed time and stats from being updated.
 354       *
 355       * @param \core_search\base $searcharea The search area that was complete
 356       * @param int $numdocs The number of documents that were added to the index
 357       * @param bool $fullindex True if a full index is being performed
 358       * @return bool True means that data is considered indexed
 359       */
 360      public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
 361          return true;
 362      }
 363  
 364      /**
 365       * Optimizes the search engine.
 366       *
 367       * Should be overwritten if the search engine can optimize its contents.
 368       *
 369       * @return void
 370       */
 371      public function optimize() {
 372          // Nothing by default.
 373      }
 374  
 375      /**
 376       * Does the system satisfy all the requirements.
 377       *
 378       * Should be overwritten if the search engine has any system dependencies
 379       * that needs to be checked.
 380       *
 381       * @return bool
 382       */
 383      public function is_installed() {
 384          return true;
 385      }
 386  
 387      /**
 388       * Returns any error reported by the search engine when executing the provided query.
 389       *
 390       * It should be called from static::execute_query when an exception is triggered.
 391       *
 392       * @return string
 393       */
 394      public function get_query_error() {
 395          return $this->queryerror;
 396      }
 397  
 398      /**
 399       * Returns the total number of documents available for the most recent call to execute_query.
 400       *
 401       * This can be an estimate, but should get more accurate the higher the limited passed to execute_query is.
 402       * To do that, the engine can use (actual result returned count + count of unchecked documents), or
 403       * (total possible docs - docs that have been checked and rejected).
 404       *
 405       * Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
 406       * If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
 407       *
 408       * @return int
 409       */
 410      abstract public function get_query_total_count();
 411  
 412      /**
 413       * Return true if file indexing is supported and enabled. False otherwise.
 414       *
 415       * @return bool
 416       */
 417      public function file_indexing_enabled() {
 418          return false;
 419      }
 420  
 421      /**
 422       * Clears the current query error value.
 423       *
 424       * @return void
 425       */
 426      public function clear_query_error() {
 427          $this->queryerror = null;
 428      }
 429  
 430      /**
 431       * Is the server ready to use?
 432       *
 433       * This should also check that the search engine configuration is ok.
 434       *
 435       * If the function $this->should_skip_schema_check() returns true, then this function may leave
 436       * out time-consuming checks that the schema is valid. (This allows for improved performance on
 437       * critical pages such as the main search form.)
 438       *
 439       * @return true|string Returns true if all good or an error string.
 440       */
 441      abstract function is_server_ready();
 442  
 443      /**
 444       * Tells the search engine to skip any time-consuming checks that it might do as part of the
 445       * is_server_ready function, and only carry out a basic check that it can contact the server.
 446       *
 447       * This setting is not remembered and applies only to the current request.
 448       *
 449       * @since Moodle 3.5
 450       * @param bool $skip True to skip the checks, false to start checking again
 451       */
 452      public function skip_schema_check($skip = true) {
 453          $this->skipschemacheck = $skip;
 454      }
 455  
 456      /**
 457       * For use by subclasses. The engine can call this inside is_server_ready to check whether it
 458       * should skip time-consuming schema checks.
 459       *
 460       * @since Moodle 3.5
 461       * @return bool True if schema checks should be skipped
 462       */
 463      protected function should_skip_schema_check() {
 464          return $this->skipschemacheck;
 465      }
 466  
 467      /**
 468       * Adds a document to the search engine.
 469       *
 470       * @param document $document
 471       * @param bool     $fileindexing True if file indexing is to be used
 472       * @return bool    False if the file was skipped or failed, true on success
 473       */
 474      abstract function add_document($document, $fileindexing = false);
 475  
 476      /**
 477       * Executes the query on the engine.
 478       *
 479       * Implementations of this function should check user context array to limit the results to contexts where the
 480       * user have access. They should also limit the owneruserid field to manger::NO_OWNER_ID or the current user's id.
 481       * Engines must use area->check_access() to confirm user access.
 482       *
 483       * Engines should reasonably attempt to fill up to limit with valid results if they are available.
 484       *
 485       * The $filters object may include the following fields (optional except q):
 486       * - q: value of main search field; results should include this text
 487       * - title: if included, title must match this search
 488       * - areaids: array of search area id strings (only these areas will be searched)
 489       * - courseids: array of course ids (only these courses will be searched)
 490       * - groupids: array of group ids (only results specifically from these groupids will be
 491       *   searched) - this option will be ignored if the search engine doesn't support groups
 492       *
 493       * The $accessinfo parameter has two different values (for historical compatibility). If the
 494       * engine returns false to supports_group_filtering then it is an array of user contexts, or
 495       * true if the user can access all contexts. (This parameter used to be called $usercontexts.)
 496       * If the engine returns true to supports_group_filtering then it will be an object containing
 497       * these fields:
 498       * - everything (true if admin is searching with no restrictions)
 499       * - usercontexts (same as above)
 500       * - separategroupscontexts (array of context ids where separate groups are used)
 501       * - visiblegroupscontextsareas (array of subset of those where some areas use visible groups)
 502       * - usergroups (array of relevant group ids that user belongs to)
 503       *
 504       * The engine should apply group restrictions to those contexts listed in the
 505       * 'separategroupscontexts' array. In these contexts, it shouled only include results if the
 506       * groupid is not set, or if the groupid matches one of the values in USER_GROUPS array, or
 507       * if the search area is one of those listed in 'visiblegroupscontextsareas' for that context.
 508       *
 509       * @param \stdClass $filters Query and filters to apply.
 510       * @param \stdClass $accessinfo Information about the contexts the user can access
 511       * @param  int      $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
 512       * @return \core_search\document[] Results or false if no results
 513       */
 514      public abstract function execute_query($filters, $accessinfo, $limit = 0);
 515  
 516      /**
 517       * Delete all documents.
 518       *
 519       * @param string $areaid To filter by area
 520       * @return void
 521       */
 522      abstract function delete($areaid = null);
 523  
 524      /**
 525       * Deletes information related to a specific context id. This should be used when the context
 526       * itself is deleted from Moodle.
 527       *
 528       * This only deletes information for the specified context - not for any child contexts.
 529       *
 530       * This function is optional; if not supported it will return false and the information will
 531       * not be deleted from the search index.
 532       *
 533       * If an engine implements this function it should also implement delete_index_for_course;
 534       * otherwise, nothing will be deleted when users delete an entire course at once.
 535       *
 536       * @param int $oldcontextid ID of context that has been deleted
 537       * @return bool True if implemented
 538       * @throws \core_search\engine_exception Engines may throw this exception for any problem
 539       */
 540      public function delete_index_for_context(int $oldcontextid) {
 541          return false;
 542      }
 543  
 544      /**
 545       * Deletes information related to a specific course id. This should be used when the course
 546       * itself is deleted from Moodle.
 547       *
 548       * This deletes all information relating to that course from the index, including all child
 549       * contexts.
 550       *
 551       * This function is optional; if not supported it will return false and the information will
 552       * not be deleted from the search index.
 553       *
 554       * If an engine implements this function then, ideally, it should also implement
 555       * delete_index_for_context so that deletion of single activities/blocks also works.
 556       *
 557       * @param int $oldcourseid ID of course that has been deleted
 558       * @return bool True if implemented
 559       * @throws \core_search\engine_exception Engines may throw this exception for any problem
 560       */
 561      public function delete_index_for_course(int $oldcourseid) {
 562          return false;
 563      }
 564  
 565      /**
 566       * Checks that the schema is the latest version. If the version stored in config does not match
 567       * the current, this function will attempt to upgrade the schema.
 568       *
 569       * @return bool|string True if schema is OK, a string if user needs to take action
 570       */
 571      public function check_latest_schema() {
 572          if (empty($this->config->schemaversion)) {
 573              $currentversion = 0;
 574          } else {
 575              $currentversion = $this->config->schemaversion;
 576          }
 577          if ($currentversion < document::SCHEMA_VERSION) {
 578              return $this->update_schema((int)$currentversion, (int)document::SCHEMA_VERSION);
 579          } else {
 580              return true;
 581          }
 582      }
 583  
 584      /**
 585       * Usually called by the engine; marks that the schema has been updated.
 586       *
 587       * @param int $version Records the schema version now applied
 588       */
 589      public function record_applied_schema_version($version) {
 590          set_config('schemaversion', $version, $this->pluginname);
 591      }
 592  
 593      /**
 594       * Requests the search engine to upgrade the schema. The engine should update the schema if
 595       * possible/necessary, and should ensure that record_applied_schema_version is called as a
 596       * result.
 597       *
 598       * If it is not possible to upgrade the schema at the moment, it can do nothing and return; the
 599       * function will be called again next time search is initialised.
 600       *
 601       * The default implementation just returns, with a DEBUG_DEVELOPER warning.
 602       *
 603       * @param int $oldversion Old schema version
 604       * @param int $newversion New schema version
 605       * @return bool|string True if schema is updated successfully, a string if it needs updating manually
 606       */
 607      protected function update_schema($oldversion, $newversion) {
 608          debugging('Unable to update search engine schema: ' . $this->pluginname, DEBUG_DEVELOPER);
 609          return get_string('schemanotupdated', 'search');
 610      }
 611  
 612      /**
 613       * Checks if this search engine supports groups.
 614       *
 615       * Note that returning true to this function causes the parameters to execute_query to be
 616       * passed differently!
 617       *
 618       * In order to implement groups and return true to this function, the search engine should:
 619       *
 620       * 1. Handle the fields ->separategroupscontexts and ->usergroups in the $accessinfo parameter
 621       *    to execute_query (ideally, using these to automatically restrict search results).
 622       * 2. Support the optional groupids parameter in the $filter parameter for execute_query to
 623       *    restrict results to only those where the stored groupid matches the given value.
 624       *
 625       * @return bool True if this engine supports searching by group id field
 626       */
 627      public function supports_group_filtering() {
 628          return false;
 629      }
 630  
 631      /**
 632       * Obtain a list of results orders (and names for them) that are supported by this
 633       * search engine in the given context.
 634       *
 635       * By default, engines sort by relevance only.
 636       *
 637       * @param \context $context Context that the user requested search from
 638       * @return array Array from order name => display text
 639       */
 640      public function get_supported_orders(\context $context) {
 641          return ['relevance' => get_string('order_relevance', 'search')];
 642      }
 643  
 644      /**
 645       * Checks if the search engine supports searching by user.
 646       *
 647       * If it returns true to this function, the search engine should support the 'userids' option
 648       * in the $filters value passed to execute_query(), returning only items where the userid in
 649       * the search document matches one of those user ids.
 650       *
 651       * @return bool True if the search engine supports searching by user
 652       */
 653      public function supports_users() {
 654          return false;
 655      }
 656  }