Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 310 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Document representation.
  19   *
  20   * @package    core_search
  21   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace core_search;
  26  
  27  use context;
  28  
  29  defined('MOODLE_INTERNAL') || die();
  30  
  31  /**
  32   * Represents a document to index.
  33   *
  34   * Note that, if you are writing a search engine and you want to change \core_search\document
  35   * behaviour, you can override this class; it will be automatically loaded from \search_YOURENGINE\document.
  36   *
  37   * @package    core_search
  38   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  39   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  40   */
  41  class document implements \renderable, \templatable {
  42  
  43      /**
  44       * @var array $data The document data.
  45       */
  46      protected $data = array();
  47  
  48      /**
  49       * @var array Extra data needed to render the document.
  50       */
  51      protected $extradata = array();
  52  
  53      /**
  54       * @var \moodle_url Link to the document.
  55       */
  56      protected $docurl = null;
  57  
  58      /**
  59       * @var \moodle_url Link to the document context.
  60       */
  61      protected $contexturl = null;
  62  
  63      /**
  64       * @var \core_search\document_icon Document icon instance.
  65       */
  66      protected $docicon = null;
  67  
  68      /**
  69       * @var int|null The content field filearea.
  70       */
  71      protected $contentfilearea = null;
  72  
  73      /**
  74       * @var int|null The content field itemid.
  75       */
  76      protected $contentitemid = null;
  77  
  78      /**
  79       * @var bool Should be set to true if document hasn't been indexed before. False if unknown.
  80       */
  81      protected $isnew = false;
  82  
  83      /**
  84       * @var \stored_file[] An array of stored files to attach to the document.
  85       */
  86      protected $files = array();
  87  
  88      /**
  89       * Change list (for engine implementers):
  90       * 2017091700 - add optional field groupid
  91       *
  92       * @var int Schema version number (update if any change)
  93       */
  94      const SCHEMA_VERSION = 2017091700;
  95  
  96      /**
  97       * All required fields any doc should contain.
  98       *
  99       * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
 100       * default search engine.
 101       *
 102       * Search engine plugins are responsible of setting their appropriate field types and map these naming to whatever format
 103       * they need.
 104       *
 105       * @var array
 106       */
 107      protected static $requiredfields = array(
 108          'id' => array(
 109              'type' => 'string',
 110              'stored' => true,
 111              'indexed' => false
 112          ),
 113          'itemid' => array(
 114              'type' => 'int',
 115              'stored' => true,
 116              'indexed' => true
 117          ),
 118          'title' => array(
 119              'type' => 'text',
 120              'stored' => true,
 121              'indexed' => true,
 122              'mainquery' => true
 123          ),
 124          'content' => array(
 125              'type' => 'text',
 126              'stored' => true,
 127              'indexed' => true,
 128              'mainquery' => true
 129          ),
 130          'contextid' => array(
 131              'type' => 'int',
 132              'stored' => true,
 133              'indexed' => true
 134          ),
 135          'areaid' => array(
 136              'type' => 'string',
 137              'stored' => true,
 138              'indexed' => true
 139          ),
 140          'type' => array(
 141              'type' => 'int',
 142              'stored' => true,
 143              'indexed' => true
 144          ),
 145          'courseid' => array(
 146              'type' => 'int',
 147              'stored' => true,
 148              'indexed' => true
 149          ),
 150          'owneruserid' => array(
 151              'type' => 'int',
 152              'stored' => true,
 153              'indexed' => true
 154          ),
 155          'modified' => array(
 156              'type' => 'tdate',
 157              'stored' => true,
 158              'indexed' => true
 159          ),
 160      );
 161  
 162      /**
 163       * All optional fields docs can contain.
 164       *
 165       * Although it matches solr fields format, this is just to define the field types. Search
 166       * engine plugins are responsible of setting their appropriate field types and map these
 167       * naming to whatever format they need.
 168       *
 169       * @var array
 170       */
 171      protected static $optionalfields = array(
 172          'userid' => array(
 173              'type' => 'int',
 174              'stored' => true,
 175              'indexed' => true
 176          ),
 177          'groupid' => array(
 178              'type' => 'int',
 179              'stored' => true,
 180              'indexed' => true
 181          ),
 182          'description1' => array(
 183              'type' => 'text',
 184              'stored' => true,
 185              'indexed' => true,
 186              'mainquery' => true
 187          ),
 188          'description2' => array(
 189              'type' => 'text',
 190              'stored' => true,
 191              'indexed' => true,
 192              'mainquery' => true
 193          )
 194      );
 195  
 196      /**
 197       * Any fields that are engine specifc. These are fields that are solely used by a search engine plugin
 198       * for internal purposes.
 199       *
 200       * Field names should be prefixed with engine name to avoid potential conflict with core fields.
 201       *
 202       * Uses same format as fields above.
 203       *
 204       * @var array
 205       */
 206      protected static $enginefields = array();
 207  
 208      /**
 209       * We ensure that the document has a unique id across search areas.
 210       *
 211       * @param int $itemid An id unique to the search area
 212       * @param string $componentname The search area component Frankenstyle name
 213       * @param string $areaname The area name (the search area class name)
 214       * @return void
 215       */
 216      public function __construct($itemid, $componentname, $areaname) {
 217  
 218          if (!is_numeric($itemid)) {
 219              throw new \coding_exception('The itemid should be an integer');
 220          }
 221  
 222          $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
 223          $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
 224          $this->data['itemid'] = intval($itemid);
 225      }
 226  
 227      /**
 228       * Add a stored file to the document.
 229       *
 230       * @param \stored_file|int $file The file to add, or file id.
 231       * @return void
 232       */
 233      public function add_stored_file($file) {
 234          if (is_numeric($file)) {
 235              $this->files[$file] = $file;
 236          } else {
 237              $this->files[$file->get_id()] = $file;
 238          }
 239      }
 240  
 241      /**
 242       * Returns the array of attached files.
 243       *
 244       * @return \stored_file[]
 245       */
 246      public function get_files() {
 247          // The files array can contain stored file ids, so we need to get instances if asked.
 248          foreach ($this->files as $id => $listfile) {
 249              if (is_numeric($listfile)) {
 250                  $fs = get_file_storage();
 251  
 252                  if ($file = $fs->get_file_by_id($id)) {
 253                      $this->files[$id] = $file;
 254                  } else {
 255                      unset($this->files[$id]); // Index is out of date and referencing a file that does not exist.
 256                  }
 257              }
 258          }
 259  
 260          return $this->files;
 261      }
 262  
 263      /**
 264       * Setter.
 265       *
 266       * Basic checkings to prevent common issues.
 267       *
 268       * If the field is a string tags will be stripped, if it is an integer or a date it
 269       * will be casted to a PHP integer. tdate fields values are expected to be timestamps.
 270       *
 271       * @throws \coding_exception
 272       * @param string $fieldname The field name
 273       * @param string|int $value The value to store
 274       * @return string|int The stored value
 275       */
 276      public function set($fieldname, $value) {
 277  
 278          if (!empty(static::$requiredfields[$fieldname])) {
 279              $fielddata = static::$requiredfields[$fieldname];
 280          } else if (!empty(static::$optionalfields[$fieldname])) {
 281              $fielddata = static::$optionalfields[$fieldname];
 282          } else if (!empty(static::$enginefields[$fieldname])) {
 283              $fielddata = static::$enginefields[$fieldname];
 284          }
 285  
 286          if (empty($fielddata)) {
 287              throw new \coding_exception('"' . $fieldname . '" field does not exist.');
 288          }
 289  
 290          // tdate fields should be set as timestamps, later they might be converted to
 291          // a date format, it depends on the search engine.
 292          if (($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') && !is_numeric($value)) {
 293              throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' . $value . '"');
 294          }
 295  
 296          // We want to be strict here, there might be engines that expect us to
 297          // provide them data with the proper type already set.
 298          if ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') {
 299              $this->data[$fieldname] = intval($value);
 300          } else {
 301              // Remove disallowed Unicode characters.
 302              $value = \core_text::remove_unicode_non_characters($value);
 303  
 304              // Replace all groups of line breaks and spaces by single spaces.
 305              $this->data[$fieldname] = preg_replace("/\s+/u", " ", $value);
 306              if ($this->data[$fieldname] === null) {
 307                  if (isset($this->data['id'])) {
 308                      $docid = $this->data['id'];
 309                  } else {
 310                      $docid = '(unknown)';
 311                  }
 312                  throw new \moodle_exception('error_indexing', 'search', '', null, '"' . $fieldname .
 313                          '" value causes preg_replace error (may be caused by unusual characters) ' .
 314                          'in document with id "' . $docid . '"');
 315              }
 316          }
 317  
 318          return $this->data[$fieldname];
 319      }
 320  
 321      /**
 322       * Sets data to this->extradata
 323       *
 324       * This data can be retrieved using \core_search\document->get($fieldname).
 325       *
 326       * @param string $fieldname
 327       * @param string $value
 328       * @return void
 329       */
 330      public function set_extra($fieldname, $value) {
 331          $this->extradata[$fieldname] = $value;
 332      }
 333  
 334      /**
 335       * Getter.
 336       *
 337       * Use self::is_set if you are not sure if this field is set or not
 338       * as otherwise it will trigger a \coding_exception
 339       *
 340       * @throws \coding_exception
 341       * @param string $field
 342       * @return string|int
 343       */
 344      public function get($field) {
 345  
 346          if (isset($this->data[$field])) {
 347              return $this->data[$field];
 348          }
 349  
 350          // Fallback to extra data.
 351          if (isset($this->extradata[$field])) {
 352              return $this->extradata[$field];
 353          }
 354  
 355          throw new \coding_exception('Field "' . $field . '" is not set in the document');
 356      }
 357  
 358      /**
 359       * Checks if a field is set.
 360       *
 361       * @param string $field
 362       * @return bool
 363       */
 364      public function is_set($field) {
 365          return (isset($this->data[$field]) || isset($this->extradata[$field]));
 366      }
 367  
 368      /**
 369       * Set if this is a new document. False if unknown.
 370       *
 371       * @param bool $new
 372       */
 373      public function set_is_new($new) {
 374         $this->isnew = (bool)$new;
 375      }
 376  
 377      /**
 378       * Returns if the document is new. False if unknown.
 379       *
 380       * @return bool
 381       */
 382      public function get_is_new() {
 383         return $this->isnew;
 384      }
 385  
 386      /**
 387       * Returns all default fields definitions.
 388       *
 389       * @return array
 390       */
 391      public static function get_default_fields_definition() {
 392          return static::$requiredfields + static::$optionalfields + static::$enginefields;
 393      }
 394  
 395      /**
 396       * Formats the timestamp preparing the time fields to be inserted into the search engine.
 397       *
 398       * By default it just returns a timestamp so any search engine could just store integers
 399       * and use integers comparison to get documents between x and y timestamps, but search
 400       * engines might be interested in using their own field formats. They can do it extending
 401       * this class in \search_xxx\document.
 402       *
 403       * @param int $timestamp
 404       * @return string
 405       */
 406      public static function format_time_for_engine($timestamp) {
 407          return $timestamp;
 408      }
 409  
 410      /**
 411       * Formats a string value for the search engine.
 412       *
 413       * Search engines may overwrite this method to apply restrictions, like limiting the size.
 414       * The default behaviour is just returning the string.
 415       *
 416       * @param string $string
 417       * @return string
 418       */
 419      public static function format_string_for_engine($string) {
 420          return $string;
 421      }
 422  
 423      /**
 424       * Formats a text value for the search engine.
 425       *
 426       * Search engines may overwrite this method to apply restrictions, like limiting the size.
 427       * The default behaviour is just returning the string.
 428       *
 429       * @param string $text
 430       * @return string
 431       */
 432      public static function format_text_for_engine($text) {
 433          return $text;
 434      }
 435  
 436      /**
 437       * Returns a timestamp from the value stored in the search engine.
 438       *
 439       * By default it just returns a timestamp so any search engine could just store integers
 440       * and use integers comparison to get documents between x and y timestamps, but search
 441       * engines might be interested in using their own field formats. They should do it extending
 442       * this class in \search_xxx\document.
 443       *
 444       * @param string $time
 445       * @return int
 446       */
 447      public static function import_time_from_engine($time) {
 448          return $time;
 449      }
 450  
 451      /**
 452       * Returns how text is returned from the search engine.
 453       *
 454       * @return int
 455       */
 456      protected function get_text_format() {
 457          return FORMAT_PLAIN;
 458      }
 459  
 460      /**
 461       * Fills the document with data coming from the search engine.
 462       *
 463       * @throws \core_search\engine_exception
 464       * @param array $docdata
 465       * @return void
 466       */
 467      public function set_data_from_engine($docdata) {
 468          $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
 469          foreach ($fields as $fieldname => $field) {
 470  
 471              // Optional params might not be there.
 472              if (isset($docdata[$fieldname])) {
 473                  if ($field['type'] === 'tdate') {
 474                      // Time fields may need a preprocessing.
 475                      $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
 476                  } else {
 477                      // No way we can make this work if there is any multivalue field.
 478                      if (is_array($docdata[$fieldname])) {
 479                          throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
 480                      }
 481                      $this->set($fieldname, $docdata[$fieldname]);
 482                  }
 483              }
 484          }
 485      }
 486  
 487      /**
 488       * Sets the document url.
 489       *
 490       * @param \moodle_url $url
 491       * @return void
 492       */
 493      public function set_doc_url(\moodle_url $url) {
 494          $this->docurl = $url;
 495      }
 496  
 497      /**
 498       * Gets the url to the doc.
 499       *
 500       * @return \moodle_url
 501       */
 502      public function get_doc_url() {
 503          return $this->docurl;
 504      }
 505  
 506      /**
 507       * Sets document icon instance.
 508       *
 509       * @param \core_search\document_icon $docicon
 510       */
 511      public function set_doc_icon(document_icon $docicon) {
 512          $this->docicon = $docicon;
 513      }
 514  
 515      /**
 516       * Gets document icon instance.
 517       *
 518       * @return \core_search\document_icon
 519       */
 520      public function get_doc_icon() {
 521          return $this->docicon;
 522      }
 523  
 524      public function set_context_url(\moodle_url $url) {
 525          $this->contexturl = $url;
 526      }
 527  
 528      /**
 529       * Gets the url to the context.
 530       *
 531       * @return \moodle_url
 532       */
 533      public function get_context_url() {
 534          return $this->contexturl;
 535      }
 536  
 537      /**
 538       * Returns the document ready to submit to the search engine.
 539       *
 540       * @throws \coding_exception
 541       * @return array
 542       */
 543      public function export_for_engine() {
 544          // Set any unset defaults.
 545          $this->apply_defaults();
 546  
 547          // We don't want to affect the document instance.
 548          $data = $this->data;
 549  
 550          // Apply specific engine-dependant formats and restrictions.
 551          foreach (static::$requiredfields as $fieldname => $field) {
 552  
 553              // We also check that we have everything we need.
 554              if (!isset($data[$fieldname])) {
 555                  throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
 556              }
 557  
 558              if ($field['type'] === 'tdate') {
 559                  // Overwrite the timestamp with the engine dependant format.
 560                  $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
 561              } else if ($field['type'] === 'string') {
 562                  // Overwrite the string with the engine dependant format.
 563                  $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
 564              } else if ($field['type'] === 'text') {
 565                  // Overwrite the text with the engine dependant format.
 566                  $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
 567              }
 568  
 569          }
 570  
 571          $fields = static::$optionalfields + static::$enginefields;
 572          foreach ($fields as $fieldname => $field) {
 573              if (!isset($data[$fieldname])) {
 574                  continue;
 575              }
 576              if ($field['type'] === 'tdate') {
 577                  // Overwrite the timestamp with the engine dependant format.
 578                  $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
 579              } else if ($field['type'] === 'string') {
 580                  // Overwrite the string with the engine dependant format.
 581                  $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
 582              } else if ($field['type'] === 'text') {
 583                  // Overwrite the text with the engine dependant format.
 584                  $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
 585              }
 586          }
 587  
 588          return $data;
 589      }
 590  
 591      /**
 592       * Apply any defaults to unset fields before export. Called after document building, but before export.
 593       *
 594       * Sub-classes of this should make sure to call parent::apply_defaults().
 595       */
 596      protected function apply_defaults() {
 597          // Set the default type, TYPE_TEXT.
 598          if (!isset($this->data['type'])) {
 599              $this->data['type'] = manager::TYPE_TEXT;
 600          }
 601      }
 602  
 603      /**
 604       * Export the document data to be used as a template context.
 605       *
 606       * Adding more info than the required one as people might be interested in extending the template.
 607       *
 608       * Although content is a required field when setting up the document, it accepts '' (empty) values
 609       * as they may be the result of striping out HTML.
 610       *
 611       * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed.
 612       * The renderer will output the content without any further cleaning.
 613       *
 614       * @param renderer_base $output The renderer.
 615       * @return array
 616       */
 617      public function export_for_template(\renderer_base $output) {
 618          global $USER;
 619  
 620          list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($this->get('areaid'));
 621          $context = context::instance_by_id($this->get('contextid'));
 622  
 623          $searcharea = \core_search\manager::get_search_area($this->data['areaid']);
 624          $title = $this->is_set('title') ? $this->format_text($searcharea->get_document_display_title($this)) : '';
 625          $data = [
 626              'componentname' => $componentname,
 627              'areaname' => $areaname,
 628              'courseurl' => course_get_url($this->get('courseid')),
 629              'coursefullname' => format_string($this->get('coursefullname'), true, ['context' => $context->id]),
 630              'modified' => userdate($this->get('modified')),
 631              'title' => ($title !== '') ? $title : get_string('notitle', 'search'),
 632              'docurl' => $this->get_doc_url(),
 633              'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
 634              'contexturl' => $this->get_context_url(),
 635              'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null,
 636              'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null,
 637          ];
 638  
 639          // Now take any attached any files.
 640          $files = $this->get_files();
 641          if (!empty($files)) {
 642              if (count($files) > 1) {
 643                  $filenames = [];
 644                  foreach ($files as $file) {
 645                      $filenames[] = format_string($file->get_filename(), true, ['context' => $context->id]);
 646                  }
 647                  $data['multiplefiles'] = true;
 648                  $data['filenames'] = $filenames;
 649              } else {
 650                  $file = reset($files);
 651                  $data['filename'] = format_string($file->get_filename(), true, ['context' => $context->id]);
 652              }
 653          }
 654  
 655          if ($this->is_set('userid')) {
 656              if ($this->get('userid') == $USER->id ||
 657                      (has_capability('moodle/user:viewdetails', $context) &&
 658                      has_capability('moodle/course:viewparticipants', $context))) {
 659                  $data['userurl'] = new \moodle_url(
 660                      '/user/view.php',
 661                      ['id' => $this->get('userid'), 'course' => $this->get('courseid')]
 662                  );
 663                  $data['userfullname'] = format_string($this->get('userfullname'), true, ['context' => $context->id]);
 664              }
 665          }
 666  
 667          if ($docicon = $this->get_doc_icon()) {
 668              $data['icon'] = $output->image_url($docicon->get_name(), $docicon->get_component());
 669          }
 670  
 671          return $data;
 672      }
 673  
 674      /**
 675       * Formats a text string coming from the search engine.
 676       *
 677       * By default just return the text as it is:
 678       * - Search areas are responsible of sending just plain data, the search engine may
 679       *   append HTML or markdown to it (highlighing for example).
 680       * - The view is responsible of shortening the text if it is too big
 681       *
 682       * @param  string $text Text to format
 683       * @return string HTML text to be renderer
 684       */
 685      protected function format_text($text) {
 686          return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid')));
 687      }
 688  }