Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 401 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Document representation.
  19   *
  20   * @package    search_solr
  21   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace search_solr;
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Represents a document to index.
  31   *
  32   * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
  33   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  34   */
  35  class document extends \core_search\document {
  36      /**
  37       * Indicates the file contents were not indexed due to an error.
  38       */
  39      const INDEXED_FILE_ERROR = -1;
  40  
  41      /**
  42       * Indicates the file contents were not indexed due filtering/settings.
  43       */
  44      const INDEXED_FILE_FALSE = 0;
  45  
  46      /**
  47       * Indicates the file contents are indexed with the record.
  48       */
  49      const INDEXED_FILE_TRUE = 1;
  50  
  51      /**
  52       * Any fields that are engine specifc. These are fields that are solely used by a seach engine plugin
  53       * for internal purposes.
  54       *
  55       * @var array
  56       */
  57      protected static $enginefields = array(
  58          'solr_filegroupingid' => array(
  59              'type' => 'string',
  60              'stored' => true,
  61              'indexed' => true
  62          ),
  63          'solr_fileid' => array(
  64              'type' => 'string',
  65              'stored' => true,
  66              'indexed' => true
  67          ),
  68          'solr_filecontenthash' => array(
  69              'type' => 'string',
  70              'stored' => true,
  71              'indexed' => true
  72          ),
  73          // Stores the status of file indexing.
  74          'solr_fileindexstatus' => array(
  75              'type' => 'int',
  76              'stored' => true,
  77              'indexed' => true
  78          ),
  79          // Field to index, but not store, file contents.
  80          'solr_filecontent' => array(
  81              'type' => 'text',
  82              'stored' => false,
  83              'indexed' => true,
  84              'mainquery' => true
  85          )
  86      );
  87  
  88      /**
  89       * Formats the timestamp according to the search engine needs.
  90       *
  91       * @param int $timestamp
  92       * @return string
  93       */
  94      public static function format_time_for_engine($timestamp) {
  95          return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
  96      }
  97  
  98      /**
  99       * Formats the timestamp according to the search engine needs.
 100       *
 101       * @param int $timestamp
 102       * @return string
 103       */
 104      public static function format_string_for_engine($string) {
 105          // 2^15 default. We could convert this to a setting as is possible to
 106          // change the max in solr.
 107          return \core_text::str_max_bytes($string, 32766);
 108      }
 109  
 110      /**
 111       * Returns a timestamp from the value stored in the search engine.
 112       *
 113       * @param string $time
 114       * @return int
 115       */
 116      public static function import_time_from_engine($time) {
 117          return strtotime($time);
 118      }
 119  
 120      /**
 121       * Overwritten to use markdown format as we use markdown for solr highlighting.
 122       *
 123       * @return int
 124       */
 125      protected function get_text_format() {
 126          return FORMAT_HTML;
 127      }
 128  
 129      /**
 130       * Formats a text string coming from the search engine.
 131       *
 132       * @param  string $text Text to format
 133       * @return string HTML text to be renderer
 134       */
 135      protected function format_text($text) {
 136          // Since we allow output for highlighting, we need to encode html entities.
 137          // This ensures plaintext html chars don't become valid html.
 138          $out = s($text);
 139  
 140          $startcount = 0;
 141          $endcount = 0;
 142  
 143          // Remove end/start pairs that span a few common seperation characters. Allows us to highlight phrases instead of words.
 144          $regex = '|'.engine::HIGHLIGHT_END.'([ .,-]{0,3})'.engine::HIGHLIGHT_START.'|';
 145          $out = preg_replace($regex, '$1', $out);
 146  
 147          // Now replace our start and end highlight markers.
 148          $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
 149          $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);
 150  
 151          // This makes sure any highlight tags are balanced, incase truncation or the highlight text contained our markers.
 152          while ($startcount > $endcount) {
 153              $out .= '</span>';
 154              $endcount++;
 155          }
 156          while ($startcount < $endcount) {
 157              $out = '<span class="highlight">' . $out;
 158              $endcount++;
 159          }
 160  
 161          return parent::format_text($out);
 162      }
 163  
 164      /**
 165       * Apply any defaults to unset fields before export. Called after document building, but before export.
 166       *
 167       * Sub-classes of this should make sure to call parent::apply_defaults().
 168       */
 169      protected function apply_defaults() {
 170          parent::apply_defaults();
 171  
 172          // We want to set the solr_filegroupingid to id if it isn't set.
 173          if (!isset($this->data['solr_filegroupingid'])) {
 174              $this->data['solr_filegroupingid'] = $this->data['id'];
 175          }
 176      }
 177  
 178      /**
 179       * Export the data for the given file in relation to this document.
 180       *
 181       * @param \stored_file $file The stored file we are talking about.
 182       * @return array
 183       */
 184      public function export_file_for_engine($file) {
 185          $data = $this->export_for_engine();
 186  
 187          // Content is index in the main document.
 188          unset($data['content']);
 189          unset($data['description1']);
 190          unset($data['description2']);
 191  
 192          // Going to append the fileid to give it a unique id.
 193          $data['id'] = $data['id'].'-solrfile'.$file->get_id();
 194          $data['type'] = \core_search\manager::TYPE_FILE;
 195          $data['solr_fileid'] = $file->get_id();
 196          $data['solr_filecontenthash'] = $file->get_contenthash();
 197          $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
 198          $data['title'] = $file->get_filename();
 199          $data['modified'] = self::format_time_for_engine($file->get_timemodified());
 200  
 201          return $data;
 202      }
 203  }