Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Document representation.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace core_search;

use context;

defined('MOODLE_INTERNAL') || die();

/**
 * Represents a document to index.
 *
 * Note that, if you are writting a search engine and you want to change \core_search\document
 * behaviour, you can overwrite this class, will be automatically loaded from \search_YOURENGINE\document.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class document implements \renderable, \templatable {

    /**
     * @var array $data The document data.
     */
    protected $data = array();

    /**
     * @var array Extra data needed to render the document.
     */
    protected $extradata = array();

    /**
     * @var \moodle_url Link to the document.
     */
    protected $docurl = null;

    /**
     * @var \moodle_url Link to the document context.
     */
    protected $contexturl = null;

    /**
     * @var \core_search\document_icon Document icon instance.
     */
    protected $docicon = null;

    /**
     * @var int|null The content field filearea.
     */
    protected $contentfilearea = null;

    /**
     * @var int|null The content field itemid.
     */
    protected $contentitemid = null;

    /**
     * @var bool Should be set to true if document hasn't been indexed before. False if unknown.
     */
    protected $isnew = false;

    /**
     * @var \stored_file[] An array of stored files to attach to the document.
     */
    protected $files = array();

    /**
     * Change list (for engine implementers):
     * 2017091700 - add optional field groupid
     *
     * @var int Schema version number (update if any change)
     */
    const SCHEMA_VERSION = 2017091700;

    /**
     * All required fields any doc should contain.
     *
     * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
     * default search engine.
     *
     * Search engine plugins are responsible of setting their appropriate field types and map these naming to whatever format
     * they need.
     *
     * @var array
     */
    protected static $requiredfields = array(
        'id' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => false
        ),
        'itemid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'title' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'content' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'contextid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'areaid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'type' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'courseid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'owneruserid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'modified' => array(
            'type' => 'tdate',
            'stored' => true,
            'indexed' => true
        ),
    );

    /**
     * All optional fields docs can contain.
     *
     * Although it matches solr fields format, this is just to define the field types. Search
     * engine plugins are responsible of setting their appropriate field types and map these
     * naming to whatever format they need.
     *
     * @var array
     */
    protected static $optionalfields = array(
        'userid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'groupid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'description1' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'description2' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Any fields that are engine specifc. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * Field names should be prefixed with engine name to avoid potential conflict with core fields.
     *
     * Uses same format as fields above.
     *
     * @var array
     */
    protected static $enginefields = array();

    /**
     * We ensure that the document has a unique id across search areas.
     *
     * @param int $itemid An id unique to the search area
     * @param string $componentname The search area component Frankenstyle name
     * @param string $areaname The area name (the search area class name)
     * @return void
     */
    public function __construct($itemid, $componentname, $areaname) {

        if (!is_numeric($itemid)) {
            throw new \coding_exception('The itemid should be an integer');
        }

        $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
        $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
        $this->data['itemid'] = intval($itemid);
    }

    /**
     * Add a stored file to the document.
     *
     * @param \stored_file|int $file The file to add, or file id.
     * @return void
     */
    public function add_stored_file($file) {
        if (is_numeric($file)) {
            $this->files[$file] = $file;
        } else {
            $this->files[$file->get_id()] = $file;
        }
    }

    /**
     * Returns the array of attached files.
     *
     * @return \stored_file[]
     */
    public function get_files() {
        // The files array can contain stored file ids, so we need to get instances if asked.
        foreach ($this->files as $id => $listfile) {
            if (is_numeric($listfile)) {
                $fs = get_file_storage();

                if ($file = $fs->get_file_by_id($id)) {
                    $this->files[$id] = $file;
                } else {
                    unset($this->files[$id]); // Index is out of date and referencing a file that does not exist.
                }
            }
        }

        return $this->files;
    }

    /**
     * Setter.
     *
     * Basic checkings to prevent common issues.
     *
     * If the field is a string tags will be stripped, if it is an integer or a date it
     * will be casted to a PHP integer. tdate fields values are expected to be timestamps.
     *
     * @throws \coding_exception
     * @param string $fieldname The field name
     * @param string|int $value The value to store
     * @return string|int The stored value
     */
    public function set($fieldname, $value) {

        if (!empty(static::$requiredfields[$fieldname])) {
            $fielddata = static::$requiredfields[$fieldname];
        } else if (!empty(static::$optionalfields[$fieldname])) {
            $fielddata = static::$optionalfields[$fieldname];
        } else if (!empty(static::$enginefields[$fieldname])) {
            $fielddata = static::$enginefields[$fieldname];
        }

        if (empty($fielddata)) {
            throw new \coding_exception('"' . $fieldname . '" field does not exist.');
        }

        // tdate fields should be set as timestamps, later they might be converted to
        // a date format, it depends on the search engine.
        if (($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') && !is_numeric($value)) {
            throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' . $value . '"');
        }

        // We want to be strict here, there might be engines that expect us to
        // provide them data with the proper type already set.
        if ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate') {
            $this->data[$fieldname] = intval($value);
        } else {
            // Remove disallowed Unicode characters.
            $value = \core_text::remove_unicode_non_characters($value);

            // Replace all groups of line breaks and spaces by single spaces.
            $this->data[$fieldname] = preg_replace("/\s+/u", " ", $value);
            if ($this->data[$fieldname] === null) {
                if (isset($this->data['id'])) {
                    $docid = $this->data['id'];
                } else {
                    $docid = '(unknown)';
                }
                throw new \moodle_exception('error_indexing', 'search', '', null, '"' . $fieldname .
                        '" value causes preg_replace error (may be caused by unusual characters) ' .
                        'in document with id "' . $docid . '"');
            }
        }

        return $this->data[$fieldname];
    }

    /**
     * Sets data to this->extradata
     *
     * This data can be retrieved using \core_search\document->get($fieldname).
     *
     * @param string $fieldname
     * @param string $value
     * @return void
     */
    public function set_extra($fieldname, $value) {
        $this->extradata[$fieldname] = $value;
    }

    /**
     * Getter.
     *
     * Use self::is_set if you are not sure if this field is set or not
     * as otherwise it will trigger a \coding_exception
     *
     * @throws \coding_exception
     * @param string $field
     * @return string|int
     */
    public function get($field) {

        if (isset($this->data[$field])) {
            return $this->data[$field];
        }

        // Fallback to extra data.
        if (isset($this->extradata[$field])) {
            return $this->extradata[$field];
        }

        throw new \coding_exception('Field "' . $field . '" is not set in the document');
    }

    /**
     * Checks if a field is set.
     *
     * @param string $field
     * @return bool
     */
    public function is_set($field) {
        return (isset($this->data[$field]) || isset($this->extradata[$field]));
    }

    /**
     * Set if this is a new document. False if unknown.
     *
     * @param bool $new
     */
    public function set_is_new($new) {
       $this->isnew = (bool)$new;
    }

    /**
     * Returns if the document is new. False if unknown.
     *
     * @return bool
     */
    public function get_is_new() {
       return $this->isnew;
    }

    /**
     * Returns all default fields definitions.
     *
     * @return array
     */
    public static function get_default_fields_definition() {
        return static::$requiredfields + static::$optionalfields + static::$enginefields;
    }

    /**
     * Formats the timestamp preparing the time fields to be inserted into the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They can do it extending
     * this class in \search_xxx\document.
     *
     * @param int $timestamp
     * @return string
     */
    public static function format_time_for_engine($timestamp) {
        return $timestamp;
    }

    /**
     * Formats a string value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        return $string;
    }

    /**
     * Formats a text value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $text
     * @return string
     */
    public static function format_text_for_engine($text) {
        return $text;
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They should do it extending
     * this class in \search_xxx\document.
     *
     * @param string $time
     * @return int
     */
    public static function import_time_from_engine($time) {
        return $time;
    }

    /**
     * Returns how text is returned from the search engine.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_PLAIN;
    }

    /**
     * Fills the document with data coming from the search engine.
     *
     * @throws \core_search\engine_exception
     * @param array $docdata
     * @return void
     */
    public function set_data_from_engine($docdata) {
        $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {

            // Optional params might not be there.
            if (isset($docdata[$fieldname])) {
                if ($field['type'] === 'tdate') {
                    // Time fields may need a preprocessing.
                    $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
                } else {
                    // No way we can make this work if there is any multivalue field.
                    if (is_array($docdata[$fieldname])) {
                        throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
                    }
                    $this->set($fieldname, $docdata[$fieldname]);
                }
            }
        }
    }

    /**
     * Sets the document url.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_doc_url(\moodle_url $url) {
        $this->docurl = $url;
    }

    /**
     * Gets the url to the doc.
     *
     * @return \moodle_url
     */
    public function get_doc_url() {
        return $this->docurl;
    }

    /**
     * Sets document icon instance.
     *
     * @param \core_search\document_icon $docicon
     */
    public function set_doc_icon(document_icon $docicon) {
        $this->docicon = $docicon;
    }

    /**
     * Gets document icon instance.
     *
     * @return \core_search\document_icon
     */
    public function get_doc_icon() {
        return $this->docicon;
    }

    public function set_context_url(\moodle_url $url) {
        $this->contexturl = $url;
    }

    /**
     * Gets the url to the context.
     *
     * @return \moodle_url
     */
    public function get_context_url() {
        return $this->contexturl;
    }

    /**
     * Returns the document ready to submit to the search engine.
     *
     * @throws \coding_exception
     * @return array
     */
    public function export_for_engine() {
        // Set any unset defaults.
        $this->apply_defaults();

        // We don't want to affect the document instance.
        $data = $this->data;

        // Apply specific engine-dependant formats and restrictions.
        foreach (static::$requiredfields as $fieldname => $field) {

            // We also check that we have everything we need.
            if (!isset($data[$fieldname])) {
                throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
            }

            if ($field['type'] === 'tdate') {
                // Overwrite the timestamp with the engine dependant format.
                $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
            } else if ($field['type'] === 'string') {
                // Overwrite the string with the engine dependant format.
                $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
            } else if ($field['type'] === 'text') {
                // Overwrite the text with the engine dependant format.
                $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
            }

        }

        $fields = static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {
            if (!isset($data[$fieldname])) {
                continue;
            }
            if ($field['type'] === 'tdate') {
                // Overwrite the timestamp with the engine dependant format.
                $data[$fieldname] = static::format_time_for_engine($data[$fieldname]);
            } else if ($field['type'] === 'string') {
                // Overwrite the string with the engine dependant format.
                $data[$fieldname] = static::format_string_for_engine($data[$fieldname]);
            } else if ($field['type'] === 'text') {
                // Overwrite the text with the engine dependant format.
                $data[$fieldname] = static::format_text_for_engine($data[$fieldname]);
            }
        }

        return $data;
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        // Set the default type, TYPE_TEXT.
        if (!isset($this->data['type'])) {
            $this->data['type'] = manager::TYPE_TEXT;
        }
    }

    /**
     * Export the document data to be used as a template context.
     *
> * Just delegates all the processing to export_doc_info, also used by external functions.
* Adding more info than the required one as people might be interested in extending the template. *
> * @param \renderer_base $output The renderer. * Although content is a required field when setting up the document, it accepts '' (empty) values > * @return array * as they may be the result of striping out HTML. > */ * > public function export_for_template(\renderer_base $output): array { * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed. > $docdata = $this->export_doc($output); * The renderer will output the content without any further cleaning. > return $docdata; * > } * @param renderer_base $output The renderer. > * @return array > /** */ > * Returns the current docuement information. public function export_for_template(\renderer_base $output) { > * global $USER; > * Adding more info than the required one as themers and ws clients might be interested in showing more stuff. > *
< * @param renderer_base $output The renderer.
> * @param \renderer_base $output The renderer.
< public function export_for_template(\renderer_base $output) { < global $USER;
> public function export_doc(\renderer_base $output): array { > global $USER, $CFG; > require_once($CFG->dirroot . '/course/lib.php');
$searcharea = \core_search\manager::get_search_area($this->data['areaid']); $title = $this->is_set('title') ? $this->format_text($searcharea->get_document_display_title($this)) : ''; $data = [
> 'itemid' => $this->get('itemid'),
'componentname' => $componentname, 'areaname' => $areaname,
< 'courseurl' => course_get_url($this->get('courseid')),
> 'courseurl' => (course_get_url($this->get('courseid')))->out(false),
'coursefullname' => format_string($this->get('coursefullname'), true, ['context' => $context->id]), 'modified' => userdate($this->get('modified')),
> 'timemodified' => $this->get('modified'),
'title' => ($title !== '') ? $title : get_string('notitle', 'search'),
< 'docurl' => $this->get_doc_url(),
> 'docurl' => ($this->get_doc_url())->out(false),
'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
< 'contexturl' => $this->get_context_url(),
> 'contextid' => $this->get('contextid'), > 'contexturl' => ($this->get_context_url())->out(false),
'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null, 'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null, ]; // Now take any attached any files. $files = $this->get_files(); if (!empty($files)) { if (count($files) > 1) { $filenames = []; foreach ($files as $file) { $filenames[] = format_string($file->get_filename(), true, ['context' => $context->id]); } $data['multiplefiles'] = true; $data['filenames'] = $filenames; } else { $file = reset($files); $data['filename'] = format_string($file->get_filename(), true, ['context' => $context->id]); } } if ($this->is_set('userid')) { if ($this->get('userid') == $USER->id || (has_capability('moodle/user:viewdetails', $context) && has_capability('moodle/course:viewparticipants', $context))) {
< $data['userurl'] = new \moodle_url(
> $data['userurl'] = (new \moodle_url(
'/user/view.php', ['id' => $this->get('userid'), 'course' => $this->get('courseid')]
< );
> ))->out(false);
$data['userfullname'] = format_string($this->get('userfullname'), true, ['context' => $context->id]);
> $data['userid'] = $this->get('userid');
} } if ($docicon = $this->get_doc_icon()) { $data['icon'] = $output->image_url($docicon->get_name(), $docicon->get_component());
> $data['iconurl'] = $data['icon']->out(false);
}
> $data['textformat'] = $this->get_text_format();
return $data; } /** * Formats a text string coming from the search engine. * * By default just return the text as it is: * - Search areas are responsible of sending just plain data, the search engine may * append HTML or markdown to it (highlighing for example). * - The view is responsible of shortening the text if it is too big * * @param string $text Text to format * @return string HTML text to be renderer */ protected function format_text($text) { return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid'))); } }