Differences Between: [Versions 310 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Document representation.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace core_search;

use context;

defined('MOODLE_INTERNAL') || die();

/**
 * Represents a document to index.
 *
 * Note that, if you are writing a search engine and you want to change \core_search\document
 * behaviour, you can overwrite this class, will be automatically loaded from \search_YOURENGINE\document.
 *
 * @package    core_search
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class document implements \renderable, \templatable {

    /**
     * @var array $data The document data.
     */
    protected $data = array();

    /**
     * @var array Extra data needed to render the document.
     */
    protected $extradata = array();

    /**
     * @var \moodle_url Link to the document.
     */
    protected $docurl = null;

    /**
     * @var \moodle_url Link to the document context.
     */
    protected $contexturl = null;

    /**
     * @var \core_search\document_icon Document icon instance.
     */
    protected $docicon = null;

    /**
     * @var int|null The content field filearea.
     */
    protected $contentfilearea = null;

    /**
     * @var int|null The content field itemid.
     */
    protected $contentitemid = null;

    /**
     * @var bool Should be set to true if document hasn't been indexed before. False if unknown.
     */
    protected $isnew = false;

    /**
     * @var \stored_file[] An array of stored files to attach to the document.
     */
    protected $files = array();

    /**
     * Change list (for engine implementers):
     * 2017091700 - add optional field groupid
     *
     * @var int Schema version number (update if any change)
     */
    const SCHEMA_VERSION = 2017091700;

    /**
     * All required fields any doc should contain.
     *
     * We have to choose a format to specify field types, using solr format as we have to choose one and solr is the
     * default search engine.
     *
     * Search engine plugins are responsible of setting their appropriate field types and map these naming to whatever format
     * they need.
     *
     * @var array
     */
    protected static $requiredfields = array(
        'id' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => false
        ),
        'itemid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'title' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'content' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'contextid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'areaid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'type' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'courseid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'owneruserid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'modified' => array(
            'type' => 'tdate',
            'stored' => true,
            'indexed' => true
        ),
    );

    /**
     * All optional fields docs can contain.
     *
     * Although it matches solr fields format, this is just to define the field types. Search
     * engine plugins are responsible of setting their appropriate field types and map these
     * naming to whatever format they need.
     *
     * @var array
     */
    protected static $optionalfields = array(
        'userid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'groupid' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        'description1' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        ),
        'description2' => array(
            'type' => 'text',
            'stored' => true,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * Field names should be prefixed with engine name to avoid potential conflict with core fields.
     *
     * Uses same format as fields above.
     *
     * @var array
     */
    protected static $enginefields = array();

    /**
     * We ensure that the document has a unique id across search areas.
     *
     * The document id is built as "<areaid>-<itemid>"; the itemid itself is stored as an integer.
     *
     * @throws \coding_exception If $itemid is not numeric
     * @param int $itemid An id unique to the search area
     * @param string $componentname The search area component Frankenstyle name
     * @param string $areaname The area name (the search area class name)
     * @return void
     */
    public function __construct($itemid, $componentname, $areaname) {

        if (!is_numeric($itemid)) {
            throw new \coding_exception('The itemid should be an integer');
        }

        $this->data['areaid'] = \core_search\manager::generate_areaid($componentname, $areaname);
        $this->data['id'] = $this->data['areaid'] . '-' . $itemid;
        $this->data['itemid'] = intval($itemid);
    }

    /**
     * Add a stored file to the document.
     *
     * A numeric value is kept as a bare file id (resolved later by get_files());
     * anything else is treated as a file object and keyed by its get_id().
     *
     * @param \stored_file|int $file The file to add, or file id.
     * @return void
     */
    public function add_stored_file($file) {
        if (is_numeric($file)) {
            $this->files[$file] = $file;
        } else {
            $this->files[$file->get_id()] = $file;
        }
    }

    /**
     * Returns the array of attached files.
     *
     * Entries that were added as bare file ids are replaced by the instance returned from
     * file storage; ids that can not be resolved are removed from the list.
     *
     * @return \stored_file[]
     */
    public function get_files() {
        // File storage is only needed if there is at least one bare id; fetch it once and reuse it.
        $fs = null;

        // The files array can contain stored file ids, so we need to get instances if asked.
        foreach ($this->files as $id => $listfile) {
            if (!is_numeric($listfile)) {
                continue;
            }

            if ($fs === null) {
                $fs = get_file_storage();
            }

            if ($file = $fs->get_file_by_id($id)) {
                $this->files[$id] = $file;
            } else {
                unset($this->files[$id]); // Index is out of date and referencing a file that does not exist.
            }
        }

        return $this->files;
    }

    /**
     * Setter.
     *
     * Basic checks to prevent common issues.
     *
     * If the field is an integer or a date it will be cast to a PHP integer (tdate field values are
     * expected to be timestamps). Any other field has disallowed Unicode characters removed and every
     * run of whitespace collapsed into a single space.
     *
     * @throws \coding_exception If the field is not defined, or an int/tdate field gets a non numeric value
     * @throws \moodle_exception If the whitespace clean-up of a text value fails
     * @param string $fieldname The field name
     * @param string|int $value The value to store
     * @return string|int The stored value
     */
    public function set($fieldname, $value) {

        if (!empty(static::$requiredfields[$fieldname])) {
            $fielddata = static::$requiredfields[$fieldname];
        } else if (!empty(static::$optionalfields[$fieldname])) {
            $fielddata = static::$optionalfields[$fieldname];
        } else if (!empty(static::$enginefields[$fieldname])) {
            $fielddata = static::$enginefields[$fieldname];
        }

        if (empty($fielddata)) {
            throw new \coding_exception('"' . $fieldname . '" field does not exist.');
        }

        $isinteger = ($fielddata['type'] === 'int' || $fielddata['type'] === 'tdate');

        // The tdate fields should be set as timestamps, later they might be converted to
        // a date format, it depends on the search engine.
        if ($isinteger && !is_numeric($value)) {
            // Arrays and objects can not be safely concatenated into the message, so report their type instead.
            $shownvalue = (is_scalar($value) || $value === null) ? $value : gettype($value);
            throw new \coding_exception('"' . $fieldname . '" value should be an integer and its value is "' .
                $shownvalue . '"');
        }

        // We want to be strict here, there might be engines that expect us to
        // provide them data with the proper type already set.
        if ($isinteger) {
            $this->data[$fieldname] = intval($value);
        } else {
            // Remove disallowed Unicode characters.
            $value = \core_text::remove_unicode_non_characters($value);

            // Replace all groups of line breaks and spaces by single spaces.
            $this->data[$fieldname] = preg_replace("/\s+/u", " ", $value);
            if ($this->data[$fieldname] === null) {
                // A null result means preg_replace itself failed.
                if (isset($this->data['id'])) {
                    $docid = $this->data['id'];
                } else {
                    $docid = '(unknown)';
                }
                throw new \moodle_exception('error_indexing', 'search', '', null, '"' . $fieldname .
                    '" value causes preg_replace error (may be caused by unusual characters) ' .
                    'in document with id "' . $docid . '"');
            }
        }

        return $this->data[$fieldname];
    }

    /**
     * Sets data to this->extradata
     *
     * This data can be retrieved using \core_search\document->get($fieldname).
     * The value is stored as given, no validation or cleaning is applied.
     *
     * @param string $fieldname
     * @param mixed $value
     * @return void
     */
    public function set_extra($fieldname, $value) {
        $this->extradata[$fieldname] = $value;
    }

    /**
     * Getter.
     *
     * Looks at the document data first and falls back to the extra data.
     *
     * Use self::is_set if you are not sure if this field is set or not
     * as otherwise it will trigger a \coding_exception
     *
     * @throws \coding_exception
     * @param string $field
     * @return string|int
     */
    public function get($field) {

        if (isset($this->data[$field])) {
            return $this->data[$field];
        }

        // Fallback to extra data.
        if (isset($this->extradata[$field])) {
            return $this->extradata[$field];
        }

        throw new \coding_exception('Field "' . $field . '" is not set in the document');
    }

    /**
     * Checks if a field is set, either in the document data or in the extra data.
     *
     * @param string $field
     * @return bool
     */
    public function is_set($field) {
        return (isset($this->data[$field]) || isset($this->extradata[$field]));
    }

    /**
     * Set if this is a new document. False if unknown.
     *
     * @param bool $new
     */
    public function set_is_new($new) {
        $this->isnew = (bool)$new;
    }

    /**
     * Returns if the document is new. False if unknown.
     *
     * @return bool
     */
    public function get_is_new() {
        return $this->isnew;
    }

    /**
     * Returns all default fields definitions (required, optional and engine fields).
     *
     * @return array
     */
    public static function get_default_fields_definition() {
        return static::$requiredfields + static::$optionalfields + static::$enginefields;
    }

    /**
     * Formats the timestamp preparing the time fields to be inserted into the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They can do it extending
     * this class in \search_xxx\document.
     *
     * @param int $timestamp
     * @return string|int The value for the engine; this default implementation returns $timestamp unchanged
     */
    public static function format_time_for_engine($timestamp) {
        return $timestamp;
    }

    /**
     * Formats a string value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        return $string;
    }

    /**
     * Formats a text value for the search engine.
     *
     * Search engines may overwrite this method to apply restrictions, like limiting the size.
     * The default behaviour is just returning the string.
     *
     * @param string $text
     * @return string
     */
    public static function format_text_for_engine($text) {
        return $text;
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * By default it just returns a timestamp so any search engine could just store integers
     * and use integers comparison to get documents between x and y timestamps, but search
     * engines might be interested in using their own field formats. They should do it extending
     * this class in \search_xxx\document.
     *
     * @param string $time
     * @return int The timestamp; this default implementation returns $time unchanged
     */
    public static function import_time_from_engine($time) {
        return $time;
    }

    /**
     * Returns how text is returned from the search engine.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_PLAIN;
    }

    /**
     * Fills the document with data coming from the search engine.
     *
     * Every known field present in $docdata is passed through set(); tdate values go through
     * import_time_from_engine() first.
     *
     * @throws \core_search\engine_exception If a non tdate field holds an array (multivalued field)
     * @param array $docdata
     * @return void
     */
    public function set_data_from_engine($docdata) {
        $fields = static::$requiredfields + static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {

            // Optional params might not be there.
            if (isset($docdata[$fieldname])) {
                if ($field['type'] === 'tdate') {
                    // Time fields may need a preprocessing.
                    $this->set($fieldname, static::import_time_from_engine($docdata[$fieldname]));
                } else {
                    // No way we can make this work if there is any multivalue field.
                    if (is_array($docdata[$fieldname])) {
                        throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
                    }
                    $this->set($fieldname, $docdata[$fieldname]);
                }
            }
        }
    }

    /**
     * Sets the document url.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_doc_url(\moodle_url $url) {
        $this->docurl = $url;
    }

    /**
     * Gets the url to the doc.
     *
     * @return \moodle_url
     */
    public function get_doc_url() {
        return $this->docurl;
    }

    /**
     * Sets document icon instance.
     *
     * @param \core_search\document_icon $docicon
     */
    public function set_doc_icon(document_icon $docicon) {
        $this->docicon = $docicon;
    }

    /**
     * Gets document icon instance.
     *
     * @return \core_search\document_icon
     */
    public function get_doc_icon() {
        return $this->docicon;
    }

    /**
     * Sets the url to the document context.
     *
     * @param \moodle_url $url
     * @return void
     */
    public function set_context_url(\moodle_url $url) {
        $this->contexturl = $url;
    }

    /**
     * Gets the url to the context.
     *
     * @return \moodle_url
     */
    public function get_context_url() {
        return $this->contexturl;
    }

    /**
     * Returns the document ready to submit to the search engine.
     *
     * Defaults are applied first, then every set field is passed through the
     * engine formatter matching its type. All required fields must be set.
     *
     * @throws \coding_exception If a required field is missing
     * @return array
     */
    public function export_for_engine() {
        // Set any unset defaults.
        $this->apply_defaults();

        // We don't want to affect the document instance.
        $data = $this->data;

        // Apply specific engine-dependent formats and restrictions.
        foreach (static::$requiredfields as $fieldname => $field) {

            // We also check that we have everything we need.
            if (!isset($data[$fieldname])) {
                throw new \coding_exception('Missing "' . $fieldname . '" field in document with id "' . $this->data['id'] . '"');
            }

            $data[$fieldname] = self::format_value_for_engine($field, $data[$fieldname]);
        }

        // Optional and engine fields are only formatted when they are present.
        $fields = static::$optionalfields + static::$enginefields;
        foreach ($fields as $fieldname => $field) {
            if (!isset($data[$fieldname])) {
                continue;
            }

            $data[$fieldname] = self::format_value_for_engine($field, $data[$fieldname]);
        }

        return $data;
    }

    /**
     * Passes a single value through the engine formatter that matches its field type.
     *
     * The formatters are called through static:: so overrides in the engine
     * document class are used. Values of any other type are returned untouched.
     *
     * @param array $field The field definition, only 'type' is read
     * @param mixed $value The value currently stored in the document
     * @return mixed The value in the engine-dependent format
     */
    private static function format_value_for_engine(array $field, $value) {
        if ($field['type'] === 'tdate') {
            // Overwrite the timestamp with the engine-dependent format.
            return static::format_time_for_engine($value);
        }

        if ($field['type'] === 'string') {
            // Overwrite the string with the engine-dependent format.
            return static::format_string_for_engine($value);
        }

        if ($field['type'] === 'text') {
            // Overwrite the text with the engine-dependent format.
            return static::format_text_for_engine($value);
        }

        return $value;
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        // Set the default type, TYPE_TEXT.
        if (!isset($this->data['type'])) {
            $this->data['type'] = manager::TYPE_TEXT;
        }
    }

    /**
     * Export the document data to be used as a template context.
     *
     * Adding more info than the required one as people might be interested in extending the template.
     *
     * Although content is a required field when setting up the document, it accepts '' (empty) values
     * as they may be the result of stripping out HTML.
     *
     * SECURITY NOTE: It is the responsibility of the document to properly escape any text to be displayed.
     * The renderer will output the content without any further cleaning.
     *
     * @param \renderer_base $output The renderer.
     * @return array
     */
    public function export_for_template(\renderer_base $output) {
        global $USER;

        list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($this->get('areaid'));
        $context = context::instance_by_id($this->get('contextid'));

        $searcharea = \core_search\manager::get_search_area($this->data['areaid']);
        $title = $this->is_set('title') ? $this->format_text($searcharea->get_document_display_title($this)) : '';
        $data = [
            'componentname' => $componentname,
            'areaname' => $areaname,
            'courseurl' => course_get_url($this->get('courseid')),
            'coursefullname' => format_string($this->get('coursefullname'), true, ['context' => $context->id]),
            'modified' => userdate($this->get('modified')),
            'title' => ($title !== '') ? $title : get_string('notitle', 'search'),
            'docurl' => $this->get_doc_url(),
            'content' => $this->is_set('content') ? $this->format_text($this->get('content')) : null,
            'contexturl' => $this->get_context_url(),
            'description1' => $this->is_set('description1') ? $this->format_text($this->get('description1')) : null,
            'description2' => $this->is_set('description2') ? $this->format_text($this->get('description2')) : null,
        ];

        // Now take any attached files.
        $files = $this->get_files();
        if (!empty($files)) {
            if (count($files) > 1) {
                $filenames = [];
                foreach ($files as $file) {
                    $filenames[] = format_string($file->get_filename(), true, ['context' => $context->id]);
                }
                $data['multiplefiles'] = true;
                $data['filenames'] = $filenames;
            } else {
                $file = reset($files);
                $data['filename'] = format_string($file->get_filename(), true, ['context' => $context->id]);
            }
        }

        // The user link is only exported for the user's own documents or when both capabilities are held.
        if ($this->is_set('userid')) {
            if ($this->get('userid') == $USER->id ||
                    (has_capability('moodle/user:viewdetails', $context) &&
                    has_capability('moodle/course:viewparticipants', $context))) {
                $data['userurl'] = new \moodle_url(
                    '/user/view.php',
                    ['id' => $this->get('userid'), 'course' => $this->get('courseid')]
                );
                $data['userfullname'] = format_string($this->get('userfullname'), true, ['context' => $context->id]);
            }
        }

        if ($docicon = $this->get_doc_icon()) {
            $data['icon'] = $output->image_url($docicon->get_name(), $docicon->get_component());
        }

        return $data;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * By default just return the text as it is:
     * - Search areas are responsible of sending just plain data, the search engine may
     *   append HTML or markdown to it (highlighting for example).
     * - The view is responsible of shortening the text if it is too big
     *
     * @param  string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        return format_text($text, $this->get_text_format(), array('context' => $this->get('contextid')));
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body