See Release Notes
Long Term Support Release
Differences Between: [Versions 39 and 310] [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401] [Versions 39 and 402] [Versions 39 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
 * Solr engine.
 *
 * @package search_solr
 * @copyright 2015 Daniel Neis Araujo
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace search_solr;

defined('MOODLE_INTERNAL') || die();

/**
 * Solr engine.
 *
 * @package search_solr
 * @copyright 2015 Daniel Neis Araujo
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class engine extends \core_search\engine {

    /**
     * @var string The date format used by solr.
     */
    const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';

    /**
     * @var int Commit documents interval (number of milliseconds).
     */
    const AUTOCOMMIT_WITHIN = 15000;

    /**
     * The maximum number of results to fetch at a time.
     */
    const QUERY_SIZE = 120;

    /**
     * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
     */
    const FRAG_SIZE = 510;

    /**
     * Marker for the start of a highlight.
     */
    const HIGHLIGHT_START = '@@HI_S@@';

    /**
     * Marker for the end of a highlight.
     */
    const HIGHLIGHT_END = '@@HI_E@@';

    /** @var float Boost value for matching course in location-ordered searches */
    const COURSE_BOOST = 1;

    /** @var float Boost value for matching context (in addition to course boost) */
    const CONTEXT_BOOST = 0.5;

    /**
     * @var \SolrClient
     */
    protected $client = null;

    /**
     * @var bool True if we should reuse SolrClients, false if not.
     */
    protected $cacheclient = true;

    /**
     * @var \curl Direct curl object.
     */
    protected $curl = null;

    /**
     * @var array Fields that can be highlighted.
     */
    protected $highlightfields = ['title', 'content', 'description1', 'description2'];

    /**
     * @var int Number of total docs reported by Solr for the last query.
     */
    protected $totalenginedocs = 0;

    /**
     * @var int Number of docs we have processed for the last query.
     */
    protected $processeddocs = 0;

    /**
     * @var int Number of docs that have been skipped while processing the last query.
     */
    protected $skippeddocs = 0;

    /**
     * Solr server major version.
     *
     * @var int
     */
    protected $solrmajorversion = null;

    /**
     * Initialises the search engine configuration.
     *
     * Client reuse is switched off when the installed curl reports a 7.35.x version string.
     *
     * @return void
     */
    public function __construct() {
        parent::__construct();

        // There is a flaw with curl 7.35.0 that causes problems with client reuse.
        $curlinfo = curl_version();
        $curlrelease = isset($curlinfo['version']) ? $curlinfo['version'] : null;
        if ($curlrelease !== null && stripos($curlrelease, '7.35.') === 0) {
            $this->cacheclient = false;
        }
    }

    /**
     * Prepares a Solr query, applies filters and executes it returning its results.
     *
     * @throws \core_search\engine_exception
     * @param \stdClass $filters Containing query and filters.
     * @param \stdClass $accessinfo Information about areas user can access.
     * @param int $limit The maximum number of results to return.
* @return \core_search\document[] Results, or an empty array if there are none.
     */
    public function execute_query($filters, $accessinfo, $limit = 0) {
        if (empty($limit)) {
            $limit = \core_search\manager::MAX_RESULTS;
        }

        // If there is any problem we trigger the exception as soon as possible.
        // The returned client is not needed here; the call is made only for that early failure.
        $this->get_search_client();

        // Create the query object.
        $query = $this->create_user_query($filters, $accessinfo);

        // If the query cannot have results, return none.
        if (!$query) {
            return [];
        }

        // We expect good match rates, so for our first get, we will get a small number of records.
        // This significantly speeds solr response time for first few pages.
        $query->setRows(min($limit * 3, static::QUERY_SIZE));
        $response = $this->get_query_response($query);

        // Get count data out of the response, and reset our counters.
        list($included, $found) = $this->get_response_counts($response);
        $this->totalenginedocs = $found;
        $this->processeddocs = 0;
        $this->skippeddocs = 0;
        if ($included == 0 || $this->totalenginedocs == 0) {
            // No results.
            return array();
        }

        // Get valid documents out of the response.
        $results = $this->process_response($response, $limit);

        // We have processed all the docs in the response at this point.
        $this->processeddocs += $included;

        // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying.
        while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
            // Offset the start of the query, and since we are making another call, get more per call.
            $query->setStart($this->processeddocs);
            $query->setRows(static::QUERY_SIZE);

            $response = $this->get_query_response($query);
            list($included, $found) = $this->get_response_counts($response);
            if ($included == 0 || $found == 0) {
                // No new results were found. Found being empty would be weird, so we will just return.
                return $results;
            }
            $this->totalenginedocs = $found;

            // Get the new response docs, limiting to remaining we need, then add it to the end of the results array.
            $newdocs = $this->process_response($response, $limit - count($results));
            $results = array_merge($results, $newdocs);

            // Add to our processed docs count.
            $this->processeddocs += $included;
        }

        return $results;
    }

    /**
     * Takes a query and returns the response in SolrObject format.
     *
     * Client and server exceptions are not rethrown: the message is stored in $this->queryerror,
     * reported through debugging(), and false is returned.
     *
     * @param SolrQuery $query Solr query object.
     * @return SolrObject|false Response document or false on error.
     */
    protected function get_query_response($query) {
        try {
            return $this->get_search_client()->query($query)->getResponse();
        } catch (\SolrClientException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return false;
        } catch (\SolrServerException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return false;
        }
    }

    /**
     * Returns the total number of documents available for the most recent call to execute_query.
     *
     * @return int
     */
    public function get_query_total_count() {
        // Return the total engine count minus the docs we have determined are bad.
        return $this->totalenginedocs - $this->skippeddocs;
    }

    /**
     * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
     *
     * @param SolrDocument $response The response document from Solr.
     * @return array A two part array. First how many response docs are in the response.
     *               Second, how many results are available in the engine.
*/
    protected function get_response_counts($response) {
        $found = 0;
        $included = 0;

        if (isset($response->grouped->solr_filegroupingid->ngroups)) {
            // Get the number of results for file grouped queries.
            $found = $response->grouped->solr_filegroupingid->ngroups;
            $included = count($response->grouped->solr_filegroupingid->groups);
        } else if (isset($response->response->numFound)) {
            // Get the number of results for standard queries.
            $found = $response->response->numFound;
            if ($found > 0 && is_array($response->response->docs)) {
                $included = count($response->response->docs);
            }
        }

        return array($included, $found);
    }

    /**
     * Prepares a new query object with needed limits, filters, etc.
     *
     * @param \stdClass $filters Containing query and filters.
     * @param \stdClass $accessinfo Information about contexts the user can access
     * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
     */
    protected function create_user_query($filters, $accessinfo) {
        global $USER;

        // Let's keep these changes internal.
        $data = clone $filters;

        $query = new \SolrDisMaxQuery();

        $this->set_query($query, self::replace_underlines($data->q));
        $this->add_fields($query);

        // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
        // we are really interested in caching contexts filters instead.
        if (!empty($data->title)) {
            $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
        }
        if (!empty($data->areaids)) {
            // If areaids are specified, we want to get any that match.
            $query->addFilterQuery('{!cache=false}areaid:(' . implode(' OR ', $data->areaids) . ')');
        }
        if (!empty($data->courseids)) {
            $query->addFilterQuery('{!cache=false}courseid:(' . implode(' OR ', $data->courseids) . ')');
        }
        if (!empty($data->groupids)) {
            $query->addFilterQuery('{!cache=false}groupid:(' . implode(' OR ', $data->groupids) . ')');
        }
        if (!empty($data->userids)) {
            $query->addFilterQuery('{!cache=false}userid:(' . implode(' OR ', $data->userids) . ')');
        }

        if (!empty($data->timestart) or !empty($data->timeend)) {
            // An open end of the range is expressed with the '*' wildcard.
            if (empty($data->timestart)) {
                $data->timestart = '*';
            } else {
                $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
            }
            if (empty($data->timeend)) {
                $data->timeend = '*';
            } else {
                $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
            }

            // No cache.
            $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
        }

        // Restrict to users who are supposed to be able to see a particular result.
        $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

        // And finally restrict it to the context where the user can access, we want this one cached.
        // If the user can access all contexts $usercontexts value is just true, we don't need to filter
        // in that case.
        if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) {
            // Join all area contexts into a single array and implode.
            $allcontexts = array();
            foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
                if (!empty($data->areaids) && !in_array($areaid, $data->areaids)) {
                    // Skip unused areas.
                    continue;
                }
                foreach ($areacontexts as $contextid) {
                    // Ensure they are unique.
                    $allcontexts[$contextid] = $contextid;
                }
            }
            if (empty($allcontexts)) {
                // This means there are no valid contexts for them, so they get no results.
                return null;
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
        }

        if (!$accessinfo->everything && $accessinfo->separategroupscontexts) {
            // Add another restriction to handle group ids. If there are any contexts using separate
            // groups, then results in that context will not show unless you belong to the group.
            // (Note: Access all groups is taken care of earlier, when computing these arrays.)

            // This special exceptions list allows for particularly pig-headed developers to create
            // multiple search areas within the same module, where one of them uses separate
            // groups and the other uses visible groups. It is a little inefficient, but this should
            // be rare.
            $exceptions = '';
            if ($accessinfo->visiblegroupscontextsareas) {
                foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                    $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
                            implode(' OR ', $areaids) . '))';
                }
            }

            if ($accessinfo->usergroups) {
                // Either the document has no groupid, or the groupid is one that the user
                // belongs to, or the context is not one of the separate groups contexts.
                $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
                        'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ' .
                        '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
                        $exceptions);
            } else {
                // Either the document has no groupid, or the context is not a restricted one.
                $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
                        '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
                        $exceptions);
            }
        }

        if ($this->file_indexing_enabled()) {
            // Now group records by solr_filegroupingid. Limit to 3 results per group.
            $query->setGroup(true);
            $query->setGroupLimit(3);
            $query->setGroupNGroups(true);
            $query->addGroupField('solr_filegroupingid');
        } else {
            // Make sure we only get text files, in case the index has pre-existing files.
            $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
        }

        // If ordering by location, add in boost for the relevant course or context ids.
        if (!empty($filters->order) && $filters->order === 'location') {
            $coursecontext = $filters->context->get_course_context();
            $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
            if ($filters->context->contextlevel !== CONTEXT_COURSE) {
                // If it's a block or activity, also add a boost for the specific context id.
                $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
            }
        }

        return $query;
    }

    /**
     * Prepares a new query by enabling highlighting, setting the query string and the rows to return.
     *
     * @param SolrQuery $query
     * @param string $q The query string to run.
     */
    protected function set_query($query, $q) {
        // Set highlighting.
        $query->setHighlight(true);
        foreach ($this->highlightfields as $field) {
            $query->addHighlightField($field);
        }
        $query->setHighlightFragsize(static::FRAG_SIZE);
        $query->setHighlightSimplePre(self::HIGHLIGHT_START);
        $query->setHighlightSimplePost(self::HIGHLIGHT_END);
        $query->setHighlightMergeContiguous(true);

        $query->setQuery($q);

        // A reasonable max.
        $query->setRows(static::QUERY_SIZE);
    }

    /**
     * Sets fields to be returned in the result.
     *
     * @param SolrDisMaxQuery|SolrQuery $query object.
     */
    public function add_fields($query) {
        $documentclass = $this->get_document_classname();
        $fields = $documentclass::get_default_fields_definition();

        $dismax = false;
        if ($query instanceof \SolrDisMaxQuery) {
            $dismax = true;
        }

        foreach ($fields as $key => $field) {
            $query->addField($key);
            if ($dismax && !empty($field['mainquery'])) {
                // Add fields the main query should be run against.
                // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
                // a boost value is required, even if it is optional; to avoid boosting one among other fields,
                // the explicit boost value will be the default one, for every field.
                $query->addQueryField($key, 1);
            }
        }
    }

    /**
     * Finds the key common to both highlighting and docs array returned from response.
     *
     * Each doc in the response is merged with the highlighting entry stored under the doc id.
     *
     * @param object $response containing results.
     */
    public function add_highlight_content($response) {
        if (!isset($response->highlighting)) {
            // There is no highlighting to add.
            return;
        }

        $highlightedobject = $response->highlighting;
        foreach ($response->response->docs as $doc) {
            $x = $doc->id;
            // A doc may have no highlighting entry; read it defensively and still run the merge,
            // so the multivalued field check inside it is applied to every doc.
            $highlighteddoc = isset($highlightedobject->$x) ? $highlightedobject->$x : null;
            $this->merge_highlight_field_values($doc, $highlighteddoc);
        }
    }

    /**
     * Adds the highlighting array values to docs array values.
     *
     * @throws \core_search\engine_exception
     * @param object $doc containing the results.
     * @param object|null $highlighteddoc containing the highlighted results values, null if there are none.
     */
    public function merge_highlight_field_values($doc, $highlighteddoc) {

        foreach ($this->highlightfields as $field) {
            if (!empty($doc->$field)) {

                // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
                if (is_array($doc->{$field})) {
                    throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field);
                }

                if (!empty($highlighteddoc->$field)) {
                    // Replace by the highlighted result.
                    $doc->$field = reset($highlighteddoc->$field);
                }
            }
        }
    }

    /**
     * Filters the response on Moodle side.
     *
     * @param SolrObject $response Solr object containing the response return from solr server.
     * @param int $limit The maximum number of results to return. 0 for all.
* @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
     *                              Not applied to grouped (file) responses, which are always access checked.
     * @return array $results containing final results to be displayed.
     */
    protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
        global $USER;

        if (empty($response)) {
            return array();
        }

        if (isset($response->grouped)) {
            return $this->grouped_files_process_response($response, $limit);
        }

        $userid = $USER->id;
        $noownerid = \core_search\manager::NO_OWNER_ID;

        if (!$docs = $response->response->docs) {
            return array();
        }

        $out = array();
        if (!empty($response->response->numFound)) {
            $this->add_highlight_content($response);

            // Iterate through the results checking its availability and whether they are available for the user or not.
            foreach ($docs as $docdata) {
                if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) {
                    // If owneruserid is set, no other user should be able to access this record.
                    continue;
                }

                if (!$searcharea = $this->get_search_area($docdata->areaid)) {
                    continue;
                }

                $docdata = $this->standarize_solr_obj($docdata);

                if ($skipaccesscheck) {
                    $access = \core_search\manager::ACCESS_GRANTED;
                } else {
                    $access = $searcharea->check_access($docdata['itemid']);
                }
                switch ($access) {
                    case \core_search\manager::ACCESS_DELETED:
                        $this->delete_by_id($docdata['id']);
                        // Remove one from our processed and total counters, since we promptly deleted.
                        $this->processeddocs--;
                        $this->totalenginedocs--;
                        break;
                    case \core_search\manager::ACCESS_DENIED:
                        $this->skippeddocs++;
                        break;
                    case \core_search\manager::ACCESS_GRANTED:
                        // Add the doc.
                        $out[] = $this->to_document($searcharea, $docdata);
                        break;
                }

                // Stop when we hit our limit.
                if (!empty($limit) && count($out) >= $limit) {
                    break;
                }
            }
        }

        return $out;
    }

    /**
     * Processes grouped file results into documents, with attached matching files.
     *
     * @param SolrObject $response The response returned from solr server
     * @param int $limit The maximum number of results to return. 0 for all.
     * @return array Final results to be displayed.
     */
    protected function grouped_files_process_response($response, $limit = 0) {
        // If we can't find the grouping, or there are no matches in the grouping, return empty.
        if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
            return array();
        }

        $numgranted = 0;
        $orderedids = array();
        $completedocs = array();
        $incompletedocs = array();

        // The response may carry no highlighting section; the per-group isset() below copes with null.
        $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

        // Each group represents a "master document".
        $groups = $response->grouped->solr_filegroupingid->groups;
        foreach ($groups as $group) {
            $groupid = $group->groupValue;
            $groupdocs = $group->doclist->docs;
            if (empty($groupdocs)) {
                // A group without documents gives us nothing to check access on or to build from.
                continue;
            }
            $firstdoc = reset($groupdocs);

            if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
                // Well, this is a problem.
                continue;
            }

            // Check for access.
            $access = $searcharea->check_access($firstdoc->itemid);
            switch ($access) {
                case \core_search\manager::ACCESS_DELETED:
                    // If deleted from Moodle, delete from index and then continue.
                    $this->delete_by_id($firstdoc->id);
                    // Remove one from our processed and total counters, since we promptly deleted.
                    $this->processeddocs--;
                    $this->totalenginedocs--;
                    continue 2;
                case \core_search\manager::ACCESS_DENIED:
                    // This means we should just skip for the current user.
                    $this->skippeddocs++;
                    continue 2;
            }
            $numgranted++;

            $maindoc = false;
            $fileids = array();
            // Separate the main document and any files returned.
            foreach ($groupdocs as $groupdoc) {
                if ($groupdoc->id == $groupid) {
                    $maindoc = $groupdoc;
                } else if (isset($groupdoc->solr_fileid)) {
                    $fileids[] = $groupdoc->solr_fileid;
                }
            }

            // Store the id of this group, in order, for later merging.
            $orderedids[] = $groupid;

            if (!$maindoc) {
                // We don't have the main doc, store what we know for later building.
                $incompletedocs[$groupid] = $fileids;
            } else {
                if (isset($highlightingobj->$groupid)) {
                    // Merge the highlighting for this doc.
                    $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
                }
                $docdata = $this->standarize_solr_obj($maindoc);
                $doc = $this->to_document($searcharea, $docdata);
                // Now we need to attach the result files to the doc.
                foreach ($fileids as $fileid) {
                    $doc->add_stored_file($fileid);
                }
                $completedocs[$groupid] = $doc;
            }

            if (!empty($limit) && $numgranted >= $limit) {
                // We have hit the max results, we will just ignore the rest.
                break;
            }
        }

        $incompletedocs = $this->get_missing_docs($incompletedocs);

        $out = array();
        // Now merge the complete and incomplete documents, in results order.
        foreach ($orderedids as $docid) {
            if (isset($completedocs[$docid])) {
                $out[] = $completedocs[$docid];
            } else if (isset($incompletedocs[$docid])) {
                $out[] = $incompletedocs[$docid];
            }
        }

        return $out;
    }

    /**
     * Retrieve any missing main documents and attach provided files.
     *
     * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
     * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
*
     * Return array also indexed by document id.
     *
     * @param array() $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
     * @return document[]
     */
    protected function get_missing_docs($missingdocs) {
        if (empty($missingdocs)) {
            return array();
        }

        $wantedids = array_keys($missingdocs);

        // One match-all query, narrowed by an uncached id filter, fetches every missing document.
        $lookup = new \SolrQuery();
        $this->set_query($lookup, '*');
        $this->add_fields($lookup);
        $lookup->setRows(count($wantedids));
        $lookup->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

        // Access was already checked for these documents, so the check is skipped here.
        $founddocs = $this->process_response($this->get_query_response($lookup), 0, true);

        $byid = array();
        foreach ($founddocs as $founddoc) {
            $foundid = $founddoc->get('id');
            if (isset($missingdocs[$foundid])) {
                // Attach the files known for this document.
                foreach ($missingdocs[$foundid] as $attachment) {
                    $founddoc->add_stored_file($attachment);
                }
                $byid[$foundid] = $founddoc;
            }
            // Anything else is a result we did not ask for, and is dropped.
        }

        return $byid;
    }

    /**
     * Returns a standard php array from a \SolrObject instance.
     *
     * @param \SolrObject $obj
     * @return array The returned document as an array, keyed by trimmed property name.
     */
    public function standarize_solr_obj(\SolrObject $obj) {
        $asarray = array();
        foreach ($obj->getPropertyNames() as $rawname) {
            // Names are trimmed first, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
            $cleanname = trim($rawname);
            $asarray[$cleanname] = $obj->offsetGet($cleanname);
        }
        return $asarray;
    }

    /**
     * Adds a document to the search engine.
     *
     * This does not commit to the search engine.
*
     * @param document $document
     * @param bool $fileindexing True if file indexing is to be used
     * @return bool
     */
    public function add_document($document, $fileindexing = false) {
        $docdata = $document->export_for_engine();

        if (!$this->add_solr_document($docdata)) {
            return false;
        }

        if ($fileindexing) {
            // This will take care of updating all attached files in the index.
            $this->process_document_files($document);
        }

        return true;
    }

    /**
     * Removes underlines at edges of words in the content.
     *
     * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
     * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
     *
     * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
     * and end of the italicised phrase (not between words). Solr treats underlines as part of the
     * word, which means that if you search for a word in italic then you can't find it.
     *
     * @param string $str String to replace
     * @return string Replaced string, or the unchanged input if the replacement fails
     */
    protected static function replace_underlines(string $str): string {
        // preg_replace() returns null on error, which would violate the declared string return type.
        return preg_replace('~\b_|_\b~', '', $str) ?? $str;
    }

    /**
     * Adds a text document to the search engine.
     *
     * Client and server exceptions are reported through debugging() and turned into a false return.
     *
     * @param array $doc
     * @return bool True if the document was sent without an exception, false otherwise.
     */
    protected function add_solr_document($doc) {
        $solrdoc = new \SolrInputDocument();

        // Remove underlines at the edges of words in the content. The reason for this is that for italic
        // text, content_to_text puts _italic_ underlines. Solr treats underlines as part of the
        // word, which means that if you search for a word in italic then you can't find it.
        if (array_key_exists('content', $doc)) {
            $doc['content'] = self::replace_underlines($doc['content']);
        }

        foreach ($doc as $field => $value) {
            $solrdoc->addField($field, $value);
        }

        try {
            $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
            return true;
        } catch (\SolrClientException $e) {
            debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
        } catch (\SolrServerException $e) {
            // We only use the first line of the message, as it's a fully java stacktrace behind it.
            $msg = strtok($e->getMessage(), "\n");
            debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
        }

        return false;
    }

    /**
     * Index files attached to the document, ensuring the index matches the current document files.
     *
     * For documents that aren't known to be new, we check the index for existing files.
     * - New files we will add.
     * - Existing and unchanged files we will skip.
     * - File that are in the index but not on the document will be deleted from the index.
     * - Files that have changed will be re-indexed.
     *
     * @param document $document
     */
    protected function process_document_files($document) {
        if (!$this->file_indexing_enabled()) {
            return;
        }

        // Maximum rows to process at a time.
        $rows = 500;

        // Get the attached files.
        $files = $document->get_files();

        // If this isn't a new document, we need to check the existing indexed files.
        if (!$document->get_is_new()) {
            // We do this progressively, so we can handle lots of files cleanly.
            list($numfound, $indexedfiles) = $this->get_indexed_files($document, 0, $rows);
            $count = 0;
            $idstodelete = array();

            do {
                // Go through each indexed file. We want to not index any stored and unchanged ones, delete any missing ones.
                foreach ($indexedfiles as $indexedfile) {
                    $fileid = $indexedfile->solr_fileid;

                    if (isset($files[$fileid])) {
                        // Check for changes that would mean we need to re-index the file. If so, just leave in $files.
                        // Filelib does not guarantee time modified is updated, so we will check important values.
                        if ($indexedfile->modified != $files[$fileid]->get_timemodified()) {
                            continue;
                        }
                        if (strcmp($indexedfile->title, $files[$fileid]->get_filename()) !== 0) {
                            continue;
                        }
                        if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) {
                            continue;
                        }
                        if ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE &&
                                $this->file_is_indexable($files[$fileid])) {
                            // This means that the last time we indexed this file, filtering blocked it.
                            // Current settings say it is indexable, so we will allow it to be indexed.
                            continue;
                        }

                        // If the file is already indexed, we can just remove it from the files array and skip it.
                        unset($files[$fileid]);
                    } else {
                        // This means we have found a file that is no longer attached, so we need to delete from the index.
                        // We do it later, since this is progressive, and it could reorder results.
                        $idstodelete[] = $indexedfile->id;
                    }
                }
                $count += $rows;

                if ($count < $numfound) {
                    // If we haven't hit the total count yet, fetch the next batch.
                    list($numfound, $indexedfiles) = $this->get_indexed_files($document, $count, $rows);
                }

            } while ($count < $numfound);

            // Delete files that are no longer attached.
            foreach ($idstodelete as $id) {
                // We directly delete the item using the client, as the engine delete_by_id won't work on file docs.
                $this->get_search_client()->deleteById($id);
            }
        }

        // Now we can actually index all the remaining files.
        foreach ($files as $file) {
            $this->add_stored_file($document, $file);
        }
    }

    /**
     * Get the currently indexed files for a particular document, returns the total count, and a subset of files.
     *
     * @param document $document
     * @param int $start The row to start the results on. Zero indexed.
     * @param int $rows The number of rows to fetch
     * @return array A two element array, the first is the total number of available results, the second is an array
     *               of documents for the current request.
     */
    protected function get_indexed_files($document, $start = 0, $rows = 500) {
        // Build a custom query that will get any document files that are in our solr_filegroupingid.
        $query = new \SolrQuery();

        // We want to get all file records tied to a document.
        // For efficiency, we are building our own, stripped down, query.
        $query->setQuery('*');
        $query->setRows($rows);
        $query->setStart($start);
        // We want a consistent sorting.
        $query->addSortField('id');

        // We only want the bare minimum of fields.
        $query->addField('id');
        $query->addField('modified');
        $query->addField('title');
        $query->addField('solr_fileid');
        $query->addField('solr_filecontenthash');
        $query->addField('solr_fileindexstatus');

        $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
        $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

        $response = $this->get_query_response($query);
        if (empty($response->response->numFound)) {
            // Covers both a failed query (false response) and a query with no matches.
            return array(0, array());
        }

        return array($response->response->numFound, $this->convert_file_results($response));
    }

    /**
     * A very lightweight handler for getting information about already indexed files from a Solr response.
*
     * @param \SolrObject $responsedoc A Solr response document
     * @return \stdClass[] An array of objects that contain the basic information for file processing.
     */
    protected function convert_file_results($responsedoc) {
        if (!$docs = $responsedoc->response->docs) {
            return array();
        }

        $out = array();

        foreach ($docs as $doc) {
            // Copy the bare minimum needed info.
            $result = new \stdClass();
            $result->id = $doc->id;
            $result->modified = document::import_time_from_engine($doc->modified);
            $result->title = $doc->title;
            $result->solr_fileid = $doc->solr_fileid;
            $result->solr_filecontenthash = $doc->solr_filecontenthash;
            $result->solr_fileindexstatus = $doc->solr_fileindexstatus;
            $out[] = $result;
        }

        return $out;
    }

    /**
     * Adds a file to the search engine.
     *
     * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
     * Tika has much better content type detection than Moodle, and we will have many more doc failures
     * if we try to send mime types.
     *
     * Files that are not indexable, and files whose extraction fails for any reason, are still
     * recorded in the index as a plain document (without content) carrying the matching
     * solr_fileindexstatus value.
     *
     * @param document $document The document the file belongs to
     * @param \stored_file $storedfile The file to send to Solr
     * @return void
     */
    protected function add_stored_file($document, $storedfile) {
        $filedoc = $document->export_file_for_engine($storedfile);

        if (!$this->file_is_indexable($storedfile)) {
            // For files that we don't consider indexable, we will still place a reference in the search engine.
            $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
            $this->add_solr_document($filedoc);
            return;
        }

        $curl = $this->get_curl_object();

        $url = $this->get_connection_url('/update/extract');

        // Return results as XML.
        $url->param('wt', 'xml');

        // This will prevent solr from automatically making fields for every tika output.
        $url->param('uprefix', 'ignored_');

        // Control how content is captured. This will keep our file content clean of non-important metadata.
        $url->param('captureAttr', 'true');
        // Move the content to a field for indexing.
        $url->param('fmap.content', 'solr_filecontent');

        // These are common fields that matches the standard *_point dynamic field and causes an error.
        $url->param('fmap.media_white_point', 'ignored_mwp');
        $url->param('fmap.media_black_point', 'ignored_mbp');

        // Copy each key to the url with literal.
        // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
        foreach ($filedoc as $key => $value) {
            // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
            $url->param('fmap.'.$key, 'ignored_'.$key);
            // Place data in a tmp field.
            $url->param('literal.mdltmp_'.$key, $value);
            // Then move to the final field.
            $url->param('fmap.mdltmp_'.$key, $key);
        }

        // This sets the true filename for Tika.
        $url->param('resource.name', $storedfile->get_filename());

        // A giant block of code that is really just error checking around the curl request.
        try {
            // We have to post the file directly in binary data (not using multipart) to avoid
            // Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
            // Note this loads the whole file into memory; see limit in file_is_indexable().
            $curl->setHeader('Content-Type: text/plain; charset=UTF-8');
            $result = $curl->post($url->out(false), $storedfile->get_content());
            // NOTE(review): resetHeader() appears to clear every header on the shared curl object, not
            // only the Content-Type set above - confirm that get_curl_object() restores any
            // Authorization header before the next upload.
            $curl->resetHeader();

            $code = $curl->get_errno();
            $info = $curl->get_info();

            // Now error handling. It is just informational, since we aren't tracking per file/doc results.
            if ($code != 0) {
                // This means an internal cURL error occurred error is in result.
                $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
                debugging($message, DEBUG_DEVELOPER);
            } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
                // Unexpected HTTP response code.
                $message = 'Error while indexing file with document id '.$filedoc['id'];
                // Try to get error message out of msg or title if it exists.
                if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                }
                // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
                if (CLI_SCRIPT && !PHPUNIT_TEST) {
                    mtrace($message);
                }
            } else {
                // Check for the expected status field.
                if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
                    // Now check for the expected status of 0, if not, error.
                    if ((int)$matches[1] !== 0) {
                        $message = 'Unexpected Solr status code '.(int)$matches[1];
                        $message .= ' while indexing file with document id '.$filedoc['id'].'.';
                        debugging($message, DEBUG_DEVELOPER);
                    } else {
                        // The document was successfully indexed.
                        return;
                    }
                } else {
                    // We received an unprocessable response.
                    $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
                    $message .= strtok($result, "\n");
                    debugging($message, DEBUG_DEVELOPER);
                }
            }
        } catch (\Exception $e) {
            // There was an error, but we are not tracking per-file success, so we just continue on.
            // The exception message is included so the failure can actually be diagnosed.
            debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'": '.$e->getMessage(),
                    DEBUG_DEVELOPER);
        }

        // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
        $this->add_solr_document($filedoc);
    }

    /**
     * Checks to see if a passed file is indexable.
     *
     * A file is rejected when it is larger than the configured maxindexfilekb setting, when it would
     * not fit in the PHP memory limit with 100MB of headroom, or when it is a Moodle backup file.
     *
     * @param \stored_file $file The file to check
     * @return bool True if the file can be indexed
     */
    protected function file_is_indexable($file) {
        $filesize = $file->get_filesize();

        if (!empty($this->config->maxindexfilekb) && ($filesize > ($this->config->maxindexfilekb * 1024))) {
            // The file is too big to index.
            return false;
        }

        // Because we now load files into memory to index them in Solr, we also have to ensure that
        // we don't try to index anything bigger than the memory limit (less 100MB for safety).
        // Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
        // in config, so this will allow files over 100MB to be indexed.
        $limit = ini_get('memory_limit');
        if ($limit && $limit != -1) {
            // Reserve the safety margin described above; never let the usable size go negative.
            $safetymargin = 100 * 1024 * 1024;
            $limitbytes = max(0, get_real_size($limit) - $safetymargin);
            if ($filesize > $limitbytes) {
                return false;
            }
        }

        $mime = $file->get_mimetype();

        if ($mime == 'application/vnd.moodle.backup') {
            // We don't index Moodle backup files. There is nothing usefully indexable in them.
            return false;
        }

        return true;
    }

    /**
     * Commits all pending changes.
     *
     * @return void
     */
    protected function commit() {
        $this->get_search_client()->commit();
    }

    /**
     * Do any area cleanup needed, and do anything to confirm contents.
     *
     * Return false to prevent the search area completed time and stats from being updated.
*
     * @param \core_search\base $searcharea The search area that was complete
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index is being performed
     * @return bool True means that data is considered indexed
     */
    public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
        // Push everything queued for this area to the index before reporting success.
        $this->commit();

        return true;
    }

    /**
     * Tells whether the fileindexing setting of this engine is switched on.
     *
     * @return bool The fileindexing config value cast to a boolean
     */
    public function file_indexing_enabled() {
        $setting = $this->config->fileindexing;

        return (bool)$setting;
    }

    /**
     * Asks the Solr client to optimize (defragment) the index.
     *
     * @return void
     */
    public function optimize() {
        $client = $this->get_search_client();
        $client->optimize(1, true, false);
    }

    /**
     * Removes a document from the index together with its file records.
     *
     * @param string $id The document id to delete
     * @return void
     */
    public function delete_by_id($id) {
        // The document and all of its files share the same solr_filegroupingid, so one query removes them all.
        $query = 'solr_filegroupingid:' . $id;
        $this->get_search_client()->deleteByQuery($query);
        $this->commit();
    }

    /**
     * Deletes the documents of one search area, or the whole index when no area is given.
     *
     * @param string $areaid The area to clear; any empty value clears everything
     * @return void
     */
    public function delete($areaid = null) {
        $query = $areaid ? 'areaid:' . $areaid : '*:*';
        $this->get_search_client()->deleteByQuery($query);
        $this->commit();
    }

    /**
     * Pings the Solr server using search_solr config
     *
     * @return true|string Returns true if all good or an error string.
*/
    public function is_server_ready() {
        // Basic configuration and connectivity come first; pass any error string straight back.
        $configstatus = $this->is_server_configured();
        if ($configstatus !== true) {
            return $configstatus;
        }

        // The server has been contacted by the check above. Where performance matters the
        // full schema check is skipped.
        if ($this->should_skip_schema_check()) {
            return true;
        }

        // Bring the schema up to date if that is required and possible.
        $schemastatus = $this->check_latest_schema();
        if ($schemastatus !== true) {
            return $schemastatus;
        }

        // Finally confirm the schema is set up.
        try {
            (new \search_solr\schema())->validate_setup();
        } catch (\moodle_exception $e) {
            return $e->getMessage();
        }

        return true;
    }

    /**
     * Checks that the Solr connection settings are present and that the server is usable.
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_configured() {
        $missingconfig = empty($this->config->server_hostname) || empty($this->config->indexname);
        if ($missingconfig) {
            return 'No solr configuration found';
        }

        // Ask for the client without triggering an exception; a falsy result means it is unavailable.
        if (!$this->get_search_client(false)) {
            return get_string('engineserverstatus', 'search');
        }

        try {
            $majorversion = $this->get_solr_major_version();
            if ($majorversion < 4) {
                // Solr 4.0 is the minimum supported version.
                return get_string('minimumsolr4', 'search_solr');
            }
        } catch (\SolrClientException $ex) {
            debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        } catch (\SolrServerException $ex) {
            debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        }

        return true;
    }

    /**
     * Returns the solr server major version.
*
     * The value is read from the server once and then cached on this object.
     *
     * @return int
     */
    public function get_solr_major_version() {
        if ($this->solrmajorversion !== null) {
            return $this->solrmajorversion;
        }

        // We should really ping first the server to see if the specified indexname is valid but
        // we want to minimise solr server requests as they are expensive. system() emits a warning
        // if it can not connect to the configured index in the configured server.
        $systemdata = @$this->get_search_client()->system();
        $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

        // An integer cast keeps only the leading digits, so "8.11.2" gives 8 and a version string
        // without any dot (e.g. "9") still gives 9 instead of 0.
        $this->solrmajorversion = (int)$solrversion;

        return $this->solrmajorversion;
    }

    /**
     * Checks if the PHP Solr extension is available.
     *
     * @return bool
     */
    public function is_installed() {
        return function_exists('solr_get_version');
    }

    /**
     * Returns the solr client instance.
     *
     * We don't reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.
     *
     * @throws \core_search\engine_exception If the Solr extension is missing, or if the client
     *                                       can not be created and $triggerexception is true
     * @param bool $triggerexception Whether a failure to create the client throws (true) or
     *                               makes this method return false (false)
     * @return \SolrClient|false The client, or false on failure when $triggerexception is false
     */
    protected function get_search_client($triggerexception = true) {
        global $CFG;

        // Reuse the cached client when there is one.
        if ($this->client !== null) {
            return $this->client;
        }

        $options = array(
            'hostname' => $this->config->server_hostname,
            'path' => '/solr/' . $this->config->indexname,
            'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
            'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
            'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
            'secure' => !empty($this->config->secure) ? true : false,
            'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
            'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
            'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
            'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
            'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
            'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
        );

        if (!empty($CFG->proxyhost) && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
            $options['proxy_host'] = $CFG->proxyhost;
            if (!empty($CFG->proxyport)) {
                $options['proxy_port'] = $CFG->proxyport;
            }
            if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
                $options['proxy_login'] = $CFG->proxyuser;
                $options['proxy_password'] = $CFG->proxypassword;
            }
        }

        if (!class_exists('\SolrClient')) {
            throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
        }

        try {
            $client = new \SolrClient($options);
        } catch (\SolrIllegalArgumentException $e) {
            // The constructor reports unusable options by throwing; "new" can never yield false.
            if ($triggerexception) {
                throw new \core_search\engine_exception('engineserverstatus', 'search');
            }
            return false;
        }

        if ($this->cacheclient) {
            $this->client = $client;
        }

        return $client;
    }

    /**
     * Returns a curl object for connecting to solr.
     *
     * The object is created and configured (SSL options, timeout) once and then reused. The
     * Authorization header is applied on every call rather than only at creation time.
     *
     * @return \curl
     */
    public function get_curl_object() {
        if (is_null($this->curl)) {
            // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
            $this->curl = new \curl(['ignoresecurity' => true]);

            $options = array();
            // Build the SSL options. Based on pecl-solr and general testing.
            if (!empty($this->config->secure)) {
                if (!empty($this->config->ssl_cert)) {
                    $options['CURLOPT_SSLCERT'] = $this->config->ssl_cert;
                    $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
                }

                if (!empty($this->config->ssl_key)) {
                    $options['CURLOPT_SSLKEY'] = $this->config->ssl_key;
                    $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
                }

                if (!empty($this->config->ssl_keypassword)) {
                    $options['CURLOPT_KEYPASSWD'] = $this->config->ssl_keypassword;
                }

                if (!empty($this->config->ssl_cainfo)) {
                    $options['CURLOPT_CAINFO'] = $this->config->ssl_cainfo;
                }

                if (!empty($this->config->ssl_capath)) {
                    $options['CURLOPT_CAPATH'] = $this->config->ssl_capath;
                }
            }

            // Set timeout as for Solr client.
            $options['CURLOPT_TIMEOUT'] = !empty($this->config->server_timeout) ? $this->config->server_timeout : '30';

            $this->curl->setopt($options);
        }

        // Apply the credentials on every call, not just when the object is first built: callers
        // such as add_stored_file() call resetHeader() after a request, which would otherwise leave
        // every later request on the cached object without its Authorization header.
        // NOTE(review): assumes setHeader() ignores a header that is already present - confirm.
        if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
            $authorization = $this->config->server_username . ':' . $this->config->server_password;
            $this->curl->setHeader('Authorization: Basic ' . base64_encode($authorization));
        }

        return $this->curl;
    }

    /**
     * Return a Moodle url object for the server connection.
     *
     * The URL is built as protocol://hostname[:port]/solr/indexname/path from the engine config.
     *
     * @param string $path The solr path to append.
     * @return \moodle_url
     */
    public function get_connection_url($path) {
        // Must use the proper protocol, or SSL will fail.
        $protocol = !empty($this->config->secure) ? 'https' : 'http';
        $url = $protocol . '://' . rtrim($this->config->server_hostname, '/');
        if (!empty($this->config->server_port)) {
            $url .= ':' . $this->config->server_port;
        }
        $url .= '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

        return new \moodle_url($url);
    }

    /**
     * Solr includes group support in the execute_query function.
*
     * @return bool True
     */
    public function supports_group_filtering() {
        return true;
    }

    /**
     * Applies the schema changes needed to reach a given schema version.
     *
     * Only version 2017091700 is known here; reaching it just needs a schema setup call, which
     * adds the new fields.
     *
     * @param int $oldversion The current schema version (not used by this implementation)
     * @param int $newversion The schema version to update to
     * @return bool|string True once the schema has been set up; otherwise the non-true value from
     *                     can_setup_server() (presumably an error string) or the
     *                     'schemaversionunknown' string for an unrecognised version
     */
    protected function update_schema($oldversion, $newversion) {
        $schema = new schema();

        // Give up straight away if the server can not be set up.
        $setupstatus = $schema->can_setup_server();
        if ($setupstatus !== true) {
            return $setupstatus;
        }

        // A version we don't know about may not have been implemented correctly, so refuse it.
        // Loose comparison on purpose, so the version may arrive as a numeric string.
        if ($newversion != 2017091700) {
            return get_string('schemaversionunknown', 'search');
        }

        // The known version only requires a setup call to add new fields.
        $schema->setup();

        return true;
    }

    /**
     * Solr supports sort by location within course contexts or below.
     *
     * @param \context $context Context that the user requested search from
     * @return array Array from order name => display text
     */
    public function get_supported_orders(\context $context) {
        $orders = parent::get_supported_orders($context);

        // Outside of a course there is no extra ordering to offer.
        if (!$context->get_course_context(false)) {
            return $orders;
        }

        // Inside a course, activity or block the results can also be sorted by location.
        $contextname = $context->get_context_name();
        $orders['location'] = get_string('order_location', 'search', $contextname);

        return $orders;
    }

    /**
     * Solr supports search by user id.
     *
     * @return bool True
     */
    public function supports_users() {
        return true;
    }

    /**
     * Solr supports deleting the index for a context.
*
     * @param int $oldcontextid Context that has been deleted
     * @return bool True to indicate that any data was actually deleted
     * @throws \core_search\engine_exception
     */
    public function delete_index_for_context(int $oldcontextid) {
        $client = $this->get_search_client();
        $query = 'contextid:' . $oldcontextid;

        try {
            $client->deleteByQuery($query);
            $client->commit(true);
        } catch (\Exception $e) {
            // Report any failure as an engine exception carrying the original message.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
        }

        return true;
    }

    /**
     * Solr supports deleting the index for a course.
     *
     * @param int $oldcourseid Id of the course whose documents are removed
     * @return bool True to indicate that any data was actually deleted
     * @throws \core_search\engine_exception
     */
    public function delete_index_for_course(int $oldcourseid) {
        $client = $this->get_search_client();
        $query = 'courseid:' . $oldcourseid;

        try {
            $client->deleteByQuery($query);
            $client->commit(true);
        } catch (\Exception $e) {
            // Report any failure as an engine exception carrying the original message.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
        }

        return true;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body