Differences Between: [Versions 39 and 311]
1 <?php 2 // This file is part of Moodle - http://moodle.org/ 3 // 4 // Moodle is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // Moodle is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU General Public License for more details. 13 // 14 // You should have received a copy of the GNU General Public License 15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>. 16 17 /** 18 * Solr engine. 19 * 20 * @package search_solr 21 * @copyright 2015 Daniel Neis Araujo 22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later 23 */ 24 25 namespace search_solr; 26 27 defined('MOODLE_INTERNAL') || die(); 28 29 /** 30 * Solr engine. 31 * 32 * @package search_solr 33 * @copyright 2015 Daniel Neis Araujo 34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later 35 */ 36 class engine extends \core_search\engine { 37 38 /** 39 * @var string The date format used by solr. 40 */ 41 const DATE_FORMAT = 'Y-m-d\TH:i:s\Z'; 42 43 /** 44 * @var int Commit documents interval (number of miliseconds). 45 */ 46 const AUTOCOMMIT_WITHIN = 15000; 47 48 /** 49 * The maximum number of results to fetch at a time. 50 */ 51 const QUERY_SIZE = 120; 52 53 /** 54 * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending. 55 */ 56 const FRAG_SIZE = 510; 57 58 /** 59 * Marker for the start of a highlight. 60 */ 61 const HIGHLIGHT_START = '@@HI_S@@'; 62 63 /** 64 * Marker for the end of a highlight. 
65 */ 66 const HIGHLIGHT_END = '@@HI_E@@'; 67 68 /** @var float Boost value for matching course in location-ordered searches */ 69 const COURSE_BOOST = 1; 70 71 /** @var float Boost value for matching context (in addition to course boost) */ 72 const CONTEXT_BOOST = 0.5; 73 74 /** 75 * @var \SolrClient 76 */ 77 protected $client = null; 78 79 /** 80 * @var bool True if we should reuse SolrClients, false if not. 81 */ 82 protected $cacheclient = true; 83 84 /** 85 * @var \curl Direct curl object. 86 */ 87 protected $curl = null; 88 89 /** 90 * @var array Fields that can be highlighted. 91 */ 92 protected $highlightfields = array('title', 'content', 'description1', 'description2'); 93 94 /** 95 * @var int Number of total docs reported by Sorl for the last query. 96 */ 97 protected $totalenginedocs = 0; 98 99 /** 100 * @var int Number of docs we have processed for the last query. 101 */ 102 protected $processeddocs = 0; 103 104 /** 105 * @var int Number of docs that have been skipped while processing the last query. 106 */ 107 protected $skippeddocs = 0; 108 109 /** 110 * Solr server major version. 111 * 112 * @var int 113 */ 114 protected $solrmajorversion = null; 115 116 /** 117 * Initialises the search engine configuration. 118 * 119 * @param bool $alternateconfiguration If true, use alternate configuration settings 120 * @return void 121 */ 122 public function __construct(bool $alternateconfiguration = false) { 123 parent::__construct($alternateconfiguration); 124 125 $curlversion = curl_version(); 126 if (isset($curlversion['version']) && stripos($curlversion['version'], '7.35.') === 0) { 127 // There is a flaw with curl 7.35.0 that causes problems with client reuse. 128 $this->cacheclient = false; 129 } 130 } 131 132 /** 133 * Prepares a Solr query, applies filters and executes it returning its results. 134 * 135 * @throws \core_search\engine_exception 136 * @param \stdClass $filters Containing query and filters. 
137 * @param \stdClass $accessinfo Information about areas user can access. 138 * @param int $limit The maximum number of results to return. 139 * @return \core_search\document[] Results or false if no results 140 */ 141 public function execute_query($filters, $accessinfo, $limit = 0) { 142 global $USER; 143 144 if (empty($limit)) { 145 $limit = \core_search\manager::MAX_RESULTS; 146 } 147 148 // If there is any problem we trigger the exception as soon as possible. 149 $client = $this->get_search_client(); 150 151 // Create the query object. 152 $query = $this->create_user_query($filters, $accessinfo); 153 154 // If the query cannot have results, return none. 155 if (!$query) { 156 return []; 157 } 158 159 // We expect good match rates, so for our first get, we will get a small number of records. 160 // This significantly speeds solr response time for first few pages. 161 $query->setRows(min($limit * 3, static::QUERY_SIZE)); 162 $response = $this->get_query_response($query); 163 164 // Get count data out of the response, and reset our counters. 165 list($included, $found) = $this->get_response_counts($response); 166 $this->totalenginedocs = $found; 167 $this->processeddocs = 0; 168 $this->skippeddocs = 0; 169 if ($included == 0 || $this->totalenginedocs == 0) { 170 // No results. 171 return array(); 172 } 173 174 // Get valid documents out of the response. 175 $results = $this->process_response($response, $limit); 176 177 // We have processed all the docs in the response at this point. 178 $this->processeddocs += $included; 179 180 // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying. 181 while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) { 182 // Offset the start of the query, and since we are making another call, get more per call. 
183 $query->setStart($this->processeddocs); 184 $query->setRows(static::QUERY_SIZE); 185 186 $response = $this->get_query_response($query); 187 list($included, $found) = $this->get_response_counts($response); 188 if ($included == 0 || $found == 0) { 189 // No new results were found. Found being empty would be weird, so we will just return. 190 return $results; 191 } 192 $this->totalenginedocs = $found; 193 194 // Get the new response docs, limiting to remaining we need, then add it to the end of the results array. 195 $newdocs = $this->process_response($response, $limit - count($results)); 196 $results = array_merge($results, $newdocs); 197 198 // Add to our processed docs count. 199 $this->processeddocs += $included; 200 } 201 202 return $results; 203 } 204 205 /** 206 * Takes a query and returns the response in SolrObject format. 207 * 208 * @param SolrQuery $query Solr query object. 209 * @return SolrObject|false Response document or false on error. 210 */ 211 protected function get_query_response($query) { 212 try { 213 return $this->get_search_client()->query($query)->getResponse(); 214 } catch (\SolrClientException $ex) { 215 debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER); 216 $this->queryerror = $ex->getMessage(); 217 return false; 218 } catch (\SolrServerException $ex) { 219 debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER); 220 $this->queryerror = $ex->getMessage(); 221 return false; 222 } 223 } 224 225 /** 226 * Returns the total number of documents available for the most recently call to execute_query. 227 * 228 * @return int 229 */ 230 public function get_query_total_count() { 231 // Return the total engine count minus the docs we have determined are bad. 232 return $this->totalenginedocs - $this->skippeddocs; 233 } 234 235 /** 236 * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses. 
237 * 238 * @param SolrDocument $response The response document from Solr. 239 * @return array A two part array. First how many response docs are in the response. 240 * Second, how many results are vailable in the engine. 241 */ 242 protected function get_response_counts($response) { 243 $found = 0; 244 $included = 0; 245 246 if (isset($response->grouped->solr_filegroupingid->ngroups)) { 247 // Get the number of results for file grouped queries. 248 $found = $response->grouped->solr_filegroupingid->ngroups; 249 $included = count($response->grouped->solr_filegroupingid->groups); 250 } else if (isset($response->response->numFound)) { 251 // Get the number of results for standard queries. 252 $found = $response->response->numFound; 253 if ($found > 0 && is_array($response->response->docs)) { 254 $included = count($response->response->docs); 255 } 256 } 257 258 return array($included, $found); 259 } 260 261 /** 262 * Prepares a new query object with needed limits, filters, etc. 263 * 264 * @param \stdClass $filters Containing query and filters. 265 * @param \stdClass $accessinfo Information about contexts the user can access 266 * @return \SolrDisMaxQuery|null Query object or null if they can't get any results 267 */ 268 protected function create_user_query($filters, $accessinfo) { 269 global $USER; 270 271 // Let's keep these changes internal. 272 $data = clone $filters; 273 274 $query = new \SolrDisMaxQuery(); 275 276 $this->set_query($query, self::replace_underlines($data->q)); 277 $this->add_fields($query); 278 279 // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters 280 // we are really interested in caching contexts filters instead. 281 if (!empty($data->title)) { 282 $query->addFilterQuery('{!field cache=false f=title}' . $data->title); 283 } 284 if (!empty($data->areaids)) { 285 // If areaids are specified, we want to get any that match. 286 $query->addFilterQuery('{!cache=false}areaid:(' . 
implode(' OR ', $data->areaids) . ')'); 287 } 288 if (!empty($data->courseids)) { 289 $query->addFilterQuery('{!cache=false}courseid:(' . implode(' OR ', $data->courseids) . ')'); 290 } 291 if (!empty($data->groupids)) { 292 $query->addFilterQuery('{!cache=false}groupid:(' . implode(' OR ', $data->groupids) . ')'); 293 } 294 if (!empty($data->userids)) { 295 $query->addFilterQuery('{!cache=false}userid:(' . implode(' OR ', $data->userids) . ')'); 296 } 297 298 if (!empty($data->timestart) or !empty($data->timeend)) { 299 if (empty($data->timestart)) { 300 $data->timestart = '*'; 301 } else { 302 $data->timestart = \search_solr\document::format_time_for_engine($data->timestart); 303 } 304 if (empty($data->timeend)) { 305 $data->timeend = '*'; 306 } else { 307 $data->timeend = \search_solr\document::format_time_for_engine($data->timeend); 308 } 309 310 // No cache. 311 $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']'); 312 } 313 314 // Restrict to users who are supposed to be able to see a particular result. 315 $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')'); 316 317 // And finally restrict it to the context where the user can access, we want this one cached. 318 // If the user can access all contexts $usercontexts value is just true, we don't need to filter 319 // in that case. 320 if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) { 321 // Join all area contexts into a single array and implode. 322 $allcontexts = array(); 323 foreach ($accessinfo->usercontexts as $areaid => $areacontexts) { 324 if (!empty($data->areaids) && !in_array($areaid, $data->areaids)) { 325 // Skip unused areas. 326 continue; 327 } 328 foreach ($areacontexts as $contextid) { 329 // Ensure they are unique. 
330 $allcontexts[$contextid] = $contextid; 331 } 332 } 333 if (empty($allcontexts)) { 334 // This means there are no valid contexts for them, so they get no results. 335 return null; 336 } 337 $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')'); 338 } 339 340 if (!$accessinfo->everything && $accessinfo->separategroupscontexts) { 341 // Add another restriction to handle group ids. If there are any contexts using separate 342 // groups, then results in that context will not show unless you belong to the group. 343 // (Note: Access all groups is taken care of earlier, when computing these arrays.) 344 345 // This special exceptions list allows for particularly pig-headed developers to create 346 // multiple search areas within the same module, where one of them uses separate 347 // groups and the other uses visible groups. It is a little inefficient, but this should 348 // be rare. 349 $exceptions = ''; 350 if ($accessinfo->visiblegroupscontextsareas) { 351 foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) { 352 $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' . 353 implode(' OR ', $areaids) . '))'; 354 } 355 } 356 357 if ($accessinfo->usergroups) { 358 // Either the document has no groupid, or the groupid is one that the user 359 // belongs to, or the context is not one of the separate groups contexts. 360 $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' . 361 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ' . 362 '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' . 363 $exceptions); 364 } else { 365 // Either the document has no groupid, or the context is not a restricted one. 366 $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' . 367 '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' . 368 $exceptions); 369 } 370 } 371 372 if ($this->file_indexing_enabled()) { 373 // Now group records by solr_filegroupingid. 
Limit to 3 results per group. 374 $query->setGroup(true); 375 $query->setGroupLimit(3); 376 $query->setGroupNGroups(true); 377 $query->addGroupField('solr_filegroupingid'); 378 } else { 379 // Make sure we only get text files, in case the index has pre-existing files. 380 $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT); 381 } 382 383 // If ordering by location, add in boost for the relevant course or context ids. 384 if (!empty($filters->order) && $filters->order === 'location') { 385 $coursecontext = $filters->context->get_course_context(); 386 $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST); 387 if ($filters->context->contextlevel !== CONTEXT_COURSE) { 388 // If it's a block or activity, also add a boost for the specific context id. 389 $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST); 390 } 391 } 392 393 return $query; 394 } 395 396 /** 397 * Prepares a new query by setting the query, start offset and rows to return. 398 * 399 * @param SolrQuery $query 400 * @param object $q Containing query and filters. 401 */ 402 protected function set_query($query, $q) { 403 // Set hightlighting. 404 $query->setHighlight(true); 405 foreach ($this->highlightfields as $field) { 406 $query->addHighlightField($field); 407 } 408 $query->setHighlightFragsize(static::FRAG_SIZE); 409 $query->setHighlightSimplePre(self::HIGHLIGHT_START); 410 $query->setHighlightSimplePost(self::HIGHLIGHT_END); 411 $query->setHighlightMergeContiguous(true); 412 413 $query->setQuery($q); 414 415 // A reasonable max. 416 $query->setRows(static::QUERY_SIZE); 417 } 418 419 /** 420 * Sets fields to be returned in the result. 421 * 422 * @param SolrDisMaxQuery|SolrQuery $query object. 
423 */ 424 public function add_fields($query) { 425 $documentclass = $this->get_document_classname(); 426 $fields = $documentclass::get_default_fields_definition(); 427 428 $dismax = false; 429 if ($query instanceof \SolrDisMaxQuery) { 430 $dismax = true; 431 } 432 433 foreach ($fields as $key => $field) { 434 $query->addField($key); 435 if ($dismax && !empty($field['mainquery'])) { 436 // Add fields the main query should be run against. 437 // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740, 438 // a boost value is required, even if it is optional; to avoid boosting one among other fields, 439 // the explicit boost value will be the default one, for every field. 440 $query->addQueryField($key, 1); 441 } 442 } 443 } 444 445 /** 446 * Finds the key common to both highlighing and docs array returned from response. 447 * @param object $response containing results. 448 */ 449 public function add_highlight_content($response) { 450 if (!isset($response->highlighting)) { 451 // There is no highlighting to add. 452 return; 453 } 454 455 $highlightedobject = $response->highlighting; 456 foreach ($response->response->docs as $doc) { 457 $x = $doc->id; 458 $highlighteddoc = $highlightedobject->$x; 459 $this->merge_highlight_field_values($doc, $highlighteddoc); 460 } 461 } 462 463 /** 464 * Adds the highlighting array values to docs array values. 465 * 466 * @throws \core_search\engine_exception 467 * @param object $doc containing the results. 468 * @param object $highlighteddoc containing the highlighted results values. 469 */ 470 public function merge_highlight_field_values($doc, $highlighteddoc) { 471 472 foreach ($this->highlightfields as $field) { 473 if (!empty($doc->$field)) { 474 475 // Check that the returned value is not an array. No way we can make this work with multivalued solr fields. 
476 if (is_array($doc->{$field})) { 477 throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field); 478 } 479 480 if (!empty($highlighteddoc->$field)) { 481 // Replace by the highlighted result. 482 $doc->$field = reset($highlighteddoc->$field); 483 } 484 } 485 } 486 } 487 488 /** 489 * Filters the response on Moodle side. 490 * 491 * @param SolrObject $response Solr object containing the response return from solr server. 492 * @param int $limit The maximum number of results to return. 0 for all. 493 * @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access. 494 * @return array $results containing final results to be displayed. 495 */ 496 protected function process_response($response, $limit = 0, $skipaccesscheck = false) { 497 global $USER; 498 499 if (empty($response)) { 500 return array(); 501 } 502 503 if (isset($response->grouped)) { 504 return $this->grouped_files_process_response($response, $limit); 505 } 506 507 $userid = $USER->id; 508 $noownerid = \core_search\manager::NO_OWNER_ID; 509 510 $numgranted = 0; 511 512 if (!$docs = $response->response->docs) { 513 return array(); 514 } 515 516 $out = array(); 517 if (!empty($response->response->numFound)) { 518 $this->add_highlight_content($response); 519 520 // Iterate through the results checking its availability and whether they are available for the user or not. 521 foreach ($docs as $key => $docdata) { 522 if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) { 523 // If owneruserid is set, no other user should be able to access this record. 
524 continue; 525 } 526 527 if (!$searcharea = $this->get_search_area($docdata->areaid)) { 528 continue; 529 } 530 531 $docdata = $this->standarize_solr_obj($docdata); 532 533 if ($skipaccesscheck) { 534 $access = \core_search\manager::ACCESS_GRANTED; 535 } else { 536 $access = $searcharea->check_access($docdata['itemid']); 537 } 538 switch ($access) { 539 case \core_search\manager::ACCESS_DELETED: 540 $this->delete_by_id($docdata['id']); 541 // Remove one from our processed and total counters, since we promptly deleted. 542 $this->processeddocs--; 543 $this->totalenginedocs--; 544 break; 545 case \core_search\manager::ACCESS_DENIED: 546 $this->skippeddocs++; 547 break; 548 case \core_search\manager::ACCESS_GRANTED: 549 $numgranted++; 550 551 // Add the doc. 552 $out[] = $this->to_document($searcharea, $docdata); 553 break; 554 } 555 556 // Stop when we hit our limit. 557 if (!empty($limit) && count($out) >= $limit) { 558 break; 559 } 560 } 561 } 562 563 return $out; 564 } 565 566 /** 567 * Processes grouped file results into documents, with attached matching files. 568 * 569 * @param SolrObject $response The response returned from solr server 570 * @param int $limit The maximum number of results to return. 0 for all. 571 * @return array Final results to be displayed. 572 */ 573 protected function grouped_files_process_response($response, $limit = 0) { 574 // If we can't find the grouping, or there are no matches in the grouping, return empty. 575 if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) { 576 return array(); 577 } 578 579 $numgranted = 0; 580 $orderedids = array(); 581 $completedocs = array(); 582 $incompletedocs = array(); 583 584 $highlightingobj = $response->highlighting; 585 586 // Each group represents a "master document". 
587 $groups = $response->grouped->solr_filegroupingid->groups; 588 foreach ($groups as $group) { 589 $groupid = $group->groupValue; 590 $groupdocs = $group->doclist->docs; 591 $firstdoc = reset($groupdocs); 592 593 if (!$searcharea = $this->get_search_area($firstdoc->areaid)) { 594 // Well, this is a problem. 595 continue; 596 } 597 598 // Check for access. 599 $access = $searcharea->check_access($firstdoc->itemid); 600 switch ($access) { 601 case \core_search\manager::ACCESS_DELETED: 602 // If deleted from Moodle, delete from index and then continue. 603 $this->delete_by_id($firstdoc->id); 604 // Remove one from our processed and total counters, since we promptly deleted. 605 $this->processeddocs--; 606 $this->totalenginedocs--; 607 continue 2; 608 break; 609 case \core_search\manager::ACCESS_DENIED: 610 // This means we should just skip for the current user. 611 $this->skippeddocs++; 612 continue 2; 613 break; 614 } 615 $numgranted++; 616 617 $maindoc = false; 618 $fileids = array(); 619 // Seperate the main document and any files returned. 620 foreach ($groupdocs as $groupdoc) { 621 if ($groupdoc->id == $groupid) { 622 $maindoc = $groupdoc; 623 } else if (isset($groupdoc->solr_fileid)) { 624 $fileids[] = $groupdoc->solr_fileid; 625 } 626 } 627 628 // Store the id of this group, in order, for later merging. 629 $orderedids[] = $groupid; 630 631 if (!$maindoc) { 632 // We don't have the main doc, store what we know for later building. 633 $incompletedocs[$groupid] = $fileids; 634 } else { 635 if (isset($highlightingobj->$groupid)) { 636 // Merge the highlighting for this doc. 637 $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid); 638 } 639 $docdata = $this->standarize_solr_obj($maindoc); 640 $doc = $this->to_document($searcharea, $docdata); 641 // Now we need to attach the result files to the doc. 
642 foreach ($fileids as $fileid) { 643 $doc->add_stored_file($fileid); 644 } 645 $completedocs[$groupid] = $doc; 646 } 647 648 if (!empty($limit) && $numgranted >= $limit) { 649 // We have hit the max results, we will just ignore the rest. 650 break; 651 } 652 } 653 654 $incompletedocs = $this->get_missing_docs($incompletedocs); 655 656 $out = array(); 657 // Now merge the complete and incomplete documents, in results order. 658 foreach ($orderedids as $docid) { 659 if (isset($completedocs[$docid])) { 660 $out[] = $completedocs[$docid]; 661 } else if (isset($incompletedocs[$docid])) { 662 $out[] = $incompletedocs[$docid]; 663 } 664 } 665 666 return $out; 667 } 668 669 /** 670 * Retreive any missing main documents and attach provided files. 671 * 672 * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value 673 * associated to the key should be an array of stored_files or stored file ids to attach to the result document. 674 * 675 * Return array also indexed by document id. 676 * 677 * @param array() $missingdocs An array, indexed by document id, with arrays of files/ids to attach. 678 * @return document[] 679 */ 680 protected function get_missing_docs($missingdocs) { 681 if (empty($missingdocs)) { 682 return array(); 683 } 684 685 $docids = array_keys($missingdocs); 686 687 // Build a custom query that will get all the missing documents. 688 $query = new \SolrQuery(); 689 $this->set_query($query, '*'); 690 $this->add_fields($query); 691 $query->setRows(count($docids)); 692 $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $docids) . ')'); 693 694 $response = $this->get_query_response($query); 695 // We know the missing docs have already been checked for access, so don't recheck. 
696 $results = $this->process_response($response, 0, true); 697 698 $out = array(); 699 foreach ($results as $result) { 700 $resultid = $result->get('id'); 701 if (!isset($missingdocs[$resultid])) { 702 // We got a result we didn't expect. Skip it. 703 continue; 704 } 705 // Attach the files. 706 foreach ($missingdocs[$resultid] as $filedoc) { 707 $result->add_stored_file($filedoc); 708 } 709 $out[$resultid] = $result; 710 } 711 712 return $out; 713 } 714 715 /** 716 * Returns a standard php array from a \SolrObject instance. 717 * 718 * @param \SolrObject $obj 719 * @return array The returned document as an array. 720 */ 721 public function standarize_solr_obj(\SolrObject $obj) { 722 $properties = $obj->getPropertyNames(); 723 724 $docdata = array(); 725 foreach($properties as $name) { 726 // http://php.net/manual/en/solrobject.getpropertynames.php#98018. 727 $name = trim($name); 728 $docdata[$name] = $obj->offsetGet($name); 729 } 730 return $docdata; 731 } 732 733 /** 734 * Adds a document to the search engine. 735 * 736 * This does not commit to the search engine. 737 * 738 * @param document $document 739 * @param bool $fileindexing True if file indexing is to be used 740 * @return bool 741 */ 742 public function add_document($document, $fileindexing = false) { 743 $docdata = $document->export_for_engine(); 744 745 if (!$this->add_solr_document($docdata)) { 746 return false; 747 } 748 749 if ($fileindexing) { 750 // This will take care of updating all attached files in the index. 751 $this->process_document_files($document); 752 } 753 754 return true; 755 } 756 757 /** 758 * Adds a batch of documents to the engine at once. 
759 * 760 * @param \core_search\document[] $documents Documents to add 761 * @param bool $fileindexing If true, indexes files (these are done one at a time) 762 * @return int[] Array of three elements: successfully processed, failed processed, batch count 763 */ 764 public function add_document_batch(array $documents, bool $fileindexing = false): array { 765 $docdatabatch = []; 766 foreach ($documents as $document) { 767 $docdatabatch[] = $document->export_for_engine(); 768 } 769 770 $resultcounts = $this->add_solr_documents($docdatabatch); 771 772 // Files are processed one document at a time (if there are files it's slow anyway). 773 if ($fileindexing) { 774 foreach ($documents as $document) { 775 // This will take care of updating all attached files in the index. 776 $this->process_document_files($document); 777 } 778 } 779 780 return $resultcounts; 781 } 782 783 /** 784 * Replaces underlines at edges of words in the content with spaces. 785 * 786 * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads', 787 * and 'frogs_and_toads' will be left as 'frogs_and_toads'. 788 * 789 * The reason for this is that for italic content_to_text puts _italic_ underlines at the start 790 * and end of the italicised phrase (not between words). Solr treats underlines as part of the 791 * word, which means that if you search for a word in italic then you can't find it. 792 * 793 * @param string $str String to replace 794 * @return string Replaced string 795 */ 796 protected static function replace_underlines(string $str): string { 797 return preg_replace('~\b_|_\b~', '', $str); 798 } 799 800 /** 801 * Creates a Solr document object. 802 * 803 * @param array $doc Array of document fields 804 * @return \SolrInputDocument Created document 805 */ 806 protected function create_solr_document(array $doc): \SolrInputDocument { 807 $solrdoc = new \SolrInputDocument(); 808 809 // Replace underlines in the content with spaces. 
The reason for this is that for italic 810 // text, content_to_text puts _italic_ underlines. Solr treats underlines as part of the 811 // word, which means that if you search for a word in italic then you can't find it. 812 if (array_key_exists('content', $doc)) { 813 $doc['content'] = self::replace_underlines($doc['content']); 814 } 815 816 // Set all the fields. 817 foreach ($doc as $field => $value) { 818 $solrdoc->addField($field, $value); 819 } 820 821 return $solrdoc; 822 } 823 824 /** 825 * Adds a text document to the search engine. 826 * 827 * @param array $doc 828 * @return bool 829 */ 830 protected function add_solr_document($doc) { 831 $solrdoc = $this->create_solr_document($doc); 832 833 try { 834 $result = $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN); 835 return true; 836 } catch (\SolrClientException $e) { 837 debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER); 838 } catch (\SolrServerException $e) { 839 // We only use the first line of the message, as it's a fully java stacktrace behind it. 840 $msg = strtok($e->getMessage(), "\n"); 841 debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER); 842 } 843 844 return false; 845 } 846 847 /** 848 * Adds multiple text documents to the search engine. 849 * 850 * @param array $docs Array of documents (each an array of fields) to add 851 * @return int[] Array of success, failure, batch count 852 * @throws \core_search\engine_exception 853 */ 854 protected function add_solr_documents(array $docs): array { 855 $solrdocs = []; 856 foreach ($docs as $doc) { 857 $solrdocs[] = $this->create_solr_document($doc); 858 } 859 860 try { 861 // Add documents in a batch and report that they all succeeded. 
$this->get_search_client()->addDocuments($solrdocs, true, static::AUTOCOMMIT_WITHIN);
            return [count($solrdocs), 0, 1];
        } catch (\SolrClientException $e) {
            // If there is an exception, fall through...
            $donothing = true;
        } catch (\SolrServerException $e) {
            // If there is an exception, fall through...
            $donothing = true;
        }

        // When there is an error, we fall back to adding them individually so that we can report
        // which document(s) failed. Since it overwrites, adding the successful ones multiple
        // times won't hurt.
        $success = 0;
        $failure = 0;
        $batches = 0;
        foreach ($docs as $doc) {
            $result = $this->add_solr_document($doc);
            $batches++;
            if ($result) {
                $success++;
            } else {
                $failure++;
            }
        }

        return [$success, $failure, $batches];
    }

    /**
     * Index files attached to the document, ensuring the index matches the current document files.
     *
     * For documents that aren't known to be new, we check the index for existing files.
     * - New files we will add.
     * - Existing and unchanged files we will skip.
     * - Files that are in the index but not on the document will be deleted from the index.
     * - Files that have changed will be re-indexed.
     *
     * Nothing is done when file indexing is disabled.
     *
     * @param document $document
     */
    protected function process_document_files($document) {
        if (!$this->file_indexing_enabled()) {
            return;
        }

        // Maximum rows to process at a time.
        $rows = 500;

        // Get the attached files. The array is indexed by file id (it is looked up by solr_fileid below).
        $files = $document->get_files();

        // If this isn't a new document, we need to check the existing indexed files.
        if (!$document->get_is_new()) {
            // We do this progressively, so we can handle lots of files cleanly.
            [$numfound, $indexedfiles] = $this->get_indexed_files($document, 0, $rows);
            $count = 0;
            $idstodelete = [];

            do {
                // Go through each indexed file. We want to not index any stored and unchanged ones, delete any missing ones.
                foreach ($indexedfiles as $indexedfile) {
                    $fileid = $indexedfile->solr_fileid;

                    if (!isset($files[$fileid])) {
                        // This means we have found a file that is no longer attached, so we need to delete from the index.
                        // We do it later, since this is progressive, and it could reorder results.
                        $idstodelete[] = $indexedfile->id;
                        continue;
                    }

                    $file = $files[$fileid];

                    // Check for changes that would mean we need to re-index the file. If so, just leave in $files.
                    // Filelib does not guarantee time modified is updated, so we will check important values.
                    // The last condition covers files that filtering blocked the last time they were indexed,
                    // but that the current settings say are indexable.
                    $needsreindex = $indexedfile->modified != $file->get_timemodified()
                        || strcmp($indexedfile->title, $file->get_filename()) !== 0
                        || $indexedfile->solr_filecontenthash != $file->get_contenthash()
                        || ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                            && $this->file_is_indexable($file));

                    if (!$needsreindex) {
                        // If the file is already indexed, we can just remove it from the files array and skip it.
                        unset($files[$fileid]);
                    }
                }
                $count += $rows;

                if ($count < $numfound) {
                    // If we haven't hit the total count yet, fetch the next batch.
                    [$numfound, $indexedfiles] = $this->get_indexed_files($document, $count, $rows);
                }

            } while ($count < $numfound);

            // Delete files that are no longer attached, in a single request.
            // We directly delete the items using the client, as the engine delete_by_id won't work on file docs.
            // The guard matters: there is nothing to send when no stale files were found.
            if (!empty($idstodelete)) {
                $this->get_search_client()->deleteByIds($idstodelete);
            }
        }

        // Now we can actually index all the remaining files.
        foreach ($files as $file) {
            $this->add_stored_file($document, $file);
        }
    }

    /**
     * Get the currently indexed files for a particular document, returns the total count, and a subset of files.
     *
     * @param document $document
     * @param int $start The row to start the results on. Zero indexed.
     * @param int $rows The number of rows to fetch
     * @return array A two element array, the first is the total number of available results, the second is an array
     *               of documents for the current request.
     */
    protected function get_indexed_files($document, $start = 0, $rows = 500) {
        // Build a custom query that will get any document files that are in our solr_filegroupingid.
        $query = new \SolrQuery();

        // We want to get all file records tied to a document.
        // For efficiency, we are building our own, stripped down, query.
        $query->setQuery('*');
        $query->setRows($rows);
        $query->setStart($start);
        // We want a consistent sorting, so that paging through the results is stable.
        $query->addSortField('id');

        // We only want the bare minimum of fields.
        $fields = ['id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus'];
        foreach ($fields as $field) {
            $query->addField($field);
        }

        $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
        $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

        $response = $this->get_query_response($query);
        if (empty($response->response->numFound)) {
            return [0, []];
        }

        return [$response->response->numFound, $this->convert_file_results($response)];
    }

    /**
     * A very lightweight handler for getting information about already indexed files from a Solr response.
*
     * @param \SolrObject $responsedoc A Solr response document
     * @return \stdClass[] An array of objects that contain the basic information for file processing.
     */
    protected function convert_file_results($responsedoc) {
        $docs = $responsedoc->response->docs;
        if (!$docs) {
            return [];
        }

        $out = [];

        foreach ($docs as $doc) {
            // Copy the bare minimum needed info.
            $out[] = (object) [
                'id' => $doc->id,
                'modified' => document::import_time_from_engine($doc->modified),
                'title' => $doc->title,
                'solr_fileid' => $doc->solr_fileid,
                'solr_filecontenthash' => $doc->solr_filecontenthash,
                'solr_fileindexstatus' => $doc->solr_fileindexstatus,
            ];
        }

        return $out;
    }

    /**
     * Adds a file to the search engine.
     *
     * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
     * Tika has much better content type detection than Moodle, and we will have many more doc failures
     * if we try to send mime types.
     *
     * Files that are not indexable, or that fail to index, are still recorded in the index as a
     * bare reference document whose solr_fileindexstatus explains why there is no content.
     *
     * @param document $document
     * @param \stored_file $storedfile
     * @return void
     */
    protected function add_stored_file($document, $storedfile) {
        $filedoc = $document->export_file_for_engine($storedfile);

        if (!$this->file_is_indexable($storedfile)) {
            // For files that we don't consider indexable, we will still place a reference in the search engine.
            $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
            $this->add_solr_document($filedoc);
            return;
        }

        $curl = $this->get_curl_object();

        $url = $this->get_connection_url('/update/extract');

        // Return results as XML.
        $url->param('wt', 'xml');

        // This will prevent solr from automatically making fields for every tika output.
        $url->param('uprefix', 'ignored_');

        // Control how content is captured. This will keep our file content clean of non-important metadata.
        $url->param('captureAttr', 'true');
        // Move the content to a field for indexing.
        $url->param('fmap.content', 'solr_filecontent');

        // These are common fields that match the standard *_point dynamic field and cause an error.
        $url->param('fmap.media_white_point', 'ignored_mwp');
        $url->param('fmap.media_black_point', 'ignored_mbp');

        // Copy each key to the url with literal.
        // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
        foreach ($filedoc as $key => $value) {
            // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
            $url->param('fmap.'.$key, 'ignored_'.$key);
            // Place data in a tmp field.
            $url->param('literal.mdltmp_'.$key, $value);
            // Then move to the final field.
            $url->param('fmap.mdltmp_'.$key, $key);
        }

        // This sets the true filename for Tika.
        $url->param('resource.name', $storedfile->get_filename());

        // A giant block of code that is really just error checking around the curl request.
        try {
            // We have to post the file directly in binary data (not using multipart) to avoid
            // Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
            // Note this loads the whole file into memory; see limit in file_is_indexable().
            $result = $curl->post($url->out(false), $storedfile->get_content());

            $code = $curl->get_errno();
            $info = $curl->get_info();

            // Now error handling. It is just informational, since we aren't tracking per file/doc results.
            if ($code != 0) {
                // This means an internal cURL error occurred; the error is in result.
                $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
                debugging($message, DEBUG_DEVELOPER);
            } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
                // Unexpected HTTP response code.
                $message = 'Error while indexing file with document id '.$filedoc['id'];
                // Try to get error message out of msg or title if it exists.
                if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                }
                // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
                if (CLI_SCRIPT && !PHPUNIT_TEST) {
                    mtrace($message);
                }
            } else if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
                // We found the expected status field. Now check for the expected status of 0, if not, error.
                if ((int)$matches[1] === 0) {
                    // The document was successfully indexed.
                    return;
                }
                $message = 'Unexpected Solr status code '.(int)$matches[1];
                $message .= ' while indexing file with document id '.$filedoc['id'].'.';
                debugging($message, DEBUG_DEVELOPER);
            } else {
                // We received an unprocessable response.
                $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
                $message .= strtok($result, "\n");
                debugging($message, DEBUG_DEVELOPER);
            }
        } catch (\Exception $e) {
            // There was an error, but we are not tracking per-file success, so we just continue on.
            // Report what went wrong rather than discarding it, so the failure can be diagnosed.
            $message = 'Unknown exception while indexing file "'.$storedfile->get_filename().'". ';
            $message .= get_class($e).': '.$e->getMessage();
            debugging($message, DEBUG_DEVELOPER);
        }

        // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
        $this->add_solr_document($filedoc);
    }

    /**
     * Checks to see if a passed file is indexable.
     *
     * A file is rejected when it is larger than the configured maxindexfilekb setting, when it would
     * not safely fit in memory, or when it is a Moodle backup file.
     *
     * @param \stored_file $file The file to check
     * @return bool True if the file can be indexed
     */
    protected function file_is_indexable($file) {
        $filesize = $file->get_filesize();

        if (!empty($this->config->maxindexfilekb) && ($filesize > ($this->config->maxindexfilekb * 1024))) {
            // The file is too big to index.
            return false;
        }

        // Because we now load files into memory to index them in Solr, we also have to ensure that
        // we don't try to index anything bigger than the memory limit (less 100MB for safety).
        // Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
        // in config, so this will allow files over 100MB to be indexed.
        $limit = ini_get('memory_limit');
        if ($limit && $limit != -1) {
            $limitbytes = get_real_size($limit);
            $safetymargin = 100 * 1024 * 1024;
            if ($limitbytes > $safetymargin) {
                // Only apply the margin when the limit is larger than it; with a smaller limit we
                // compare against the raw limit so that small files can still be indexed.
                $limitbytes -= $safetymargin;
            }
            if ($filesize > $limitbytes) {
                return false;
            }
        }

        if ($file->get_mimetype() == 'application/vnd.moodle.backup') {
            // We don't index Moodle backup files. There is nothing usefully indexable in them.
            return false;
        }

        return true;
    }

    /**
     * Commits all pending changes.
     *
     * @return void
     */
    protected function commit() {
        $this->get_search_client()->commit();
    }

    /**
     * Do any area cleanup needed, and do anything to confirm contents.
     *
     * Return false to prevent the search area completed time and stats from being updated.
*
     * @param \core_search\base $searcharea The search area that was complete
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index is being performed
     * @return bool True means that data is considered indexed
     */
    public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
        $this->commit();

        return true;
    }

    /**
     * Return true if file indexing is supported and enabled. False otherwise.
     *
     * @return bool
     */
    public function file_indexing_enabled() {
        // Using !empty() means a missing setting is treated as disabled instead of raising a notice.
        return !empty($this->config->fileindexing);
    }

    /**
     * Deletes the specified document.
     *
     * @param string $id The document id to delete
     * @return void
     */
    public function delete_by_id($id) {
        // We need to make sure we delete the item and all related files, which can be done with solr_filegroupingid.
        $this->get_search_client()->deleteByQuery('solr_filegroupingid:' . $id);
        $this->commit();
    }

    /**
     * Delete all area's documents.
     *
     * When no area id is given, every document in the index is deleted.
     *
     * @param string $areaid
     * @return void
     */
    public function delete($areaid = null) {
        $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';
        $this->get_search_client()->deleteByQuery($deletequery);
        $this->commit();
    }

    /**
     * Pings the Solr server using search_solr config
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_ready() {

        $configured = $this->is_server_configured();
        if ($configured !== true) {
            return $configured;
        }

        // As part of the above we have already checked that we can contact the server. For pages
        // where performance is important, we skip doing a full schema check as well.
        if ($this->should_skip_schema_check()) {
            return true;
        }

        // Update schema if required/possible.
        $schemalatest = $this->check_latest_schema();
        if ($schemalatest !== true) {
            return $schemalatest;
        }

        // Check that the schema is already set up.
        try {
            $schema = new schema($this);
            $schema->validate_setup();
        } catch (\moodle_exception $e) {
            return $e->getMessage();
        }

        return true;
    }

    /**
     * Is the solr server properly configured?.
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_configured() {

        if (empty($this->config->server_hostname) || empty($this->config->indexname)) {
            return 'No solr configuration found';
        }

        if (!$this->get_search_client(false)) {
            return get_string('engineserverstatus', 'search');
        }

        try {
            if ($this->get_solr_major_version() < 4) {
                // Minimum solr 4.0.
                return get_string('minimumsolr4', 'search_solr');
            }
        } catch (\SolrClientException $ex) {
            debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        } catch (\SolrServerException $ex) {
            debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
            return get_string('engineserverstatus', 'search');
        }

        return true;
    }

    /**
     * Returns the solr server major version.
     *
     * The value is requested from the server once and then remembered for the lifetime of this object.
     *
     * @return int
     */
    public function get_solr_major_version() {
        if ($this->solrmajorversion !== null) {
            return $this->solrmajorversion;
        }

        // We should really ping first the server to see if the specified indexname is valid but
        // we want to minimise solr server requests as they are expensive. system() emits a warning
        // if it can not connect to the configured index in the configured server.
        $systemdata = @$this->get_search_client()->system();
        $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

        // The integer cast keeps the leading digits only ("8.11.2" gives 8), and unlike cutting the
        // string at the first dot it also copes with a version string that contains no dot at all.
        $this->solrmajorversion = (int) $solrversion;

        return $this->solrmajorversion;
    }

    /**
     * Checks if the PHP Solr extension is available.
     *
     * @return bool
     */
    public function is_installed() {
        return function_exists('solr_get_version');
    }

    /**
     * Returns the solr client instance.
     *
     * We don't reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.
     *
     * @throws \core_search\engine_exception If the Solr extension is missing, or if the client can not
     *         be created and $triggerexception is true
     * @param bool $triggerexception Whether to throw when the client can not be created; when false,
     *        false is returned instead
     * @return \SolrClient|false
     */
    protected function get_search_client($triggerexception = true) {
        global $CFG;

        // A client is only stored here when client reuse is enabled.
        if ($this->client !== null) {
            return $this->client;
        }

        $config = $this->config;
        $options = [
            'hostname' => $config->server_hostname,
            'path' => '/solr/' . $config->indexname,
            'login' => !empty($config->server_username) ? $config->server_username : '',
            'password' => !empty($config->server_password) ? $config->server_password : '',
            'port' => !empty($config->server_port) ? $config->server_port : '',
            'secure' => !empty($config->secure),
            'ssl_cert' => !empty($config->ssl_cert) ? $config->ssl_cert : '',
            'ssl_key' => !empty($config->ssl_key) ? $config->ssl_key : '',
            'ssl_keypassword' => !empty($config->ssl_keypassword) ? $config->ssl_keypassword : '',
            'ssl_cainfo' => !empty($config->ssl_cainfo) ? $config->ssl_cainfo : '',
            'ssl_capath' => !empty($config->ssl_capath) ? $config->ssl_capath : '',
            'timeout' => !empty($config->server_timeout) ? $config->server_timeout : '30',
        ];

        if (!empty($CFG->proxyhost) && !is_proxybypass('http://' . $config->server_hostname . '/')) {
            $options['proxy_host'] = $CFG->proxyhost;
            if (!empty($CFG->proxyport)) {
                $options['proxy_port'] = $CFG->proxyport;
            }
            if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
                $options['proxy_login'] = $CFG->proxyuser;
                $options['proxy_password'] = $CFG->proxypassword;
            }
        }

        if (!class_exists('\SolrClient')) {
            throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
        }

        // The constructor never returns false; it reports unusable options by throwing, so that is
        // the failure we have to translate for our callers.
        try {
            $client = new \SolrClient($options);
        } catch (\SolrIllegalArgumentException $e) {
            if ($triggerexception) {
                throw new \core_search\engine_exception('engineserverstatus', 'search', '', null, $e->getMessage());
            }
            return false;
        }

        if ($this->cacheclient) {
            $this->client = $client;
        }

        return $client;
    }

    /**
     * Returns a curl object for connecting to solr.
     *
     * The object is created on first use and then reused.
     *
     * @return \curl
     */
    public function get_curl_object() {
        if (!is_null($this->curl)) {
            return $this->curl;
        }

        // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
        $this->curl = new \curl(['ignoresecurity' => true]);

        $options = [];
        // Build the SSL options. Based on pecl-solr and general testing.
        if (!empty($this->config->secure)) {
            if (!empty($this->config->ssl_cert)) {
                $options['CURLOPT_SSLCERT'] = $this->config->ssl_cert;
                $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
            }

            if (!empty($this->config->ssl_key)) {
                $options['CURLOPT_SSLKEY'] = $this->config->ssl_key;
                $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
            }

            if (!empty($this->config->ssl_keypassword)) {
                $options['CURLOPT_KEYPASSWD'] = $this->config->ssl_keypassword;
            }

            if (!empty($this->config->ssl_cainfo)) {
                $options['CURLOPT_CAINFO'] = $this->config->ssl_cainfo;
            }

            if (!empty($this->config->ssl_capath)) {
                $options['CURLOPT_CAPATH'] = $this->config->ssl_capath;
            }
        }

        // Set timeout as for Solr client.
        $options['CURLOPT_TIMEOUT'] = !empty($this->config->server_timeout) ? $this->config->server_timeout : '30';

        $this->curl->setopt($options);

        // Basic authentication is only sent when both a username and a password are configured.
        if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
            $authorization = $this->config->server_username . ':' . $this->config->server_password;
            $this->curl->setHeader('Authorization: Basic ' . base64_encode($authorization));
        }

        return $this->curl;
    }

    /**
     * Return a Moodle url object for the server connection.
     *
     * @param string $path The solr path to append.
     * @return \moodle_url
     */
    public function get_connection_url($path) {
        // Must use the proper protocol, or SSL will fail.
        $protocol = !empty($this->config->secure) ? 'https' : 'http';
        $url = $protocol . '://' . rtrim($this->config->server_hostname, '/');
        if (!empty($this->config->server_port)) {
            $url .= ':' . $this->config->server_port;
        }
        $url .= '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

        return new \moodle_url($url);
    }

    /**
     * Solr includes group support in the execute_query function.
*
     * @return bool True
     */
    public function supports_group_filtering() {
        return true;
    }

    /**
     * Brings the Solr schema up to the requested version.
     *
     * @param int $oldversion Version being updated from (not used by this implementation)
     * @param int $newversion Version being updated to
     * @return bool|string True once the update has been applied; otherwise the non-true result of the
     *         schema's can_setup_server() check, or a message saying the version is not known
     */
    protected function update_schema($oldversion, $newversion) {
        // Construct schema, and stop straight away if it reports that the server can not be set up.
        $schema = new schema($this);
        $serverstatus = $schema->can_setup_server();
        if ($serverstatus !== true) {
            return $serverstatus;
        }

        $runsetup = false;
        switch ($newversion) {
            case 2017091700:
                // This version just requires a setup call to add new fields.
                $runsetup = true;
                break;

            default:
                // If we don't know about the schema version we might not have implemented the
                // change correctly, so return.
                return get_string('schemaversionunknown', 'search');
        }

        if ($runsetup) {
            $schema->setup();
        }

        return true;
    }

    /**
     * Solr supports sort by location within course contexts or below.
     *
     * @param \context $context Context that the user requested search from
     * @return array Array from order name => display text
     */
    public function get_supported_orders(\context $context) {
        $supported = parent::get_supported_orders($context);

        // Sorting by location is only offered within a course or an activity/block; outside of a
        // course no other kind of sorting is supported.
        if ($context->get_course_context(false)) {
            $supported['location'] = get_string('order_location', 'search', $context->get_context_name());
        }

        return $supported;
    }

    /**
     * Solr supports search by user id.
     *
     * @return bool True
     */
    public function supports_users() {
        return true;
    }

    /**
     * Solr supports adding documents in a batch.
     *
     * @return bool True
     */
    public function supports_add_document_batch(): bool {
        return true;
    }

    /**
     * Solr supports deleting the index for a context.
*
     * @param int $oldcontextid Context that has been deleted
     * @return bool True to indicate that any data was actually deleted
     * @throws \core_search\engine_exception
     */
    public function delete_index_for_context(int $oldcontextid) {
        $solr = $this->get_search_client();

        try {
            $solr->deleteByQuery('contextid:' . $oldcontextid);
            $solr->commit(true);
        } catch (\Exception $ex) {
            // Any failure while deleting or committing is reported as a search engine error.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $ex->getMessage());
        }

        return true;
    }

    /**
     * Solr supports deleting the index for a course.
     *
     * @param int $oldcourseid
     * @return bool True to indicate that any data was actually deleted
     * @throws \core_search\engine_exception
     */
    public function delete_index_for_course(int $oldcourseid) {
        $solr = $this->get_search_client();

        try {
            $solr->deleteByQuery('courseid:' . $oldcourseid);
            $solr->commit(true);
        } catch (\Exception $ex) {
            // Any failure while deleting or committing is reported as a search engine error.
            throw new \core_search\engine_exception('error_solr', 'search_solr', '', $ex->getMessage());
        }

        return true;
    }

    /**
     * Checks if an alternate configuration has been defined.
     *
     * The alternate server hostname, index name and port must all be set.
     *
     * @return bool True if alternate configuration is available
     */
    public function has_alternate_configuration(): bool {
        $required = ['alternateserver_hostname', 'alternateindexname', 'alternateserver_port'];

        foreach ($required as $setting) {
            if (empty($this->config->$setting)) {
                return false;
            }
        }

        return true;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body