Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 39 and 401]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Solr engine.
  19   *
  20   * @package    search_solr
  21   * @copyright  2015 Daniel Neis Araujo
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace search_solr;
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Solr engine.
  31   *
  32   * @package    search_solr
  33   * @copyright  2015 Daniel Neis Araujo
  34   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  35   */
  36  class engine extends \core_search\engine {
  37  
    /**
     * @var string The date format used by Solr.
     */
    const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';

    /**
     * @var int Commit documents interval (number of milliseconds).
     */
    const AUTOCOMMIT_WITHIN = 15000;

    /**
     * @var int The maximum number of results to fetch at a time.
     */
    const QUERY_SIZE = 120;

    /**
     * @var int Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
     */
    const FRAG_SIZE = 510;

    /**
     * @var string Marker for the start of a highlight.
     */
    const HIGHLIGHT_START = '@@HI_S@@';

    /**
     * @var string Marker for the end of a highlight.
     */
    const HIGHLIGHT_END = '@@HI_E@@';

    /** @var int|float Boost value for matching course in location-ordered searches */
    const COURSE_BOOST = 1;

    /** @var float Boost value for matching context (in addition to course boost) */
    const CONTEXT_BOOST = 0.5;

    /**
     * @var \SolrClient|null Solr client instance; null by default.
     */
    protected $client = null;

    /**
     * @var bool True if we should reuse SolrClients, false if not. The constructor turns this off for curl 7.35.x.
     */
    protected $cacheclient = true;

    /**
     * @var \curl|null Direct curl object; null by default.
     */
    protected $curl = null;

    /**
     * @var array Fields that can be highlighted.
     */
    protected $highlightfields = array('title', 'content', 'description1', 'description2');

    /**
     * @var int Number of total docs reported by Solr for the last query.
     */
    protected $totalenginedocs = 0;

    /**
     * @var int Number of docs we have processed for the last query.
     */
    protected $processeddocs = 0;

    /**
     * @var int Number of docs that have been skipped while processing the last query.
     */
    protected $skippeddocs = 0;

    /**
     * Solr server major version.
     *
     * @var int|null Null by default.
     */
    protected $solrmajorversion = null;
 115  
 116      /**
 117       * Initialises the search engine configuration.
 118       *
 119       * @param bool $alternateconfiguration If true, use alternate configuration settings
 120       * @return void
 121       */
 122      public function __construct(bool $alternateconfiguration = false) {
 123          parent::__construct($alternateconfiguration);
 124  
 125          $curlversion = curl_version();
 126          if (isset($curlversion['version']) && stripos($curlversion['version'], '7.35.') === 0) {
 127              // There is a flaw with curl 7.35.0 that causes problems with client reuse.
 128              $this->cacheclient = false;
 129          }
 130      }
 131  
 132      /**
 133       * Prepares a Solr query, applies filters and executes it returning its results.
 134       *
 135       * @throws \core_search\engine_exception
 136       * @param  \stdClass $filters Containing query and filters.
 137       * @param  \stdClass $accessinfo Information about areas user can access.
 138       * @param  int       $limit The maximum number of results to return.
 139       * @return \core_search\document[] Results or false if no results
 140       */
 141      public function execute_query($filters, $accessinfo, $limit = 0) {
 142          global $USER;
 143  
 144          if (empty($limit)) {
 145              $limit = \core_search\manager::MAX_RESULTS;
 146          }
 147  
 148          // If there is any problem we trigger the exception as soon as possible.
 149          $client = $this->get_search_client();
 150  
 151          // Create the query object.
 152          $query = $this->create_user_query($filters, $accessinfo);
 153  
 154          // If the query cannot have results, return none.
 155          if (!$query) {
 156              return [];
 157          }
 158  
 159          // We expect good match rates, so for our first get, we will get a small number of records.
 160          // This significantly speeds solr response time for first few pages.
 161          $query->setRows(min($limit * 3, static::QUERY_SIZE));
 162          $response = $this->get_query_response($query);
 163  
 164          // Get count data out of the response, and reset our counters.
 165          list($included, $found) = $this->get_response_counts($response);
 166          $this->totalenginedocs = $found;
 167          $this->processeddocs = 0;
 168          $this->skippeddocs = 0;
 169          if ($included == 0 || $this->totalenginedocs == 0) {
 170              // No results.
 171              return array();
 172          }
 173  
 174          // Get valid documents out of the response.
 175          $results = $this->process_response($response, $limit);
 176  
 177          // We have processed all the docs in the response at this point.
 178          $this->processeddocs += $included;
 179  
 180          // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying.
 181          while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
 182              // Offset the start of the query, and since we are making another call, get more per call.
 183              $query->setStart($this->processeddocs);
 184              $query->setRows(static::QUERY_SIZE);
 185  
 186              $response = $this->get_query_response($query);
 187              list($included, $found) = $this->get_response_counts($response);
 188              if ($included == 0 || $found == 0) {
 189                  // No new results were found. Found being empty would be weird, so we will just return.
 190                  return $results;
 191              }
 192              $this->totalenginedocs = $found;
 193  
 194              // Get the new response docs, limiting to remaining we need, then add it to the end of the results array.
 195              $newdocs = $this->process_response($response, $limit - count($results));
 196              $results = array_merge($results, $newdocs);
 197  
 198              // Add to our processed docs count.
 199              $this->processeddocs += $included;
 200          }
 201  
 202          return $results;
 203      }
 204  
 205      /**
 206       * Takes a query and returns the response in SolrObject format.
 207       *
 208       * @param  SolrQuery  $query Solr query object.
 209       * @return SolrObject|false Response document or false on error.
 210       */
 211      protected function get_query_response($query) {
 212          try {
 213              return $this->get_search_client()->query($query)->getResponse();
 214          } catch (\SolrClientException $ex) {
 215              debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
 216              $this->queryerror = $ex->getMessage();
 217              return false;
 218          } catch (\SolrServerException $ex) {
 219              debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
 220              $this->queryerror = $ex->getMessage();
 221              return false;
 222          }
 223      }
 224  
 225      /**
 226       * Returns the total number of documents available for the most recently call to execute_query.
 227       *
 228       * @return int
 229       */
 230      public function get_query_total_count() {
 231          // Return the total engine count minus the docs we have determined are bad.
 232          return $this->totalenginedocs - $this->skippeddocs;
 233      }
 234  
 235      /**
 236       * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
 237       *
 238       * @param SolrDocument $response The response document from Solr.
 239       * @return array A two part array. First how many response docs are in the response.
 240       *               Second, how many results are vailable in the engine.
 241       */
 242      protected function get_response_counts($response) {
 243          $found = 0;
 244          $included = 0;
 245  
 246          if (isset($response->grouped->solr_filegroupingid->ngroups)) {
 247              // Get the number of results for file grouped queries.
 248              $found = $response->grouped->solr_filegroupingid->ngroups;
 249              $included = count($response->grouped->solr_filegroupingid->groups);
 250          } else if (isset($response->response->numFound)) {
 251              // Get the number of results for standard queries.
 252              $found = $response->response->numFound;
 253              if ($found > 0 && is_array($response->response->docs)) {
 254                  $included = count($response->response->docs);
 255              }
 256          }
 257  
 258          return array($included, $found);
 259      }
 260  
 261      /**
 262       * Prepares a new query object with needed limits, filters, etc.
 263       *
 264       * @param \stdClass $filters Containing query and filters.
 265       * @param \stdClass $accessinfo Information about contexts the user can access
 266       * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
 267       */
 268      protected function create_user_query($filters, $accessinfo) {
 269          global $USER;
 270  
 271          // Let's keep these changes internal.
 272          $data = clone $filters;
 273  
 274          $query = new \SolrDisMaxQuery();
 275  
 276          $this->set_query($query, self::replace_underlines($data->q));
 277          $this->add_fields($query);
 278  
 279          // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
 280          // we are really interested in caching contexts filters instead.
 281          if (!empty($data->title)) {
 282              $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
 283          }
 284          if (!empty($data->areaids)) {
 285              // If areaids are specified, we want to get any that match.
 286              $query->addFilterQuery('{!cache=false}areaid:(' . implode(' OR ', $data->areaids) . ')');
 287          }
 288          if (!empty($data->courseids)) {
 289              $query->addFilterQuery('{!cache=false}courseid:(' . implode(' OR ', $data->courseids) . ')');
 290          }
 291          if (!empty($data->groupids)) {
 292              $query->addFilterQuery('{!cache=false}groupid:(' . implode(' OR ', $data->groupids) . ')');
 293          }
 294          if (!empty($data->userids)) {
 295              $query->addFilterQuery('{!cache=false}userid:(' . implode(' OR ', $data->userids) . ')');
 296          }
 297  
 298          if (!empty($data->timestart) or !empty($data->timeend)) {
 299              if (empty($data->timestart)) {
 300                  $data->timestart = '*';
 301              } else {
 302                  $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
 303              }
 304              if (empty($data->timeend)) {
 305                  $data->timeend = '*';
 306              } else {
 307                  $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
 308              }
 309  
 310              // No cache.
 311              $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
 312          }
 313  
 314          // Restrict to users who are supposed to be able to see a particular result.
 315          $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');
 316  
 317          // And finally restrict it to the context where the user can access, we want this one cached.
 318          // If the user can access all contexts $usercontexts value is just true, we don't need to filter
 319          // in that case.
 320          if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) {
 321              // Join all area contexts into a single array and implode.
 322              $allcontexts = array();
 323              foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
 324                  if (!empty($data->areaids) && !in_array($areaid, $data->areaids)) {
 325                      // Skip unused areas.
 326                      continue;
 327                  }
 328                  foreach ($areacontexts as $contextid) {
 329                      // Ensure they are unique.
 330                      $allcontexts[$contextid] = $contextid;
 331                  }
 332              }
 333              if (empty($allcontexts)) {
 334                  // This means there are no valid contexts for them, so they get no results.
 335                  return null;
 336              }
 337              $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
 338          }
 339  
 340          if (!$accessinfo->everything && $accessinfo->separategroupscontexts) {
 341              // Add another restriction to handle group ids. If there are any contexts using separate
 342              // groups, then results in that context will not show unless you belong to the group.
 343              // (Note: Access all groups is taken care of earlier, when computing these arrays.)
 344  
 345              // This special exceptions list allows for particularly pig-headed developers to create
 346              // multiple search areas within the same module, where one of them uses separate
 347              // groups and the other uses visible groups. It is a little inefficient, but this should
 348              // be rare.
 349              $exceptions = '';
 350              if ($accessinfo->visiblegroupscontextsareas) {
 351                  foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
 352                      $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
 353                              implode(' OR ', $areaids) . '))';
 354                  }
 355              }
 356  
 357              if ($accessinfo->usergroups) {
 358                  // Either the document has no groupid, or the groupid is one that the user
 359                  // belongs to, or the context is not one of the separate groups contexts.
 360                  $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
 361                          'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ' .
 362                          '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
 363                          $exceptions);
 364              } else {
 365                  // Either the document has no groupid, or the context is not a restricted one.
 366                  $query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
 367                          '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
 368                          $exceptions);
 369              }
 370          }
 371  
 372          if ($this->file_indexing_enabled()) {
 373              // Now group records by solr_filegroupingid. Limit to 3 results per group.
 374              $query->setGroup(true);
 375              $query->setGroupLimit(3);
 376              $query->setGroupNGroups(true);
 377              $query->addGroupField('solr_filegroupingid');
 378          } else {
 379              // Make sure we only get text files, in case the index has pre-existing files.
 380              $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
 381          }
 382  
 383          // If ordering by location, add in boost for the relevant course or context ids.
 384          if (!empty($filters->order) && $filters->order === 'location') {
 385              $coursecontext = $filters->context->get_course_context();
 386              $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
 387              if ($filters->context->contextlevel !== CONTEXT_COURSE) {
 388                  // If it's a block or activity, also add a boost for the specific context id.
 389                  $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
 390              }
 391          }
 392  
 393          return $query;
 394      }
 395  
 396      /**
 397       * Prepares a new query by setting the query, start offset and rows to return.
 398       *
 399       * @param SolrQuery $query
 400       * @param object    $q Containing query and filters.
 401       */
 402      protected function set_query($query, $q) {
 403          // Set hightlighting.
 404          $query->setHighlight(true);
 405          foreach ($this->highlightfields as $field) {
 406              $query->addHighlightField($field);
 407          }
 408          $query->setHighlightFragsize(static::FRAG_SIZE);
 409          $query->setHighlightSimplePre(self::HIGHLIGHT_START);
 410          $query->setHighlightSimplePost(self::HIGHLIGHT_END);
 411          $query->setHighlightMergeContiguous(true);
 412  
 413          $query->setQuery($q);
 414  
 415          // A reasonable max.
 416          $query->setRows(static::QUERY_SIZE);
 417      }
 418  
 419      /**
 420       * Sets fields to be returned in the result.
 421       *
 422       * @param SolrDisMaxQuery|SolrQuery $query object.
 423       */
 424      public function add_fields($query) {
 425          $documentclass = $this->get_document_classname();
 426          $fields = $documentclass::get_default_fields_definition();
 427  
 428          $dismax = false;
 429          if ($query instanceof \SolrDisMaxQuery) {
 430              $dismax = true;
 431          }
 432  
 433          foreach ($fields as $key => $field) {
 434              $query->addField($key);
 435              if ($dismax && !empty($field['mainquery'])) {
 436                  // Add fields the main query should be run against.
 437                  // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
 438                  // a boost value is required, even if it is optional; to avoid boosting one among other fields,
 439                  // the explicit boost value will be the default one, for every field.
 440                  $query->addQueryField($key, 1);
 441              }
 442          }
 443      }
 444  
 445      /**
 446       * Finds the key common to both highlighing and docs array returned from response.
 447       * @param object $response containing results.
 448       */
 449      public function add_highlight_content($response) {
 450          if (!isset($response->highlighting)) {
 451              // There is no highlighting to add.
 452              return;
 453          }
 454  
 455          $highlightedobject = $response->highlighting;
 456          foreach ($response->response->docs as $doc) {
 457              $x = $doc->id;
 458              $highlighteddoc = $highlightedobject->$x;
 459              $this->merge_highlight_field_values($doc, $highlighteddoc);
 460          }
 461      }
 462  
 463      /**
 464       * Adds the highlighting array values to docs array values.
 465       *
 466       * @throws \core_search\engine_exception
 467       * @param object $doc containing the results.
 468       * @param object $highlighteddoc containing the highlighted results values.
 469       */
 470      public function merge_highlight_field_values($doc, $highlighteddoc) {
 471  
 472          foreach ($this->highlightfields as $field) {
 473              if (!empty($doc->$field)) {
 474  
 475                  // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
 476                  if (is_array($doc->{$field})) {
 477                      throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field);
 478                  }
 479  
 480                  if (!empty($highlighteddoc->$field)) {
 481                      // Replace by the highlighted result.
 482                      $doc->$field = reset($highlighteddoc->$field);
 483                  }
 484              }
 485          }
 486      }
 487  
 488      /**
 489       * Filters the response on Moodle side.
 490       *
 491       * @param SolrObject $response Solr object containing the response return from solr server.
 492       * @param int        $limit The maximum number of results to return. 0 for all.
 493       * @param bool       $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
 494       * @return array $results containing final results to be displayed.
 495       */
 496      protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
 497          global $USER;
 498  
 499          if (empty($response)) {
 500              return array();
 501          }
 502  
 503          if (isset($response->grouped)) {
 504              return $this->grouped_files_process_response($response, $limit);
 505          }
 506  
 507          $userid = $USER->id;
 508          $noownerid = \core_search\manager::NO_OWNER_ID;
 509  
 510          $numgranted = 0;
 511  
 512          if (!$docs = $response->response->docs) {
 513              return array();
 514          }
 515  
 516          $out = array();
 517          if (!empty($response->response->numFound)) {
 518              $this->add_highlight_content($response);
 519  
 520              // Iterate through the results checking its availability and whether they are available for the user or not.
 521              foreach ($docs as $key => $docdata) {
 522                  if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) {
 523                      // If owneruserid is set, no other user should be able to access this record.
 524                      continue;
 525                  }
 526  
 527                  if (!$searcharea = $this->get_search_area($docdata->areaid)) {
 528                      continue;
 529                  }
 530  
 531                  $docdata = $this->standarize_solr_obj($docdata);
 532  
 533                  if ($skipaccesscheck) {
 534                      $access = \core_search\manager::ACCESS_GRANTED;
 535                  } else {
 536                      $access = $searcharea->check_access($docdata['itemid']);
 537                  }
 538                  switch ($access) {
 539                      case \core_search\manager::ACCESS_DELETED:
 540                          $this->delete_by_id($docdata['id']);
 541                          // Remove one from our processed and total counters, since we promptly deleted.
 542                          $this->processeddocs--;
 543                          $this->totalenginedocs--;
 544                          break;
 545                      case \core_search\manager::ACCESS_DENIED:
 546                          $this->skippeddocs++;
 547                          break;
 548                      case \core_search\manager::ACCESS_GRANTED:
 549                          $numgranted++;
 550  
 551                          // Add the doc.
 552                          $out[] = $this->to_document($searcharea, $docdata);
 553                          break;
 554                  }
 555  
 556                  // Stop when we hit our limit.
 557                  if (!empty($limit) && count($out) >= $limit) {
 558                      break;
 559                  }
 560              }
 561          }
 562  
 563          return $out;
 564      }
 565  
 566      /**
 567       * Processes grouped file results into documents, with attached matching files.
 568       *
 569       * @param SolrObject $response The response returned from solr server
 570       * @param int        $limit The maximum number of results to return. 0 for all.
 571       * @return array Final results to be displayed.
 572       */
 573      protected function grouped_files_process_response($response, $limit = 0) {
 574          // If we can't find the grouping, or there are no matches in the grouping, return empty.
 575          if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
 576              return array();
 577          }
 578  
 579          $numgranted = 0;
 580          $orderedids = array();
 581          $completedocs = array();
 582          $incompletedocs = array();
 583  
 584          $highlightingobj = $response->highlighting;
 585  
 586          // Each group represents a "master document".
 587          $groups = $response->grouped->solr_filegroupingid->groups;
 588          foreach ($groups as $group) {
 589              $groupid = $group->groupValue;
 590              $groupdocs = $group->doclist->docs;
 591              $firstdoc = reset($groupdocs);
 592  
 593              if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
 594                  // Well, this is a problem.
 595                  continue;
 596              }
 597  
 598              // Check for access.
 599              $access = $searcharea->check_access($firstdoc->itemid);
 600              switch ($access) {
 601                  case \core_search\manager::ACCESS_DELETED:
 602                      // If deleted from Moodle, delete from index and then continue.
 603                      $this->delete_by_id($firstdoc->id);
 604                      // Remove one from our processed and total counters, since we promptly deleted.
 605                      $this->processeddocs--;
 606                      $this->totalenginedocs--;
 607                      continue 2;
 608                      break;
 609                  case \core_search\manager::ACCESS_DENIED:
 610                      // This means we should just skip for the current user.
 611                      $this->skippeddocs++;
 612                      continue 2;
 613                      break;
 614              }
 615              $numgranted++;
 616  
 617              $maindoc = false;
 618              $fileids = array();
 619              // Seperate the main document and any files returned.
 620              foreach ($groupdocs as $groupdoc) {
 621                  if ($groupdoc->id == $groupid) {
 622                      $maindoc = $groupdoc;
 623                  } else if (isset($groupdoc->solr_fileid)) {
 624                      $fileids[] = $groupdoc->solr_fileid;
 625                  }
 626              }
 627  
 628              // Store the id of this group, in order, for later merging.
 629              $orderedids[] = $groupid;
 630  
 631              if (!$maindoc) {
 632                  // We don't have the main doc, store what we know for later building.
 633                  $incompletedocs[$groupid] = $fileids;
 634              } else {
 635                  if (isset($highlightingobj->$groupid)) {
 636                      // Merge the highlighting for this doc.
 637                      $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
 638                  }
 639                  $docdata = $this->standarize_solr_obj($maindoc);
 640                  $doc = $this->to_document($searcharea, $docdata);
 641                  // Now we need to attach the result files to the doc.
 642                  foreach ($fileids as $fileid) {
 643                      $doc->add_stored_file($fileid);
 644                  }
 645                  $completedocs[$groupid] = $doc;
 646              }
 647  
 648              if (!empty($limit) && $numgranted >= $limit) {
 649                  // We have hit the max results, we will just ignore the rest.
 650                  break;
 651              }
 652          }
 653  
 654          $incompletedocs = $this->get_missing_docs($incompletedocs);
 655  
 656          $out = array();
 657          // Now merge the complete and incomplete documents, in results order.
 658          foreach ($orderedids as $docid) {
 659              if (isset($completedocs[$docid])) {
 660                  $out[] = $completedocs[$docid];
 661              } else if (isset($incompletedocs[$docid])) {
 662                  $out[] = $incompletedocs[$docid];
 663              }
 664          }
 665  
 666          return $out;
 667      }
 668  
 669      /**
 670       * Retreive any missing main documents and attach provided files.
 671       *
 672       * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
 673       * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
 674       *
 675       * Return array also indexed by document id.
 676       *
 677       * @param array() $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 678       * @return document[]
 679       */
 680      protected function get_missing_docs($missingdocs) {
 681          if (empty($missingdocs)) {
 682              return array();
 683          }
 684  
 685          $docids = array_keys($missingdocs);
 686  
 687          // Build a custom query that will get all the missing documents.
 688          $query = new \SolrQuery();
 689          $this->set_query($query, '*');
 690          $this->add_fields($query);
 691          $query->setRows(count($docids));
 692          $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $docids) . ')');
 693  
 694          $response = $this->get_query_response($query);
 695          // We know the missing docs have already been checked for access, so don't recheck.
 696          $results = $this->process_response($response, 0, true);
 697  
 698          $out = array();
 699          foreach ($results as $result) {
 700              $resultid = $result->get('id');
 701              if (!isset($missingdocs[$resultid])) {
 702                  // We got a result we didn't expect. Skip it.
 703                  continue;
 704              }
 705              // Attach the files.
 706              foreach ($missingdocs[$resultid] as $filedoc) {
 707                  $result->add_stored_file($filedoc);
 708              }
 709              $out[$resultid] = $result;
 710          }
 711  
 712          return $out;
 713      }
 714  
 715      /**
 716       * Returns a standard php array from a \SolrObject instance.
 717       *
 718       * @param \SolrObject $obj
 719       * @return array The returned document as an array.
 720       */
 721      public function standarize_solr_obj(\SolrObject $obj) {
 722          $properties = $obj->getPropertyNames();
 723  
 724          $docdata = array();
 725          foreach($properties as $name) {
 726              // http://php.net/manual/en/solrobject.getpropertynames.php#98018.
 727              $name = trim($name);
 728              $docdata[$name] = $obj->offsetGet($name);
 729          }
 730          return $docdata;
 731      }
 732  
 733      /**
 734       * Adds a document to the search engine.
 735       *
 736       * This does not commit to the search engine.
 737       *
 738       * @param document $document
 739       * @param bool     $fileindexing True if file indexing is to be used
 740       * @return bool
 741       */
 742      public function add_document($document, $fileindexing = false) {
 743          $docdata = $document->export_for_engine();
 744  
 745          if (!$this->add_solr_document($docdata)) {
 746              return false;
 747          }
 748  
 749          if ($fileindexing) {
 750              // This will take care of updating all attached files in the index.
 751              $this->process_document_files($document);
 752          }
 753  
 754          return true;
 755      }
 756  
 757      /**
 758       * Adds a batch of documents to the engine at once.
 759       *
 760       * @param \core_search\document[] $documents Documents to add
 761       * @param bool $fileindexing If true, indexes files (these are done one at a time)
 762       * @return int[] Array of three elements: successfully processed, failed processed, batch count
 763       */
 764      public function add_document_batch(array $documents, bool $fileindexing = false): array {
 765          $docdatabatch = [];
 766          foreach ($documents as $document) {
 767              $docdatabatch[] = $document->export_for_engine();
 768          }
 769  
 770          $resultcounts = $this->add_solr_documents($docdatabatch);
 771  
 772          // Files are processed one document at a time (if there are files it's slow anyway).
 773          if ($fileindexing) {
 774              foreach ($documents as $document) {
 775                  // This will take care of updating all attached files in the index.
 776                  $this->process_document_files($document);
 777              }
 778          }
 779  
 780          return $resultcounts;
 781      }
 782  
 783      /**
 784       * Replaces underlines at edges of words in the content with spaces.
 785       *
 786       * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
 787       * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
 788       *
 789       * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
 790       * and end of the italicised phrase (not between words). Solr treats underlines as part of the
 791       * word, which means that if you search for a word in italic then you can't find it.
 792       *
 793       * @param string $str String to replace
 794       * @return string Replaced string
 795       */
 796      protected static function replace_underlines(string $str): string {
 797          return preg_replace('~\b_|_\b~', '', $str);
 798      }
 799  
 800      /**
 801       * Creates a Solr document object.
 802       *
 803       * @param array $doc Array of document fields
 804       * @return \SolrInputDocument Created document
 805       */
 806      protected function create_solr_document(array $doc): \SolrInputDocument {
 807          $solrdoc = new \SolrInputDocument();
 808  
 809          // Replace underlines in the content with spaces. The reason for this is that for italic
 810          // text, content_to_text puts _italic_ underlines. Solr treats underlines as part of the
 811          // word, which means that if you search for a word in italic then you can't find it.
 812          if (array_key_exists('content', $doc)) {
 813              $doc['content'] = self::replace_underlines($doc['content']);
 814          }
 815  
 816          // Set all the fields.
 817          foreach ($doc as $field => $value) {
 818              $solrdoc->addField($field, $value);
 819          }
 820  
 821          return $solrdoc;
 822      }
 823  
 824      /**
 825       * Adds a text document to the search engine.
 826       *
 827       * @param array $doc
 828       * @return bool
 829       */
 830      protected function add_solr_document($doc) {
 831          $solrdoc = $this->create_solr_document($doc);
 832  
 833          try {
 834              $result = $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
 835              return true;
 836          } catch (\SolrClientException $e) {
 837              debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
 838          } catch (\SolrServerException $e) {
 839              // We only use the first line of the message, as it's a fully java stacktrace behind it.
 840              $msg = strtok($e->getMessage(), "\n");
 841              debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
 842          }
 843  
 844          return false;
 845      }
 846  
 847      /**
 848       * Adds multiple text documents to the search engine.
 849       *
 850       * @param array $docs Array of documents (each an array of fields) to add
 851       * @return int[] Array of success, failure, batch count
 852       * @throws \core_search\engine_exception
 853       */
 854      protected function add_solr_documents(array $docs): array {
 855          $solrdocs = [];
 856          foreach ($docs as $doc) {
 857              $solrdocs[] = $this->create_solr_document($doc);
 858          }
 859  
 860          try {
 861              // Add documents in a batch and report that they all succeeded.
 862              $this->get_search_client()->addDocuments($solrdocs, true, static::AUTOCOMMIT_WITHIN);
 863              return [count($solrdocs), 0, 1];
 864          } catch (\SolrClientException $e) {
 865              // If there is an exception, fall through...
 866              $donothing = true;
 867          } catch (\SolrServerException $e) {
 868              // If there is an exception, fall through...
 869              $donothing = true;
 870          }
 871  
 872          // When there is an error, we fall back to adding them individually so that we can report
 873          // which document(s) failed. Since it overwrites, adding the successful ones multiple
 874          // times won't hurt.
 875          $success = 0;
 876          $failure = 0;
 877          $batches = 0;
 878          foreach ($docs as $doc) {
 879              $result = $this->add_solr_document($doc);
 880              $batches++;
 881              if ($result) {
 882                  $success++;
 883              } else {
 884                  $failure++;
 885              }
 886          }
 887  
 888          return [$success, $failure, $batches];
 889      }
 890  
 891      /**
 892       * Index files attached to the docuemnt, ensuring the index matches the current document files.
 893       *
 894       * For documents that aren't known to be new, we check the index for existing files.
 895       * - New files we will add.
 896       * - Existing and unchanged files we will skip.
 897       * - File that are in the index but not on the document will be deleted from the index.
 898       * - Files that have changed will be re-indexed.
 899       *
 900       * @param document $document
 901       */
 902      protected function process_document_files($document) {
 903          if (!$this->file_indexing_enabled()) {
 904              return;
 905          }
 906  
 907          // Maximum rows to process at a time.
 908          $rows = 500;
 909  
 910          // Get the attached files.
 911          $files = $document->get_files();
 912  
 913          // If this isn't a new document, we need to check the exiting indexed files.
 914          if (!$document->get_is_new()) {
 915              // We do this progressively, so we can handle lots of files cleanly.
 916              list($numfound, $indexedfiles) = $this->get_indexed_files($document, 0, $rows);
 917              $count = 0;
 918              $idstodelete = array();
 919  
 920              do {
 921                  // Go through each indexed file. We want to not index any stored and unchanged ones, delete any missing ones.
 922                  foreach ($indexedfiles as $indexedfile) {
 923                      $fileid = $indexedfile->solr_fileid;
 924  
 925                      if (isset($files[$fileid])) {
 926                          // Check for changes that would mean we need to re-index the file. If so, just leave in $files.
 927                          // Filelib does not guarantee time modified is updated, so we will check important values.
 928                          if ($indexedfile->modified != $files[$fileid]->get_timemodified()) {
 929                              continue;
 930                          }
 931                          if (strcmp($indexedfile->title, $files[$fileid]->get_filename()) !== 0) {
 932                              continue;
 933                          }
 934                          if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) {
 935                              continue;
 936                          }
 937                          if ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE &&
 938                                  $this->file_is_indexable($files[$fileid])) {
 939                              // This means that the last time we indexed this file, filtering blocked it.
 940                              // Current settings say it is indexable, so we will allow it to be indexed.
 941                              continue;
 942                          }
 943  
 944                          // If the file is already indexed, we can just remove it from the files array and skip it.
 945                          unset($files[$fileid]);
 946                      } else {
 947                          // This means we have found a file that is no longer attached, so we need to delete from the index.
 948                          // We do it later, since this is progressive, and it could reorder results.
 949                          $idstodelete[] = $indexedfile->id;
 950                      }
 951                  }
 952                  $count += $rows;
 953  
 954                  if ($count < $numfound) {
 955                      // If we haven't hit the total count yet, fetch the next batch.
 956                      list($numfound, $indexedfiles) = $this->get_indexed_files($document, $count, $rows);
 957                  }
 958  
 959              } while ($count < $numfound);
 960  
 961              // Delete files that are no longer attached.
 962              foreach ($idstodelete as $id) {
 963                  // We directly delete the item using the client, as the engine delete_by_id won't work on file docs.
 964                  $this->get_search_client()->deleteById($id);
 965              }
 966          }
 967  
 968          // Now we can actually index all the remaining files.
 969          foreach ($files as $file) {
 970              $this->add_stored_file($document, $file);
 971          }
 972      }
 973  
 974      /**
 975       * Get the currently indexed files for a particular document, returns the total count, and a subset of files.
 976       *
 977       * @param document $document
 978       * @param int      $start The row to start the results on. Zero indexed.
 979       * @param int      $rows The number of rows to fetch
 980       * @return array   A two element array, the first is the total number of availble results, the second is an array
 981       *                 of documents for the current request.
 982       */
 983      protected function get_indexed_files($document, $start = 0, $rows = 500) {
 984          // Build a custom query that will get any document files that are in our solr_filegroupingid.
 985          $query = new \SolrQuery();
 986  
 987          // We want to get all file records tied to a document.
 988          // For efficiency, we are building our own, stripped down, query.
 989          $query->setQuery('*');
 990          $query->setRows($rows);
 991          $query->setStart($start);
 992          // We want a consistent sorting.
 993          $query->addSortField('id');
 994  
 995          // We only want the bare minimum of fields.
 996          $query->addField('id');
 997          $query->addField('modified');
 998          $query->addField('title');
 999          $query->addField('solr_fileid');
1000          $query->addField('solr_filecontenthash');
1001          $query->addField('solr_fileindexstatus');
1002  
1003          $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
1004          $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);
1005  
1006          $response = $this->get_query_response($query);
1007          if (empty($response->response->numFound)) {
1008              return array(0, array());
1009          }
1010  
1011          return array($response->response->numFound, $this->convert_file_results($response));
1012      }
1013  
1014      /**
1015       * A very lightweight handler for getting information about already indexed files from a Solr response.
1016       *
1017       * @param SolrObject $responsedoc A Solr response document
1018       * @return stdClass[] An array of objects that contain the basic information for file processing.
1019       */
1020      protected function convert_file_results($responsedoc) {
1021          if (!$docs = $responsedoc->response->docs) {
1022              return array();
1023          }
1024  
1025          $out = array();
1026  
1027          foreach ($docs as $doc) {
1028              // Copy the bare minimim needed info.
1029              $result = new \stdClass();
1030              $result->id = $doc->id;
1031              $result->modified = document::import_time_from_engine($doc->modified);
1032              $result->title = $doc->title;
1033              $result->solr_fileid = $doc->solr_fileid;
1034              $result->solr_filecontenthash = $doc->solr_filecontenthash;
1035              $result->solr_fileindexstatus = $doc->solr_fileindexstatus;
1036              $out[] = $result;
1037          }
1038  
1039          return $out;
1040      }
1041  
1042      /**
1043       * Adds a file to the search engine.
1044       *
1045       * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
1046       * Tika has much better content type detection than Moodle, and we will have many more doc failures
1047       * if we try to send mime types.
1048       *
1049       * @param document $document
1050       * @param \stored_file $storedfile
1051       * @return void
1052       */
1053      protected function add_stored_file($document, $storedfile) {
1054          $filedoc = $document->export_file_for_engine($storedfile);
1055  
1056          if (!$this->file_is_indexable($storedfile)) {
1057              // For files that we don't consider indexable, we will still place a reference in the search engine.
1058              $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
1059              $this->add_solr_document($filedoc);
1060              return;
1061          }
1062  
1063          $curl = $this->get_curl_object();
1064  
1065          $url = $this->get_connection_url('/update/extract');
1066  
1067          // Return results as XML.
1068          $url->param('wt', 'xml');
1069  
1070          // This will prevent solr from automatically making fields for every tika output.
1071          $url->param('uprefix', 'ignored_');
1072  
1073          // Control how content is captured. This will keep our file content clean of non-important metadata.
1074          $url->param('captureAttr', 'true');
1075          // Move the content to a field for indexing.
1076          $url->param('fmap.content', 'solr_filecontent');
1077  
1078          // These are common fields that matches the standard *_point dynamic field and causes an error.
1079          $url->param('fmap.media_white_point', 'ignored_mwp');
1080          $url->param('fmap.media_black_point', 'ignored_mbp');
1081  
1082          // Copy each key to the url with literal.
1083          // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
1084          foreach ($filedoc as $key => $value) {
1085              // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
1086              $url->param('fmap.'.$key, 'ignored_'.$key);
1087              // Place data in a tmp field.
1088              $url->param('literal.mdltmp_'.$key, $value);
1089              // Then move to the final field.
1090              $url->param('fmap.mdltmp_'.$key, $key);
1091          }
1092  
1093          // This sets the true filename for Tika.
1094          $url->param('resource.name', $storedfile->get_filename());
1095  
1096          // A giant block of code that is really just error checking around the curl request.
1097          try {
1098              // We have to post the file directly in binary data (not using multipart) to avoid
1099              // Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
1100              // Note this loads the whole file into memory; see limit in file_is_indexable().
1101              $result = $curl->post($url->out(false), $storedfile->get_content());
1102  
1103              $code = $curl->get_errno();
1104              $info = $curl->get_info();
1105  
1106              // Now error handling. It is just informational, since we aren't tracking per file/doc results.
1107              if ($code != 0) {
1108                  // This means an internal cURL error occurred error is in result.
1109                  $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
1110                  debugging($message, DEBUG_DEVELOPER);
1111              } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
1112                  // Unexpected HTTP response code.
1113                  $message = 'Error while indexing file with document id '.$filedoc['id'];
1114                  // Try to get error message out of msg or title if it exists.
1115                  if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
1116                      $message .= ': '.$matches[1];
1117                  } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
1118                      $message .= ': '.$matches[1];
1119                  }
1120                  // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
1121                  if (CLI_SCRIPT && !PHPUNIT_TEST) {
1122                      mtrace($message);
1123                  }
1124              } else {
1125                  // Check for the expected status field.
1126                  if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
1127                      // Now check for the expected status of 0, if not, error.
1128                      if ((int)$matches[1] !== 0) {
1129                          $message = 'Unexpected Solr status code '.(int)$matches[1];
1130                          $message .= ' while indexing file with document id '.$filedoc['id'].'.';
1131                          debugging($message, DEBUG_DEVELOPER);
1132                      } else {
1133                          // The document was successfully indexed.
1134                          return;
1135                      }
1136                  } else {
1137                      // We received an unprocessable response.
1138                      $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
1139                      $message .= strtok($result, "\n");
1140                      debugging($message, DEBUG_DEVELOPER);
1141                  }
1142              }
1143          } catch (\Exception $e) {
1144              // There was an error, but we are not tracking per-file success, so we just continue on.
1145              debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
1146          }
1147  
1148          // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
1149          $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
1150          $this->add_solr_document($filedoc);
1151      }
1152  
1153      /**
1154       * Checks to see if a passed file is indexable.
1155       *
1156       * @param \stored_file $file The file to check
1157       * @return bool True if the file can be indexed
1158       */
1159      protected function file_is_indexable($file) {
1160          if (!empty($this->config->maxindexfilekb) && ($file->get_filesize() > ($this->config->maxindexfilekb * 1024))) {
1161              // The file is too big to index.
1162              return false;
1163          }
1164  
1165          // Because we now load files into memory to index them in Solr, we also have to ensure that
1166          // we don't try to index anything bigger than the memory limit (less 100MB for safety).
1167          // Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
1168          // in config, so this will allow files over 100MB to be indexed.
1169          $limit = ini_get('memory_limit');
1170          if ($limit && $limit != -1) {
1171              $limitbytes = get_real_size($limit);
1172              if ($file->get_filesize() > $limitbytes) {
1173                  return false;
1174              }
1175          }
1176  
1177          $mime = $file->get_mimetype();
1178  
1179          if ($mime == 'application/vnd.moodle.backup') {
1180              // We don't index Moodle backup files. There is nothing usefully indexable in them.
1181              return false;
1182          }
1183  
1184          return true;
1185      }
1186  
1187      /**
1188       * Commits all pending changes.
1189       *
1190       * @return void
1191       */
1192      protected function commit() {
1193          $this->get_search_client()->commit();
1194      }
1195  
1196      /**
1197       * Do any area cleanup needed, and do anything to confirm contents.
1198       *
1199       * Return false to prevent the search area completed time and stats from being updated.
1200       *
1201       * @param \core_search\base $searcharea The search area that was complete
1202       * @param int $numdocs The number of documents that were added to the index
1203       * @param bool $fullindex True if a full index is being performed
1204       * @return bool True means that data is considered indexed
1205       */
1206      public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
1207          $this->commit();
1208  
1209          return true;
1210      }
1211  
1212      /**
1213       * Return true if file indexing is supported and enabled. False otherwise.
1214       *
1215       * @return bool
1216       */
1217      public function file_indexing_enabled() {
1218          return (bool)$this->config->fileindexing;
1219      }
1220  
1221      /**
1222       * Deletes the specified document.
1223       *
1224       * @param string $id The document id to delete
1225       * @return void
1226       */
1227      public function delete_by_id($id) {
1228          // We need to make sure we delete the item and all related files, which can be done with solr_filegroupingid.
1229          $this->get_search_client()->deleteByQuery('solr_filegroupingid:' . $id);
1230          $this->commit();
1231      }
1232  
1233      /**
1234       * Delete all area's documents.
1235       *
1236       * @param string $areaid
1237       * @return void
1238       */
1239      public function delete($areaid = null) {
1240          if ($areaid) {
1241              $this->get_search_client()->deleteByQuery('areaid:' . $areaid);
1242          } else {
1243              $this->get_search_client()->deleteByQuery('*:*');
1244          }
1245          $this->commit();
1246      }
1247  
1248      /**
1249       * Pings the Solr server using search_solr config
1250       *
1251       * @return true|string Returns true if all good or an error string.
1252       */
1253      public function is_server_ready() {
1254  
1255          $configured = $this->is_server_configured();
1256          if ($configured !== true) {
1257              return $configured;
1258          }
1259  
1260          // As part of the above we have already checked that we can contact the server. For pages
1261          // where performance is important, we skip doing a full schema check as well.
1262          if ($this->should_skip_schema_check()) {
1263              return true;
1264          }
1265  
1266          // Update schema if required/possible.
1267          $schemalatest = $this->check_latest_schema();
1268          if ($schemalatest !== true) {
1269              return $schemalatest;
1270          }
1271  
1272          // Check that the schema is already set up.
1273          try {
1274              $schema = new schema($this);
1275              $schema->validate_setup();
1276          } catch (\moodle_exception $e) {
1277              return $e->getMessage();
1278          }
1279  
1280          return true;
1281      }
1282  
1283      /**
1284       * Is the solr server properly configured?.
1285       *
1286       * @return true|string Returns true if all good or an error string.
1287       */
1288      public function is_server_configured() {
1289  
1290          if (empty($this->config->server_hostname) || empty($this->config->indexname)) {
1291              return 'No solr configuration found';
1292          }
1293  
1294          if (!$client = $this->get_search_client(false)) {
1295              return get_string('engineserverstatus', 'search');
1296          }
1297  
1298          try {
1299              if ($this->get_solr_major_version() < 4) {
1300                  // Minimum solr 4.0.
1301                  return get_string('minimumsolr4', 'search_solr');
1302              }
1303          } catch (\SolrClientException $ex) {
1304              debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
1305              return get_string('engineserverstatus', 'search');
1306          } catch (\SolrServerException $ex) {
1307              debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
1308              return get_string('engineserverstatus', 'search');
1309          }
1310  
1311          return true;
1312      }
1313  
1314      /**
1315       * Returns the solr server major version.
1316       *
1317       * @return int
1318       */
1319      public function get_solr_major_version() {
1320          if ($this->solrmajorversion !== null) {
1321              return $this->solrmajorversion;
1322          }
1323  
1324          // We should really ping first the server to see if the specified indexname is valid but
1325          // we want to minimise solr server requests as they are expensive. system() emits a warning
1326          // if it can not connect to the configured index in the configured server.
1327          $systemdata = @$this->get_search_client()->system();
1328          $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');
1329          $this->solrmajorversion = intval(substr($solrversion, 0, strpos($solrversion, '.')));
1330  
1331          return $this->solrmajorversion;
1332      }
1333  
1334      /**
1335       * Checks if the PHP Solr extension is available.
1336       *
1337       * @return bool
1338       */
1339      public function is_installed() {
1340          return function_exists('solr_get_version');
1341      }
1342  
1343      /**
1344       * Returns the solr client instance.
1345       *
1346       * We don't reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.
1347       *
1348       * @throws \core_search\engine_exception
1349       * @param bool $triggerexception
1350       * @return \SolrClient
1351       */
1352      protected function get_search_client($triggerexception = true) {
1353          global $CFG;
1354  
1355          // Type comparison as it is set to false if not available.
1356          if ($this->client !== null) {
1357              return $this->client;
1358          }
1359  
1360          $options = array(
1361              'hostname' => $this->config->server_hostname,
1362              'path'     => '/solr/' . $this->config->indexname,
1363              'login'    => !empty($this->config->server_username) ? $this->config->server_username : '',
1364              'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
1365              'port'     => !empty($this->config->server_port) ? $this->config->server_port : '',
1366              'secure' => !empty($this->config->secure) ? true : false,
1367              'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
1368              'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
1369              'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
1370              'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
1371              'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
1372              'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
1373          );
1374  
1375          if ($CFG->proxyhost && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
1376              $options['proxy_host'] = $CFG->proxyhost;
1377              if (!empty($CFG->proxyport)) {
1378                  $options['proxy_port'] = $CFG->proxyport;
1379              }
1380              if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
1381                  $options['proxy_login'] = $CFG->proxyuser;
1382                  $options['proxy_password'] = $CFG->proxypassword;
1383              }
1384          }
1385  
1386          if (!class_exists('\SolrClient')) {
1387              throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
1388          }
1389  
1390          $client = new \SolrClient($options);
1391  
1392          if ($client === false && $triggerexception) {
1393              throw new \core_search\engine_exception('engineserverstatus', 'search');
1394          }
1395  
1396          if ($this->cacheclient) {
1397              $this->client = $client;
1398          }
1399  
1400          return $client;
1401      }
1402  
1403      /**
1404       * Returns a curl object for conntecting to solr.
1405       *
1406       * @return \curl
1407       */
1408      public function get_curl_object() {
1409          if (!is_null($this->curl)) {
1410              return $this->curl;
1411          }
1412  
1413          // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
1414          $this->curl = new \curl(['ignoresecurity' => true]);
1415  
1416          $options = array();
1417          // Build the SSL options. Based on pecl-solr and general testing.
1418          if (!empty($this->config->secure)) {
1419              if (!empty($this->config->ssl_cert)) {
1420                  $options['CURLOPT_SSLCERT'] = $this->config->ssl_cert;
1421                  $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
1422              }
1423  
1424              if (!empty($this->config->ssl_key)) {
1425                  $options['CURLOPT_SSLKEY'] = $this->config->ssl_key;
1426                  $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
1427              }
1428  
1429              if (!empty($this->config->ssl_keypassword)) {
1430                  $options['CURLOPT_KEYPASSWD'] = $this->config->ssl_keypassword;
1431              }
1432  
1433              if (!empty($this->config->ssl_cainfo)) {
1434                  $options['CURLOPT_CAINFO'] = $this->config->ssl_cainfo;
1435              }
1436  
1437              if (!empty($this->config->ssl_capath)) {
1438                  $options['CURLOPT_CAPATH'] = $this->config->ssl_capath;
1439              }
1440          }
1441  
1442          // Set timeout as for Solr client.
1443          $options['CURLOPT_TIMEOUT'] = !empty($this->config->server_timeout) ? $this->config->server_timeout : '30';
1444  
1445          $this->curl->setopt($options);
1446  
1447          if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
1448              $authorization = $this->config->server_username . ':' . $this->config->server_password;
1449              $this->curl->setHeader('Authorization: Basic ' . base64_encode($authorization));
1450          }
1451  
1452          return $this->curl;
1453      }
1454  
1455      /**
1456       * Return a Moodle url object for the server connection.
1457       *
1458       * @param string $path The solr path to append.
1459       * @return \moodle_url
1460       */
1461      public function get_connection_url($path) {
1462          // Must use the proper protocol, or SSL will fail.
1463          $protocol = !empty($this->config->secure) ? 'https' : 'http';
1464          $url = $protocol . '://' . rtrim($this->config->server_hostname, '/');
1465          if (!empty($this->config->server_port)) {
1466              $url .= ':' . $this->config->server_port;
1467          }
1468          $url .= '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');
1469  
1470          return new \moodle_url($url);
1471      }
1472  
1473      /**
1474       * Solr includes group support in the execute_query function.
1475       *
1476       * @return bool True
1477       */
1478      public function supports_group_filtering() {
1479          return true;
1480      }
1481  
1482      protected function update_schema($oldversion, $newversion) {
1483          // Construct schema.
1484          $schema = new schema($this);
1485          $cansetup = $schema->can_setup_server();
1486          if ($cansetup !== true) {
1487              return $cansetup;
1488          }
1489  
1490          switch ($newversion) {
1491              // This version just requires a setup call to add new fields.
1492              case 2017091700:
1493                  $setup = true;
1494                  break;
1495  
1496              // If we don't know about the schema version we might not have implemented the
1497              // change correctly, so return.
1498              default:
1499                  return get_string('schemaversionunknown', 'search');
1500          }
1501  
1502          if ($setup) {
1503              $schema->setup();
1504          }
1505  
1506          return true;
1507      }
1508  
1509      /**
1510       * Solr supports sort by location within course contexts or below.
1511       *
1512       * @param \context $context Context that the user requested search from
1513       * @return array Array from order name => display text
1514       */
1515      public function get_supported_orders(\context $context) {
1516          $orders = parent::get_supported_orders($context);
1517  
1518          // If not within a course, no other kind of sorting supported.
1519          $coursecontext = $context->get_course_context(false);
1520          if ($coursecontext) {
1521              // Within a course or activity/block, support sort by location.
1522              $orders['location'] = get_string('order_location', 'search',
1523                      $context->get_context_name());
1524          }
1525  
1526          return $orders;
1527      }
1528  
1529      /**
1530       * Solr supports search by user id.
1531       *
1532       * @return bool True
1533       */
1534      public function supports_users() {
1535          return true;
1536      }
1537  
1538      /**
1539       * Solr supports adding documents in a batch.
1540       *
1541       * @return bool True
1542       */
1543      public function supports_add_document_batch(): bool {
1544          return true;
1545      }
1546  
1547      /**
1548       * Solr supports deleting the index for a context.
1549       *
1550       * @param int $oldcontextid Context that has been deleted
1551       * @return bool True to indicate that any data was actually deleted
1552       * @throws \core_search\engine_exception
1553       */
1554      public function delete_index_for_context(int $oldcontextid) {
1555          $client = $this->get_search_client();
1556          try {
1557              $client->deleteByQuery('contextid:' . $oldcontextid);
1558              $client->commit(true);
1559              return true;
1560          } catch (\Exception $e) {
1561              throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
1562          }
1563      }
1564  
1565      /**
1566       * Solr supports deleting the index for a course.
1567       *
1568       * @param int $oldcourseid
1569       * @return bool True to indicate that any data was actually deleted
1570       * @throws \core_search\engine_exception
1571       */
1572      public function delete_index_for_course(int $oldcourseid) {
1573          $client = $this->get_search_client();
1574          try {
1575              $client->deleteByQuery('courseid:' . $oldcourseid);
1576              $client->commit(true);
1577              return true;
1578          } catch (\Exception $e) {
1579              throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
1580          }
1581      }
1582  
1583      /**
1584       * Checks if an alternate configuration has been defined.
1585       *
1586       * @return bool True if alternate configuration is available
1587       */
1588      public function has_alternate_configuration(): bool {
1589          return !empty($this->config->alternateserver_hostname) &&
1590                  !empty($this->config->alternateindexname) &&
1591                  !empty($this->config->alternateserver_port);
1592      }
1593  }