Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403] [Versions 402 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Document representation.
 *
 * @package    search_solr
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace search_solr;

defined('MOODLE_INTERNAL') || die();

/**
 * Represents a document to index.
 *
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class document extends \core_search\document {
    /**
     * Indicates the file contents were not indexed due to an error.
     */
    const INDEXED_FILE_ERROR = -1;

    /**
     * Indicates the file contents were not indexed due to filtering/settings.
     */
    const INDEXED_FILE_FALSE = 0;

    /**
     * Indicates the file contents are indexed with the record.
     */
    const INDEXED_FILE_TRUE = 1;

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * @var array
     */
    protected static $enginefields = array(
        'solr_filegroupingid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_fileid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_filecontenthash' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        // Stores the status of file indexing.
        'solr_fileindexstatus' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        // Field to index, but not store, file contents.
        'solr_filecontent' => array(
            'type' => 'text',
            'stored' => false,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Formats the timestamp according to the search engine needs.
     *
     * @param int $timestamp
     * @return string The timestamp formatted in GMT using engine::DATE_FORMAT.
     */
    public static function format_time_for_engine($timestamp) {
        return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
    }

    /**
     * Formats a string according to the search engine needs.
     *
     * The string is passed through core_text::str_max_bytes() with a limit of 32766 bytes.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        // 2^15 default. We could convert this to a setting, as it is possible to
        // change the max in Solr.
        return \core_text::str_max_bytes($string, 32766);
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * @param string $time
     * @return int|false Unix timestamp, or false when strtotime() cannot parse the value.
     */
    public static function import_time_from_engine($time) {
        return strtotime($time);
    }

    /**
     * Overwritten to use HTML (highlighting).
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_HTML;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * Even if this is called through an external function it is fine to return HTML as
     * HTML is considered solr's search engine text format. An external function can ask
     * for raw text, but this just means that it will not pass through format_text, not that
     * we can not add HTML.
     *
     * @param string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        // Since we allow output for highlighting, we need to encode html entities.
        // This ensures plaintext html chars don't become valid html.
        $out = s($text);

        $startcount = 0;
        $endcount = 0;

        // Remove end/start pairs that span a few common separation characters. Allows us to highlight phrases instead of words.
        // The markers are quoted so that they are always matched literally, exactly as str_replace() does below.
        $regex = '|' . preg_quote(engine::HIGHLIGHT_END, '|') . '([ .,-]{0,3})' . preg_quote(engine::HIGHLIGHT_START, '|') . '|';
        $out = preg_replace($regex, '$1', $out);

        // Now replace our start and end highlight markers.
        $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
        $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);

        // This makes sure any highlight tags are balanced, in case truncation or the highlight text contained our markers.
        // The number of missing tags is computed up front rather than counted in a loop, so the balancing always terminates.
        if ($startcount > $endcount) {
            // More opening than closing tags: close the dangling spans at the end.
            $out .= str_repeat('</span>', $startcount - $endcount);
        } else if ($startcount < $endcount) {
            // More closing than opening tags: open the missing spans at the beginning.
            $out = str_repeat('<span class="highlight">', $endcount - $startcount) . $out;
        }

        return parent::format_text($out);
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        parent::apply_defaults();

        // We want to set the solr_filegroupingid to id if it isn't set.
        if (!isset($this->data['solr_filegroupingid'])) {
            $this->data['solr_filegroupingid'] = $this->data['id'];
        }
    }

    /**
     * Export the data for the given file in relation to this document.
     *
     * The result starts from this document's own export, drops the text fields that are
     * already indexed with the main document, and adds the file specific fields.
     *
     * @param \stored_file $file The stored file we are talking about.
     * @return array
     */
    public function export_file_for_engine($file) {
        $data = $this->export_for_engine();

        // Content is indexed in the main document.
        unset($data['content']);
        unset($data['description1']);
        unset($data['description2']);

        // Going to append the fileid to give it a unique id.
        $data['id'] = $data['id'].'-solrfile'.$file->get_id();
        $data['type'] = \core_search\manager::TYPE_FILE;
        $data['solr_fileid'] = $file->get_id();
        $data['solr_filecontenthash'] = $file->get_contenthash();
        $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
        $data['title'] = $file->get_filename();
        $data['modified'] = self::format_time_for_engine($file->get_timemodified());

        return $data;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body