Differences Between: [Versions 402 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Document representation.
 *
 * @package    search_solr
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace search_solr;

defined('MOODLE_INTERNAL') || die();

/**
 * Represents a document to index.
 *
 * @copyright  2015 David Monllao {@link http://www.davidmonllao.com}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class document extends \core_search\document {
    /**
     * Indicates the file contents were not indexed due to an error.
     */
    const INDEXED_FILE_ERROR = -1;

    /**
     * Indicates the file contents were not indexed due to filtering/settings.
     */
    const INDEXED_FILE_FALSE = 0;

    /**
     * Indicates the file contents are indexed with the record.
     */
    const INDEXED_FILE_TRUE = 1;

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * @var array
     */
    protected static $enginefields = array(
        'solr_filegroupingid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_fileid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_filecontenthash' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        // Stores the status of file indexing.
        'solr_fileindexstatus' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        // Field to index, but not store, file contents.
        'solr_filecontent' => array(
            'type' => 'text',
            'stored' => false,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Formats the timestamp according to the search engine needs.
     *
     * The timestamp is rendered in GMT using the engine's DATE_FORMAT.
     *
     * @param int $timestamp
     * @return string
     */
    public static function format_time_for_engine($timestamp) {
        return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
    }

    /**
     * Formats a string according to the search engine needs.
     *
     * The string is cut down so that it does not exceed the maximum number of bytes allowed.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        // 2^15 default. We could convert this to a setting as is possible to
        // change the max in solr.
        return \core_text::str_max_bytes($string, 32766);
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * @param string $time
     * @return int
     */
    public static function import_time_from_engine($time) {
        return strtotime($time);
    }

    /**
     * Overwritten to use HTML format, as format_text() emits HTML span tags for solr highlighting.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_HTML;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * The text is entity-encoded first, then the engine's highlight markers are turned into
     * span tags, and finally any unbalanced tags are closed/opened so the output stays well formed.
     *
     * @param  string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        // Since we allow output for highlighting, we need to encode html entities.
        // This ensures plaintext html chars don't become valid html.
        $out = s($text);

        $startcount = 0;
        $endcount = 0;

        // Remove end/start pairs that span a few common separation characters. Allows us to highlight phrases instead of words.
        // The markers are quoted so they are matched literally, exactly like the str_replace() calls below do.
        $regex = '|' . preg_quote(engine::HIGHLIGHT_END, '|') . '([ .,-]{0,3})' . preg_quote(engine::HIGHLIGHT_START, '|') . '|';
        $out = preg_replace($regex, '$1', $out);

        // Now replace our start and end highlight markers.
        $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
        $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);

        // This makes sure any highlight tags are balanced, in case truncation or the highlight text contained our markers.
        while ($startcount > $endcount) {
            $out .= '</span>';
            $endcount++;
        }
        while ($startcount < $endcount) {
            $out = '<span class="highlight">' . $out;
            // Each prepended tag is an opening one, so it is the start counter that must advance.
            // Advancing the end counter here would make this loop run forever.
            $startcount++;
        }

        return parent::format_text($out);
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        parent::apply_defaults();

        // We want to set the solr_filegroupingid to id if it isn't set.
        if (!isset($this->data['solr_filegroupingid'])) {
            $this->data['solr_filegroupingid'] = $this->data['id'];
        }
    }

    /**
     * Export the data for the given file in relation to this document.
     *
     * Starts from this document's exported data, drops the text content fields and
     * overrides the id, type, title, modified time and solr file fields with the file's values.
     *
     * @param \stored_file $file The stored file we are talking about.
     * @return array
     */
    public function export_file_for_engine($file) {
        $data = $this->export_for_engine();

        // Content is indexed in the main document.
        unset($data['content']);
        unset($data['description1']);
        unset($data['description2']);

        // Going to append the fileid to give it a unique id.
        $data['id'] = $data['id'].'-solrfile'.$file->get_id();
        $data['type'] = \core_search\manager::TYPE_FILE;
        $data['solr_fileid'] = $file->get_id();
        $data['solr_filecontenthash'] = $file->get_contenthash();
        $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
        $data['title'] = $file->get_filename();
        $data['modified'] = self::format_time_for_engine($file->get_timemodified());

        return $data;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body