Differences Between: [Versions 402 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
 * url_finder class definition.
 *
 * @package tool_httpsreplace
 * @copyright Copyright (c) 2016 Blackboard Inc. (http://www.blackboard.com)
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace tool_httpsreplace;

use database_column_info;
use progress_bar;

defined('MOODLE_INTERNAL') || die();

/**
 * Examines DB for non-https src or data links
 *
 * @package tool_httpsreplace
 * @copyright Copyright (c) 2016 Blackboard Inc. (http://www.blackboard.com)
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class url_finder {

    /**
     * Returns a hash of what hosts are referred to over http and would need to be changed.
     *
     * Only hosts whose https availability check fails are included in the result.
     *
     * @param progress_bar $progress Progress bar keeping track of this process.
     * @return array|bool Hash of domains with number of references as the value,
     *                    or false when the database has no tables.
     */
    public function http_link_stats($progress = null) {
        return $this->process(false, $progress);
    }

    /**
     * Changes all resources referred to over http to https.
     *
     * @param progress_bar $progress Progress bar keeping track of this process.
     * @return bool True upon success, false when the database has no tables.
     */
    public function upgrade_http_links($progress = null) {
        return $this->process(true, $progress);
    }

    /**
     * Replace http domains with https equivalent, with two types of exceptions
     * for less straightforward swaps.
     *
     * If the 'renames' plugin setting maps $domain to another host, the replacement
     * points at that host; otherwise only the protocol is switched.
     *
     * @param string $table
     * @param database_column_info $column
     * @param string $domain
     * @param string $search search string that has prefix, protocol, domain name and one extra character,
     *      example1: src="http://host.com/
     *      example2: DATA="HTTP://MYDOMAIN.EDU"
     *      example3: src="HTTP://hello.world?
     * @return void
     */
    protected function domain_swap($table, $column, $domain, $search) {
        global $DB;

        $renames = json_decode(get_config('tool_httpsreplace', 'renames'), true);

        if (isset($renames[$domain])) {
            // The pattern delimiter is passed to preg_quote() so that it gets escaped as well.
            $pattern = '|http://' . preg_quote($domain, '|') . '|i';
            $replace = preg_replace($pattern, 'https://' . $renames[$domain], $search);
        } else {
            $replace = preg_replace('|http://|i', 'https://', $search);
        }

        // Show the executed SQL while replacing, then put the debug flag back the way
        // it was instead of unconditionally switching it off.
        $previousdebug = $DB->get_debug();
        $DB->set_debug(true);
        try {
            $DB->replace_all_text($table, $column, $search, $replace);
        } finally {
            $DB->set_debug($previousdebug);
        }
    }

    /**
     * Returns SQL to be used to match embedded http links in the given column
     *
     * @param string $columnname name of the column (ready to be used in the SQL query)
     * @return array Two elements: the SQL select fragment and its parameters.
     */
    protected function get_select_search_in_column($columnname) {
        global $DB;

        if ($DB->sql_regex_supported()) {
            // Database supports regex, use it for better match.
            $select = $columnname . ' ' . $DB->sql_regex() . ' ?';
            $params = ["(src|data)\ *=\ *[\\\"\']http://"];
        } else {
            // Databases without regex support should use case-insensitive LIKE.
            // This will have false positive matches and more results than we need, we'll have to filter them in php.
            $select = $DB->sql_like($columnname, '?', false);
            $params = ['%=%http://%'];
        }

        return [$select, $params];
    }

    /**
     * Originally forked from core function db_search().
     *
     * Walks every text / long varchar column of every non-skipped table, extracts
     * src= and data= http links and either rewrites them or counts them per host.
     *
     * @param bool $replacing Whether or not to replace the found urls.
     * @param progress_bar $progress Progress bar keeping track of this process.
     * @return bool|array If $replacing, return true on success. If not, return hash of http hosts
     *                    (those failing the https check) to number of times used.
     *                    False is returned when the database has no tables.
     */
    protected function process($replacing = false, $progress = null) {
        global $DB, $CFG;

        require_once($CFG->libdir.'/filelib.php');

        // TODO: block_instances have HTML content as base64, need to decode then
        // search, currently just skipped. See MDL-60024.
        $skiptables = [
            'block_instances',
            'config',
            'config_log',
            'config_plugins',
            'events_queue',
            'files',
            'filter_config',
            'grade_grades_history',
            'grade_items_history',
            'log',
            'logstore_standard_log',
            'repository_instance_config',
            'sessions',
            'upgrade_log',
            'grade_categories_history',
            '',
        ];

        // Turn off time limits.
        \core_php_time_limit::raise();
        if (!$tables = $DB->get_tables() ) { // No tables yet at all.
            return false;
        }

        // Regex to match src=http://etc. and data=http://etc.urls.
        // Standard warning on expecting regex to perfectly parse HTML
        // read http://stackoverflow.com/a/1732454 for more info.
        $regex = '#((src|data)\ *=\ *[\'\"])(http://)([^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))[\'\"]#i';

        // Number of http references per host, in order of first appearance. Only the host
        // is needed for the report, so the matched column content is not retained.
        $hostcounts = [];

        $numberoftables = count($tables);
        $tablenumber = 0;
        foreach ($tables as $table) {
            if ($progress) {
                $progress->update($tablenumber, $numberoftables, get_string('searching', 'tool_httpsreplace', $table));
                $tablenumber++;
            }
            if (in_array($table, $skiptables, true)) {
                continue;
            }
            if (!$columns = $DB->get_columns($table)) {
                continue;
            }

            foreach ($columns as $column) {
                // Only convert columns that are either text or long varchar.
                $istext = $column->meta_type == 'X';
                $islongvarchar = $column->meta_type == 'C' && $column->max_length > 255;
                if (!$istext && !$islongvarchar) {
                    continue;
                }

                $columnname = $column->name;
                $columnnamequoted = $DB->get_manager()->generator->getEncQuoted($columnname);
                list($select, $params) = $this->get_select_search_in_column($columnnamequoted);
                $rs = $DB->get_recordset_select($table, $select, $params, '', $columnnamequoted);

                // Search string => host, collected for this column only.
                $found = [];
                foreach ($rs as $record) {
                    preg_match_all($regex, $record->$columnname, $match);
                    foreach ($match[0] as $i => $fullmatch) {
                        // Links pointing at this site itself are left alone.
                        if (\core_text::strpos($fullmatch, $CFG->wwwroot) !== false) {
                            continue;
                        }
                        $prefix = $match[1][$i];
                        $protocol = $match[3][$i];
                        $url = $protocol . $match[4][$i];
                        $host = \core_text::strtolower(parse_url($url, PHP_URL_HOST));
                        if (empty($host)) {
                            continue;
                        }
                        if ($replacing) {
                            // For replace string use: prefix, protocol, host and one extra character.
                            // The protocol "http://" is 7 characters long, hence host length + 8.
                            $found[$prefix . \core_text::substr($url, 0, \core_text::strlen($host) + 8)] = $host;
                        } else {
                            if (!isset($hostcounts[$host])) {
                                $hostcounts[$host] = 0;
                            }
                            $hostcounts[$host]++;
                        }
                    }
                }
                $rs->close();

                if ($replacing) {
                    foreach ($found as $search => $domain) {
                        $this->domain_swap($table, $column, $domain, $search);
                    }
                }
            }
        }

        if ($replacing) {
            rebuild_course_cache(0, true);
            purge_all_caches();
            return true;
        }

        // Report only the hosts that cannot be reached over https.
        $results = [];
        foreach ($hostcounts as $host => $count) {
            if (!$this->check_domain_availability("https://$host/")) {
                $results[$host] = $count;
            }
        }
        return $results;
    }

    /**
     * Check if url is available (HEAD request returns 200)
     *
     * @param string $url
     * @return bool
     */
    protected function check_domain_availability($url) {
        $curl = new \curl();
        $curl->head($url);
        $info = $curl->get_info();
        return !empty($info['http_code']) && $info['http_code'] == 200;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body