Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 401 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * url_finder class definition.
  19   *
  20   * @package    tool_httpsreplace
  21   * @copyright Copyright (c) 2016 Blackboard Inc. (http://www.blackboard.com)
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace tool_httpsreplace;
  26  
  27  use database_column_info;
  28  use progress_bar;
  29  
  30  defined('MOODLE_INTERNAL') || die();
  31  
  32  /**
  33   * Examines DB for non-https src or data links
  34   *
  35   * @package tool_httpsreplace
  36   * @copyright Copyright (c) 2016 Blackboard Inc. (http://www.blackboard.com)
  37   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  38   */
  39  class url_finder {
  40  
  41      /**
  42       * Returns a hash of what hosts are referred to over http and would need to be changed.
  43       *
  44       * @param progress_bar $progress Progress bar keeping track of this process.
  45       * @return array Hash of domains with number of references as the value.
  46       */
  47      public function http_link_stats($progress = null) {
  48          return $this->process(false, $progress);
  49      }
  50  
  51      /**
  52       * Changes all resources referred to over http to https.
  53       *
  54       * @param progress_bar $progress Progress bar keeping track of this process.
  55       * @return bool True upon success
  56       */
  57      public function upgrade_http_links($progress = null) {
  58          return $this->process(true, $progress);
  59      }
  60  
  61      /**
  62       * Replace http domains with https equivalent, with two types of exceptions
  63       * for less straightforward swaps.
  64       *
  65       * @param string $table
  66       * @param database_column_info $column
  67       * @param string $domain
  68       * @param string $search search string that has prefix, protocol, domain name and one extra character,
  69       *      example1: src="http://host.com/
  70       *      example2: DATA="HTTP://MYDOMAIN.EDU"
  71       *      example3: src="HTTP://hello.world?
  72       * @return void
  73       */
  74      protected function domain_swap($table, $column, $domain, $search) {
  75          global $DB;
  76  
  77          $renames = json_decode(get_config('tool_httpsreplace', 'renames'), true);
  78  
  79          if (isset($renames[$domain])) {
  80              $replace = preg_replace('|http://'.preg_quote($domain).'|i', 'https://' . $renames[$domain], $search);
  81          } else {
  82              $replace = preg_replace('|http://|i', 'https://', $search);
  83          }
  84          $DB->set_debug(true);
  85          $DB->replace_all_text($table, $column, $search, $replace);
  86          $DB->set_debug(false);
  87      }
  88  
  89      /**
  90       * Returns SQL to be used to match embedded http links in the given column
  91       *
  92       * @param string $columnname name of the column (ready to be used in the SQL query)
  93       * @return array
  94       */
  95      protected function get_select_search_in_column($columnname) {
  96          global $DB;
  97  
  98          if ($DB->sql_regex_supported()) {
  99              // Database supports regex, use it for better match.
 100              $select = $columnname . ' ' . $DB->sql_regex() . ' ?';
 101              $params = ["(src|data)\ *=\ *[\\\"\']http://"];
 102          } else {
 103              // Databases without regex support should use case-insensitive LIKE.
 104              // This will have false positive matches and more results than we need, we'll have to filter them in php.
 105              $select = $DB->sql_like($columnname, '?', false);
 106              $params = ['%=%http://%'];
 107          }
 108  
 109          return [$select, $params];
 110      }
 111  
 112      /**
 113       * Originally forked from core function db_search().
 114       * @param bool $replacing Whether or not to replace the found urls.
 115       * @param progress_bar $progress Progress bar keeping track of this process.
 116       * @return bool|array If $replacing, return true on success. If not, return hash of http urls to number of times used.
 117       */
 118      protected function process($replacing = false, $progress = null) {
 119          global $DB, $CFG;
 120  
 121          require_once($CFG->libdir.'/filelib.php');
 122  
 123          // TODO: block_instances have HTML content as base64, need to decode then
 124          // search, currently just skipped. See MDL-60024.
 125          $skiptables = array(
 126              'block_instances',
 127              'config',
 128              'config_log',
 129              'config_plugins',
 130              'events_queue',
 131              'files',
 132              'filter_config',
 133              'grade_grades_history',
 134              'grade_items_history',
 135              'log',
 136              'logstore_standard_log',
 137              'repository_instance_config',
 138              'sessions',
 139              'upgrade_log',
 140              'grade_categories_history',
 141              '',
 142          );
 143  
 144          // Turn off time limits.
 145          \core_php_time_limit::raise();
 146          if (!$tables = $DB->get_tables() ) {    // No tables yet at all.
 147              return false;
 148          }
 149  
 150          $urls = array();
 151  
 152          $numberoftables = count($tables);
 153          $tablenumber = 0;
 154          foreach ($tables as $table) {
 155              if ($progress) {
 156                  $progress->update($tablenumber, $numberoftables, get_string('searching', 'tool_httpsreplace', $table));
 157                  $tablenumber++;
 158              }
 159              if (in_array($table, $skiptables)) {
 160                  continue;
 161              }
 162              if ($columns = $DB->get_columns($table)) {
 163                  foreach ($columns as $column) {
 164  
 165                      // Only convert columns that are either text or long varchar.
 166                      if ($column->meta_type == 'X' || ($column->meta_type == 'C' && $column->max_length > 255)) {
 167                          $columnname = $column->name;
 168                          $columnnamequoted = $DB->get_manager()->generator->getEncQuoted($columnname);
 169                          list($select, $params) = $this->get_select_search_in_column($columnnamequoted);
 170                          $rs = $DB->get_recordset_select($table, $select, $params, '', $columnnamequoted);
 171  
 172                          $found = array();
 173                          foreach ($rs as $record) {
 174                              // Regex to match src=http://etc. and data=http://etc.urls.
 175                              // Standard warning on expecting regex to perfectly parse HTML
 176                              // read http://stackoverflow.com/a/1732454 for more info.
 177                              $regex = '#((src|data)\ *=\ *[\'\"])(http://)([^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))[\'\"]#i';
 178                              preg_match_all($regex, $record->$columnname, $match);
 179                              foreach ($match[0] as $i => $fullmatch) {
 180                                  if (\core_text::strpos($fullmatch, $CFG->wwwroot) !== false) {
 181                                      continue;
 182                                  }
 183                                  $prefix = $match[1][$i];
 184                                  $protocol = $match[3][$i];
 185                                  $url = $protocol . $match[4][$i];
 186                                  $host = \core_text::strtolower(parse_url($url, PHP_URL_HOST));
 187                                  if (empty($host)) {
 188                                      continue;
 189                                  }
 190                                  if ($replacing) {
 191                                      // For replace string use: prefix, protocol, host and one extra character.
 192                                      $found[$prefix . \core_text::substr($url, 0, \core_text::strlen($host) + 8)] = $host;
 193                                  } else {
 194                                      $entry["table"] = $table;
 195                                      $entry["columnname"] = $columnname;
 196                                      $entry["url"] = $url;
 197                                      $entry["host"] = $host;
 198                                      $entry["raw"] = $record->$columnname;
 199                                      $entry["ssl"] = '';
 200                                      $urls[] = $entry;
 201                                  }
 202                              }
 203                          }
 204                          $rs->close();
 205  
 206                          if ($replacing) {
 207                              foreach ($found as $search => $domain) {
 208                                  $this->domain_swap($table, $column, $domain, $search);
 209                              }
 210                          }
 211                      }
 212                  }
 213              }
 214          }
 215  
 216          if ($replacing) {
 217              rebuild_course_cache(0, true);
 218              purge_all_caches();
 219              return true;
 220          }
 221  
 222          $domains = array_map(function ($i) {
 223              return $i['host'];
 224          }, $urls);
 225  
 226          $uniquedomains = array_unique($domains);
 227  
 228          $sslfailures = array();
 229  
 230          foreach ($uniquedomains as $domain) {
 231              if (!$this->check_domain_availability("https://$domain/")) {
 232                  $sslfailures[] = $domain;
 233              }
 234          }
 235  
 236          $results = array();
 237          foreach ($urls as $url) {
 238              $host = $url['host'];
 239              foreach ($sslfailures as $badhost) {
 240                  if ($host == $badhost) {
 241                      if (!isset($results[$host])) {
 242                          $results[$host] = 1;
 243                      } else {
 244                          $results[$host]++;
 245                      }
 246                  }
 247              }
 248          }
 249          return $results;
 250      }
 251  
 252      /**
 253       * Check if url is available (GET request returns 200)
 254       *
 255       * @param string $url
 256       * @return bool
 257       */
 258      protected function check_domain_availability($url) {
 259          $curl = new \curl();
 260          $curl->head($url);
 261          $info = $curl->get_info();
 262          return !empty($info['http_code']) && $info['http_code'] == 200;
 263      }
 264  }