Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * This plugin is used to access files by providing an url
  20   *
  21   * @since Moodle 2.0
  22   * @package    repository_url
  23   * @copyright  2010 Dongsheng Cai {@link http://dongsheng.org}
  24   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  require_once($CFG->dirroot . '/repository/lib.php');
  27  require_once (__DIR__.'/locallib.php');
  28  
  29  /**
  30   * repository_url class
  31   * A subclass of repository, which is used to download a file from a specific url
  32   *
  33   * @since Moodle 2.0
  34   * @package    repository_url
  35   * @copyright  2009 Dongsheng Cai {@link http://dongsheng.org}
  36   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  37   */
  38  class repository_url extends repository {
  39      /** @var int Maximum time of recursion. */
  40      const MAX_RECURSION_TIME = 5;
  41      /** @var int Maximum number of CSS imports. */
  42      protected const MAX_CSS_IMPORTS = 10;
  43      /** @var int CSS import counter. */
  44      protected int $cssimportcounter = 0;
  45      var $processedfiles = array();
  46      /** @var int Recursion counter. */
  47      var $recursioncounter = 0;
  48  
  49      /**
  50       * @param int $repositoryid
  51       * @param object $context
  52       * @param array $options
  53       */
  54      public function __construct($repositoryid, $context = SYSCONTEXTID, $options = array()){
  55          global $CFG;
  56          parent::__construct($repositoryid, $context, $options);
  57          $this->file_url = optional_param('file', '', PARAM_RAW);
  58          $this->file_url = $this->escape_url($this->file_url);
  59      }
  60  
  61      public function check_login() {
  62          if (!empty($this->file_url)) {
  63              return true;
  64          } else {
  65              return false;
  66          }
  67      }
  68      /**
  69       * @return mixed
  70       */
  71      public function print_login() {
  72          $strdownload = get_string('download', 'repository');
  73          $strname     = get_string('rename', 'repository_url');
  74          $strurl      = get_string('url', 'repository_url');
  75          if ($this->options['ajax']) {
  76              $url = new stdClass();
  77              $url->label = $strurl.': ';
  78              $url->id   = 'fileurl';
  79              $url->type = 'text';
  80              $url->name = 'file';
  81  
  82              $ret['login'] = array($url);
  83              $ret['login_btn_label'] = get_string('download', 'repository_url');
  84              $ret['allowcaching'] = true; // indicates that login form can be cached in filepicker.js
  85              return $ret;
  86          } else {
  87              echo <<<EOD
  88  <table>
  89  <tr>
  90  <td>{$strurl}: </td><td><input name="file" type="text" /></td>
  91  </tr>
  92  </table>
  93  <input type="submit" value="{$strdownload}" />
  94  EOD;
  95  
  96          }
  97      }
  98  
  99      /**
 100       * @param mixed $path
 101       * @param string $search
 102       * @return array
 103       */
 104      public function get_listing($path='', $page='') {
 105          $ret = array();
 106          $ret['list'] = array();
 107          $ret['nosearch'] = true;
 108          $ret['norefresh'] = true;
 109          $ret['nologin'] = true;
 110  
 111          $this->file_url = clean_param($this->file_url, PARAM_URL);
 112          if (empty($this->file_url)) {
 113              throw new repository_exception('validfiletype', 'repository_url');
 114          }
 115  
 116          $this->parse_file(null, $this->file_url, $ret, true);
 117          return $ret;
 118      }
 119  
 120      /**
 121       * Parses one file (either html or css)
 122       *
 123       * @param string $baseurl (optional) URL of the file where link to this file was found
 124       * @param string $relativeurl relative or absolute link to the file
 125       * @param array $list
 126       * @param bool $mainfile true only for main HTML false and false for all embedded/linked files
 127       */
 128      protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
 129          if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
 130              $relativeurl = $matches[2];
 131          }
 132          if (empty($baseurl)) {
 133              $url = $relativeurl;
 134          } else {
 135              $url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl), ENT_COMPAT);
 136          }
 137          if (in_array($url, $this->processedfiles)) {
 138              // Avoid endless recursion for the same URL with same parameters.
 139              return;
 140          }
 141          // Remove the query string and anchors before check.
 142          $recursioncheckurl = (new moodle_url($url))->out_omit_querystring();
 143          if (in_array($recursioncheckurl, $this->processedfiles)) {
 144              $this->recursioncounter++;
 145          }
 146          if ($this->recursioncounter >= self::MAX_RECURSION_TIME) {
 147              // Avoid endless recursion for the same URL with different parameters.
 148              return;
 149          }
 150          $this->processedfiles[] = $url;
 151          $curl = new curl;
 152          $curl->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
 153          $msg = $curl->head($url);
 154          $info = $curl->get_info();
 155          if ($info['http_code'] != 200) {
 156              if ($mainfile) {
 157                  $list['error'] = $msg;
 158              }
 159          } else {
 160              $csstoanalyze = '';
 161              if ($mainfile && (strstr($info['content_type'], 'text/html') || empty($info['content_type']))) {
 162                  // parse as html
 163                  $htmlcontent = $curl->get($info['url']);
 164                  $ddoc = new DOMDocument();
 165                  @$ddoc->loadHTML($htmlcontent);
 166                  // extract <img>
 167                  $tags = $ddoc->getElementsByTagName('img');
 168                  foreach ($tags as $tag) {
 169                      $url = $tag->getAttribute('src');
 170                      $this->add_image_to_list($info['url'], $url, $list);
 171                  }
 172                  // analyse embedded css (<style>)
 173                  $tags = $ddoc->getElementsByTagName('style');
 174                  foreach ($tags as $tag) {
 175                      if ($tag->getAttribute('type') == 'text/css') {
 176                          $csstoanalyze .= $tag->textContent."\n";
 177                      }
 178                  }
 179                  // analyse links to css (<link type='text/css' href='...'>)
 180                  $tags = $ddoc->getElementsByTagName('link');
 181                  foreach ($tags as $tag) {
 182                      if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
 183                          $this->parse_file($info['url'], $tag->getAttribute('href'), $list);
 184                      }
 185                  }
 186              } else if (strstr($info['content_type'], 'css')) {
 187                  // parse as css
 188                  $csscontent = $curl->get($info['url']);
 189                  $csstoanalyze .= $csscontent."\n";
 190              } else if (strstr($info['content_type'], 'image/')) {
 191                  // download this file
 192                  $this->add_image_to_list($info['url'], $info['url'], $list);
 193              } else {
 194                  $list['error'] = get_string('validfiletype', 'repository_url');
 195              }
 196  
 197              // parse all found css styles
 198              if (strlen($csstoanalyze)) {
 199                  $urls = extract_css_urls($csstoanalyze);
 200                  if (!empty($urls['property'])) {
 201                      foreach ($urls['property'] as $url) {
 202                          $this->add_image_to_list($info['url'], $url, $list);
 203                      }
 204                  }
 205                  if (!empty($urls['import'])) {
 206                      foreach ($urls['import'] as $cssurl) {
 207                          // Limit the number of CSS imports to avoid infinite imports.
 208                          if ($this->cssimportcounter >= self::MAX_CSS_IMPORTS) {
 209                              return;
 210                          }
 211                          $this->cssimportcounter++;
 212                          $this->parse_file($info['url'], $cssurl, $list);
 213                      }
 214                  }
 215              }
 216          }
 217      }
 218      protected function add_image_to_list($baseurl, $url, &$list) {
 219          if (empty($list['list'])) {
 220              $list['list'] = array();
 221          }
 222          $src = url_to_absolute($baseurl, htmlspecialchars_decode($url, ENT_COMPAT));
 223          foreach ($list['list'] as $image) {
 224              if ($image['source'] == $src) {
 225                  return;
 226              }
 227          }
 228          $list['list'][] = array(
 229              'title'=>$this->guess_filename($url, ''),
 230              'source'=>$src,
 231              'thumbnail'=>$src,
 232              'thumbnail_height'=>84,
 233              'thumbnail_width'=>84
 234          );
 235      }
 236      public function guess_filename($url, $type) {
 237          $pattern = '#\/([\w_\?\-.]+)$#';
 238          $matches = null;
 239          preg_match($pattern, $url, $matches);
 240          if (empty($matches[1])) {
 241              return $url;
 242          } else {
 243              return $matches[1];
 244          }
 245      }
 246  
 247      /**
 248       * Escapes a url by replacing spaces with %20.
 249       *
 250       * Note: In general moodle does not automatically escape urls, but for the purposes of making this plugin more user friendly
 251       * and make it consistent with some other areas in moodle (such as mod_url), urls will automatically be escaped.
 252       *
 253       * If moodle_url or PARAM_URL is changed to clean characters that need to be escaped, then this function can be removed
 254       *
 255       * @param string $url An unescaped url.
 256       * @return string The escaped url
 257       */
 258      protected function escape_url($url) {
 259          $url = str_replace('"', '%22', $url);
 260          $url = str_replace('\'', '%27', $url);
 261          $url = str_replace(' ', '%20', $url);
 262          $url = str_replace('<', '%3C', $url);
 263          $url = str_replace('>', '%3E', $url);
 264          return $url;
 265      }
 266  
 267      public function supported_returntypes() {
 268          return (FILE_INTERNAL | FILE_EXTERNAL);
 269      }
 270  
 271      /**
 272       * Return the source information
 273       *
 274       * @param stdClass $url
 275       * @return string|null
 276       */
 277      public function get_file_source_info($url) {
 278          return $url;
 279      }
 280  
 281      /**
 282       * file types supported by url downloader plugin
 283       *
 284       * @return array
 285       */
 286      public function supported_filetypes() {
 287          return array('web_image');
 288      }
 289  
 290      /**
 291       * Is this repository accessing private data?
 292       *
 293       * @return bool
 294       */
 295      public function contains_private_data() {
 296          return false;
 297      }
 298  }