Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.

Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402] [Versions 401 and 402]

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * This plugin is used to access files by providing an url
  20   *
  21   * @since Moodle 2.0
  22   * @package    repository_url
  23   * @copyright  2010 Dongsheng Cai {@link http://dongsheng.org}
  24   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  require_once($CFG->dirroot . '/repository/lib.php');
  27  require_once (__DIR__.'/locallib.php');
  28  
  29  /**
  30   * repository_url class
  31   * A subclass of repository, which is used to download a file from a specific url
  32   *
  33   * @since Moodle 2.0
  34   * @package    repository_url
  35   * @copyright  2009 Dongsheng Cai {@link http://dongsheng.org}
  36   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  37   */
  38  class repository_url extends repository {
  39      /** @var int Maximum time of recursion. */
  40      const MAX_RECURSION_TIME = 5;
  41      /** @var int Maximum number of CSS imports. */
  42      protected const MAX_CSS_IMPORTS = 10;
  43      /** @var int CSS import counter. */
  44      protected int $cssimportcounter = 0;
  45      var $processedfiles = array();
  46      /** @var int Recursion counter. */
  47      var $recursioncounter = 0;
  48      /** @var string file URL. */
  49      public $file_url;
  50  
  51      /**
  52       * @param int $repositoryid
  53       * @param object $context
  54       * @param array $options
  55       */
  56      public function __construct($repositoryid, $context = SYSCONTEXTID, $options = array()){
  57          global $CFG;
  58          parent::__construct($repositoryid, $context, $options);
  59          $this->file_url = optional_param('file', '', PARAM_RAW);
  60          $this->file_url = $this->escape_url($this->file_url);
  61      }
  62  
  63      public function check_login() {
  64          if (!empty($this->file_url)) {
  65              return true;
  66          } else {
  67              return false;
  68          }
  69      }
  70      /**
  71       * @return mixed
  72       */
  73      public function print_login() {
  74          $strdownload = get_string('download', 'repository');
  75          $strname     = get_string('rename', 'repository_url');
  76          $strurl      = get_string('url', 'repository_url');
  77          if ($this->options['ajax']) {
  78              $url = new stdClass();
  79              $url->label = $strurl.': ';
  80              $url->id   = 'fileurl';
  81              $url->type = 'text';
  82              $url->name = 'file';
  83  
  84              $ret['login'] = array($url);
  85              $ret['login_btn_label'] = get_string('download', 'repository_url');
  86              $ret['allowcaching'] = true; // indicates that login form can be cached in filepicker.js
  87              return $ret;
  88          } else {
  89              echo <<<EOD
  90  <table>
  91  <tr>
  92  <td>{$strurl}: </td><td><input name="file" type="text" /></td>
  93  </tr>
  94  </table>
  95  <input type="submit" value="{$strdownload}" />
  96  EOD;
  97  
  98          }
  99      }
 100  
 101      /**
 102       * @param mixed $path
 103       * @param string $search
 104       * @return array
 105       */
 106      public function get_listing($path='', $page='') {
 107          $ret = array();
 108          $ret['list'] = array();
 109          $ret['nosearch'] = true;
 110          $ret['norefresh'] = true;
 111          $ret['nologin'] = true;
 112  
 113          $this->file_url = clean_param($this->file_url, PARAM_URL);
 114          if (empty($this->file_url)) {
 115              throw new repository_exception('validfiletype', 'repository_url');
 116          }
 117  
 118          $this->parse_file(null, $this->file_url, $ret, true);
 119          return $ret;
 120      }
 121  
 122      /**
 123       * Parses one file (either html or css)
 124       *
 125       * @param string $baseurl (optional) URL of the file where link to this file was found
 126       * @param string $relativeurl relative or absolute link to the file
 127       * @param array $list
 128       * @param bool $mainfile true only for main HTML false and false for all embedded/linked files
 129       */
 130      protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
 131          if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
 132              $relativeurl = $matches[2];
 133          }
 134          if (empty($baseurl)) {
 135              $url = $relativeurl;
 136          } else {
 137              $url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl), ENT_COMPAT);
 138          }
 139          if (in_array($url, $this->processedfiles)) {
 140              // Avoid endless recursion for the same URL with same parameters.
 141              return;
 142          }
 143          // Remove the query string and anchors before check.
 144          $recursioncheckurl = (new moodle_url($url))->out_omit_querystring();
 145          if (in_array($recursioncheckurl, $this->processedfiles)) {
 146              $this->recursioncounter++;
 147          }
 148          if ($this->recursioncounter >= self::MAX_RECURSION_TIME) {
 149              // Avoid endless recursion for the same URL with different parameters.
 150              return;
 151          }
 152          $this->processedfiles[] = $url;
 153          $curl = new curl;
 154          $curl->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
 155          $msg = $curl->head($url);
 156          $info = $curl->get_info();
 157          if ($info['http_code'] != 200) {
 158              if ($mainfile) {
 159                  $list['error'] = $msg;
 160              }
 161          } else {
 162              $csstoanalyze = '';
 163              if ($mainfile && (strstr($info['content_type'], 'text/html') || empty($info['content_type']))) {
 164                  // parse as html
 165                  $htmlcontent = $curl->get($info['url']);
 166                  $ddoc = new DOMDocument();
 167                  @$ddoc->loadHTML($htmlcontent);
 168                  // extract <img>
 169                  $tags = $ddoc->getElementsByTagName('img');
 170                  foreach ($tags as $tag) {
 171                      $url = $tag->getAttribute('src');
 172                      $this->add_image_to_list($info['url'], $url, $list);
 173                  }
 174                  // analyse embedded css (<style>)
 175                  $tags = $ddoc->getElementsByTagName('style');
 176                  foreach ($tags as $tag) {
 177                      if ($tag->getAttribute('type') == 'text/css') {
 178                          $csstoanalyze .= $tag->textContent."\n";
 179                      }
 180                  }
 181                  // analyse links to css (<link type='text/css' href='...'>)
 182                  $tags = $ddoc->getElementsByTagName('link');
 183                  foreach ($tags as $tag) {
 184                      if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
 185                          $this->parse_file($info['url'], $tag->getAttribute('href'), $list);
 186                      }
 187                  }
 188              } else if (strstr($info['content_type'], 'css')) {
 189                  // parse as css
 190                  $csscontent = $curl->get($info['url']);
 191                  $csstoanalyze .= $csscontent."\n";
 192              } else if (strstr($info['content_type'], 'image/')) {
 193                  // download this file
 194                  $this->add_image_to_list($info['url'], $info['url'], $list);
 195              } else {
 196                  $list['error'] = get_string('validfiletype', 'repository_url');
 197              }
 198  
 199              // parse all found css styles
 200              if (strlen($csstoanalyze)) {
 201                  $urls = extract_css_urls($csstoanalyze);
 202                  if (!empty($urls['property'])) {
 203                      foreach ($urls['property'] as $url) {
 204                          $this->add_image_to_list($info['url'], $url, $list);
 205                      }
 206                  }
 207                  if (!empty($urls['import'])) {
 208                      foreach ($urls['import'] as $cssurl) {
 209                          // Limit the number of CSS imports to avoid infinite imports.
 210                          if ($this->cssimportcounter >= self::MAX_CSS_IMPORTS) {
 211                              return;
 212                          }
 213                          $this->cssimportcounter++;
 214                          $this->parse_file($info['url'], $cssurl, $list);
 215                      }
 216                  }
 217              }
 218          }
 219      }
 220      protected function add_image_to_list($baseurl, $url, &$list) {
 221          if (empty($list['list'])) {
 222              $list['list'] = array();
 223          }
 224          $src = url_to_absolute($baseurl, htmlspecialchars_decode($url, ENT_COMPAT));
 225          foreach ($list['list'] as $image) {
 226              if ($image['source'] == $src) {
 227                  return;
 228              }
 229          }
 230          $list['list'][] = array(
 231              'title'=>$this->guess_filename($url, ''),
 232              'source'=>$src,
 233              'thumbnail'=>$src,
 234              'thumbnail_height'=>84,
 235              'thumbnail_width'=>84
 236          );
 237      }
 238      public function guess_filename($url, $type) {
 239          $pattern = '#\/([\w_\?\-.]+)$#';
 240          $matches = null;
 241          preg_match($pattern, $url, $matches);
 242          if (empty($matches[1])) {
 243              return $url;
 244          } else {
 245              return $matches[1];
 246          }
 247      }
 248  
 249      /**
 250       * Escapes a url by replacing spaces with %20.
 251       *
 252       * Note: In general moodle does not automatically escape urls, but for the purposes of making this plugin more user friendly
 253       * and make it consistent with some other areas in moodle (such as mod_url), urls will automatically be escaped.
 254       *
 255       * If moodle_url or PARAM_URL is changed to clean characters that need to be escaped, then this function can be removed
 256       *
 257       * @param string $url An unescaped url.
 258       * @return string The escaped url
 259       */
 260      protected function escape_url($url) {
 261          $url = str_replace('"', '%22', $url);
 262          $url = str_replace('\'', '%27', $url);
 263          $url = str_replace(' ', '%20', $url);
 264          $url = str_replace('<', '%3C', $url);
 265          $url = str_replace('>', '%3E', $url);
 266          return $url;
 267      }
 268  
 269      public function supported_returntypes() {
 270          return (FILE_INTERNAL | FILE_EXTERNAL);
 271      }
 272  
 273      /**
 274       * Return the source information
 275       *
 276       * @param stdClass $url
 277       * @return string|null
 278       */
 279      public function get_file_source_info($url) {
 280          return $url;
 281      }
 282  
 283      /**
 284       * file types supported by url downloader plugin
 285       *
 286       * @return array
 287       */
 288      public function supported_filetypes() {
 289          return array('web_image');
 290      }
 291  
 292      /**
 293       * Is this repository accessing private data?
 294       *
 295       * @return bool
 296       */
 297      public function contains_private_data() {
 298          return false;
 299      }
 300  }