Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
   1  <?php
   2  
   3  /**

   4   * HTML Purifier's internal representation of a URI.

   5   * @note

   6   *      Internal data-structures are completely escaped. If the data needs

   7   *      to be used in a non-URI context (which is very unlikely), be sure

   8   *      to decode it first. The URI may not necessarily be well-formed until

   9   *      validate() is called.

  10   */
  11  class HTMLPurifier_URI
  12  {
  13      /**

  14       * @type string

  15       */
  16      public $scheme;
  17  
  18      /**

  19       * @type string

  20       */
  21      public $userinfo;
  22  
  23      /**

  24       * @type string

  25       */
  26      public $host;
  27  
  28      /**

  29       * @type int

  30       */
  31      public $port;
  32  
  33      /**

  34       * @type string

  35       */
  36      public $path;
  37  
  38      /**

  39       * @type string

  40       */
  41      public $query;
  42  
  43      /**

  44       * @type string

  45       */
  46      public $fragment;
  47  
  48      /**

  49       * @param string $scheme

  50       * @param string $userinfo

  51       * @param string $host

  52       * @param int $port

  53       * @param string $path

  54       * @param string $query

  55       * @param string $fragment

  56       * @note Automatically normalizes scheme and port

  57       */
  58      public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
  59      {
  60          $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
  61          $this->userinfo = $userinfo;
  62          $this->host = $host;
  63          $this->port = is_null($port) ? $port : (int)$port;
  64          $this->path = $path;
  65          $this->query = $query;
  66          $this->fragment = $fragment;
  67      }
  68  
  69      /**

  70       * Retrieves a scheme object corresponding to the URI's scheme/default

  71       * @param HTMLPurifier_Config $config

  72       * @param HTMLPurifier_Context $context

  73       * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI

  74       */
  75      public function getSchemeObj($config, $context)
  76      {
  77          $registry = HTMLPurifier_URISchemeRegistry::instance();
  78          if ($this->scheme !== null) {
  79              $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
  80              if (!$scheme_obj) {
  81                  return false;
  82              } // invalid scheme, clean it out

  83          } else {
  84              // no scheme: retrieve the default one

  85              $def = $config->getDefinition('URI');
  86              $scheme_obj = $def->getDefaultScheme($config, $context);
  87              if (!$scheme_obj) {
  88                  if ($def->defaultScheme !== null) {
  89                      // something funky happened to the default scheme object

  90                      trigger_error(
  91                          'Default scheme object "' . $def->defaultScheme . '" was not readable',
  92                          E_USER_WARNING
  93                      );
  94                  } // suppress error if it's null

  95                  return false;
  96              }
  97          }
  98          return $scheme_obj;
  99      }
 100  
 101      /**

 102       * Generic validation method applicable for all schemes. May modify

 103       * this URI in order to get it into a compliant form.

 104       * @param HTMLPurifier_Config $config

 105       * @param HTMLPurifier_Context $context

 106       * @return bool True if validation/filtering succeeds, false if failure

 107       */
 108      public function validate($config, $context)
 109      {
 110          // ABNF definitions from RFC 3986

 111          $chars_sub_delims = '!$&\'()*+,;=';
 112          $chars_gen_delims = ':/?#[]@';
 113          $chars_pchar = $chars_sub_delims . ':@';
 114  
 115          // validate host

 116          if (!is_null($this->host)) {
 117              $host_def = new HTMLPurifier_AttrDef_URI_Host();
 118              $this->host = $host_def->validate($this->host, $config, $context);
 119              if ($this->host === false) {
 120                  $this->host = null;
 121              }
 122          }
 123  
 124          // validate scheme

 125          // NOTE: It's not appropriate to check whether or not this

 126          // scheme is in our registry, since a URIFilter may convert a

 127          // URI that we don't allow into one we do.  So instead, we just

 128          // check if the scheme can be dropped because there is no host

 129          // and it is our default scheme.

 130          if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
 131              // support for relative paths is pretty abysmal when the

 132              // scheme is present, so axe it when possible

 133              $def = $config->getDefinition('URI');
 134              if ($def->defaultScheme === $this->scheme) {
 135                  $this->scheme = null;
 136              }
 137          }
 138  
 139          // validate username

 140          if (!is_null($this->userinfo)) {
 141              $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
 142              $this->userinfo = $encoder->encode($this->userinfo);
 143          }
 144  
 145          // validate port

 146          if (!is_null($this->port)) {
 147              if ($this->port < 1 || $this->port > 65535) {
 148                  $this->port = null;
 149              }
 150          }
 151  
 152          // validate path

 153          $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
 154          if (!is_null($this->host)) { // this catches $this->host === ''
 155              // path-abempty (hier and relative)

 156              // http://www.example.com/my/path

 157              // //www.example.com/my/path (looks odd, but works, and

 158              //                            recognized by most browsers)

 159              // (this set is valid or invalid on a scheme by scheme

 160              // basis, so we'll deal with it later)

 161              // file:///my/path

 162              // ///my/path

 163              $this->path = $segments_encoder->encode($this->path);
 164          } elseif ($this->path !== '') {
 165              if ($this->path[0] === '/') {
 166                  // path-absolute (hier and relative)

 167                  // http:/my/path

 168                  // /my/path

 169                  if (strlen($this->path) >= 2 && $this->path[1] === '/') {
 170                      // This could happen if both the host gets stripped

 171                      // out

 172                      // http://my/path

 173                      // //my/path

 174                      $this->path = '';
 175                  } else {
 176                      $this->path = $segments_encoder->encode($this->path);
 177                  }
 178              } elseif (!is_null($this->scheme)) {
 179                  // path-rootless (hier)

 180                  // http:my/path

 181                  // Short circuit evaluation means we don't need to check nz

 182                  $this->path = $segments_encoder->encode($this->path);
 183              } else {
 184                  // path-noscheme (relative)

 185                  // my/path

 186                  // (once again, not checking nz)

 187                  $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
 188                  $c = strpos($this->path, '/');
 189                  if ($c !== false) {
 190                      $this->path =
 191                          $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
 192                          $segments_encoder->encode(substr($this->path, $c));
 193                  } else {
 194                      $this->path = $segment_nc_encoder->encode($this->path);
 195                  }
 196              }
 197          } else {
 198              // path-empty (hier and relative)

 199              $this->path = ''; // just to be safe

 200          }
 201  
 202          // qf = query and fragment

 203          $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
 204  
 205          if (!is_null($this->query)) {
 206              $this->query = $qf_encoder->encode($this->query);
 207          }
 208  
 209          if (!is_null($this->fragment)) {
 210              $this->fragment = $qf_encoder->encode($this->fragment);
 211          }
 212          return true;
 213      }
 214  
 215      /**

 216       * Convert URI back to string

 217       * @return string URI appropriate for output

 218       */
 219      public function toString()
 220      {
 221          // reconstruct authority

 222          $authority = null;
 223          // there is a rendering difference between a null authority

 224          // (http:foo-bar) and an empty string authority

 225          // (http:///foo-bar).

 226          if (!is_null($this->host)) {
 227              $authority = '';
 228              if (!is_null($this->userinfo)) {
 229                  $authority .= $this->userinfo . '@';
 230              }
 231              $authority .= $this->host;
 232              if (!is_null($this->port)) {
 233                  $authority .= ':' . $this->port;
 234              }
 235          }
 236  
 237          // Reconstruct the result

 238          // One might wonder about parsing quirks from browsers after

 239          // this reconstruction.  Unfortunately, parsing behavior depends

 240          // on what *scheme* was employed (file:///foo is handled *very*

 241          // differently than http:///foo), so unfortunately we have to

 242          // defer to the schemes to do the right thing.

 243          $result = '';
 244          if (!is_null($this->scheme)) {
 245              $result .= $this->scheme . ':';
 246          }
 247          if (!is_null($authority)) {
 248              $result .= '//' . $authority;
 249          }
 250          $result .= $this->path;
 251          if (!is_null($this->query)) {
 252              $result .= '?' . $this->query;
 253          }
 254          if (!is_null($this->fragment)) {
 255              $result .= '#' . $this->fragment;
 256          }
 257  
 258          return $result;
 259      }
 260  
 261      /**

 262       * Returns true if this URL might be considered a 'local' URL given

 263       * the current context.  This is true when the host is null, or

 264       * when it matches the host supplied to the configuration.

 265       *

 266       * Note that this does not do any scheme checking, so it is mostly

 267       * only appropriate for metadata that doesn't care about protocol

 268       * security.  isBenign is probably what you actually want.

 269       * @param HTMLPurifier_Config $config

 270       * @param HTMLPurifier_Context $context

 271       * @return bool

 272       */
 273      public function isLocal($config, $context)
 274      {
 275          if ($this->host === null) {
 276              return true;
 277          }
 278          $uri_def = $config->getDefinition('URI');
 279          if ($uri_def->host === $this->host) {
 280              return true;
 281          }
 282          return false;
 283      }
 284  
 285      /**

 286       * Returns true if this URL should be considered a 'benign' URL,

 287       * that is:

 288       *

 289       *      - It is a local URL (isLocal), and

 290       *      - It has a equal or better level of security

 291       * @param HTMLPurifier_Config $config

 292       * @param HTMLPurifier_Context $context

 293       * @return bool

 294       */
 295      public function isBenign($config, $context)
 296      {
 297          if (!$this->isLocal($config, $context)) {
 298              return false;
 299          }
 300  
 301          $scheme_obj = $this->getSchemeObj($config, $context);
 302          if (!$scheme_obj) {
 303              return false;
 304          } // conservative approach

 305  
 306          $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
 307          if ($current_scheme_obj->secure) {
 308              if (!$scheme_obj->secure) {
 309                  return false;
 310              }
 311          }
 312          return true;
 313      }
 314  }
 315  
 316  // vim: et sw=4 sts=4