Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.
   1  <?php
   2  
   3  /**
   4   * HTML Purifier's internal representation of a URI.
   5   * @note
   6   *      Internal data-structures are completely escaped. If the data needs
   7   *      to be used in a non-URI context (which is very unlikely), be sure
   8   *      to decode it first. The URI may not necessarily be well-formed until
   9   *      validate() is called.
  10   */
  11  class HTMLPurifier_URI
  12  {
  13      /**
  14       * @type string
  15       */
  16      public $scheme;
  17  
  18      /**
  19       * @type string
  20       */
  21      public $userinfo;
  22  
  23      /**
  24       * @type string
  25       */
  26      public $host;
  27  
  28      /**
  29       * @type int
  30       */
  31      public $port;
  32  
  33      /**
  34       * @type string
  35       */
  36      public $path;
  37  
  38      /**
  39       * @type string
  40       */
  41      public $query;
  42  
  43      /**
  44       * @type string
  45       */
  46      public $fragment;
  47  
  48      /**
  49       * @param string $scheme
  50       * @param string $userinfo
  51       * @param string $host
  52       * @param int $port
  53       * @param string $path
  54       * @param string $query
  55       * @param string $fragment
  56       * @note Automatically normalizes scheme and port
  57       */
  58      public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
  59      {
  60          $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
  61          $this->userinfo = $userinfo;
  62          $this->host = $host;
  63          $this->port = is_null($port) ? $port : (int)$port;
  64          $this->path = $path;
  65          $this->query = $query;
  66          $this->fragment = $fragment;
  67      }
  68  
  69      /**
  70       * Retrieves a scheme object corresponding to the URI's scheme/default
  71       * @param HTMLPurifier_Config $config
  72       * @param HTMLPurifier_Context $context
  73       * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI
  74       */
  75      public function getSchemeObj($config, $context)
  76      {
  77          $registry = HTMLPurifier_URISchemeRegistry::instance();
  78          if ($this->scheme !== null) {
  79              $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
  80              if (!$scheme_obj) {
  81                  return false;
  82              } // invalid scheme, clean it out
  83          } else {
  84              // no scheme: retrieve the default one
  85              $def = $config->getDefinition('URI');
  86              $scheme_obj = $def->getDefaultScheme($config, $context);
  87              if (!$scheme_obj) {
  88                  if ($def->defaultScheme !== null) {
  89                      // something funky happened to the default scheme object
  90                      trigger_error(
  91                          'Default scheme object "' . $def->defaultScheme . '" was not readable',
  92                          E_USER_WARNING
  93                      );
  94                  } // suppress error if it's null
  95                  return false;
  96              }
  97          }
  98          return $scheme_obj;
  99      }
 100  
 101      /**
 102       * Generic validation method applicable for all schemes. May modify
 103       * this URI in order to get it into a compliant form.
 104       * @param HTMLPurifier_Config $config
 105       * @param HTMLPurifier_Context $context
 106       * @return bool True if validation/filtering succeeds, false if failure
 107       */
 108      public function validate($config, $context)
 109      {
 110          // ABNF definitions from RFC 3986
 111          $chars_sub_delims = '!$&\'()*+,;=';
 112          $chars_gen_delims = ':/?#[]@';
 113          $chars_pchar = $chars_sub_delims . ':@';
 114  
 115          // validate host
 116          if (!is_null($this->host)) {
 117              $host_def = new HTMLPurifier_AttrDef_URI_Host();
 118              $this->host = $host_def->validate($this->host, $config, $context);
 119              if ($this->host === false) {
 120                  $this->host = null;
 121              }
 122          }
 123  
 124          // validate scheme
 125          // NOTE: It's not appropriate to check whether or not this
 126          // scheme is in our registry, since a URIFilter may convert a
 127          // URI that we don't allow into one we do.  So instead, we just
 128          // check if the scheme can be dropped because there is no host
 129          // and it is our default scheme.
 130          if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
 131              // support for relative paths is pretty abysmal when the
 132              // scheme is present, so axe it when possible
 133              $def = $config->getDefinition('URI');
 134              if ($def->defaultScheme === $this->scheme) {
 135                  $this->scheme = null;
 136              }
 137          }
 138  
 139          // validate username
 140          if (!is_null($this->userinfo)) {
 141              $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
 142              $this->userinfo = $encoder->encode($this->userinfo);
 143          }
 144  
 145          // validate port
 146          if (!is_null($this->port)) {
 147              if ($this->port < 1 || $this->port > 65535) {
 148                  $this->port = null;
 149              }
 150          }
 151  
 152          // validate path
 153          $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
 154          if (!is_null($this->host)) { // this catches $this->host === ''
 155              // path-abempty (hier and relative)
 156              // http://www.example.com/my/path
 157              // //www.example.com/my/path (looks odd, but works, and
 158              //                            recognized by most browsers)
 159              // (this set is valid or invalid on a scheme by scheme
 160              // basis, so we'll deal with it later)
 161              // file:///my/path
 162              // ///my/path
 163              $this->path = $segments_encoder->encode($this->path);
 164          } elseif ($this->path !== '') {
 165              if ($this->path[0] === '/') {
 166                  // path-absolute (hier and relative)
 167                  // http:/my/path
 168                  // /my/path
 169                  if (strlen($this->path) >= 2 && $this->path[1] === '/') {
 170                      // This could happen if both the host gets stripped
 171                      // out
 172                      // http://my/path
 173                      // //my/path
 174                      $this->path = '';
 175                  } else {
 176                      $this->path = $segments_encoder->encode($this->path);
 177                  }
 178              } elseif (!is_null($this->scheme)) {
 179                  // path-rootless (hier)
 180                  // http:my/path
 181                  // Short circuit evaluation means we don't need to check nz
 182                  $this->path = $segments_encoder->encode($this->path);
 183              } else {
 184                  // path-noscheme (relative)
 185                  // my/path
 186                  // (once again, not checking nz)
 187                  $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
 188                  $c = strpos($this->path, '/');
 189                  if ($c !== false) {
 190                      $this->path =
 191                          $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
 192                          $segments_encoder->encode(substr($this->path, $c));
 193                  } else {
 194                      $this->path = $segment_nc_encoder->encode($this->path);
 195                  }
 196              }
 197          } else {
 198              // path-empty (hier and relative)
 199              $this->path = ''; // just to be safe
 200          }
 201  
 202          // qf = query and fragment
 203          $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
 204  
 205          if (!is_null($this->query)) {
 206              $this->query = $qf_encoder->encode($this->query);
 207          }
 208  
 209          if (!is_null($this->fragment)) {
 210              $this->fragment = $qf_encoder->encode($this->fragment);
 211          }
 212          return true;
 213      }
 214  
 215      /**
 216       * Convert URI back to string
 217       * @return string URI appropriate for output
 218       */
 219      public function toString()
 220      {
 221          // reconstruct authority
 222          $authority = null;
 223          // there is a rendering difference between a null authority
 224          // (http:foo-bar) and an empty string authority
 225          // (http:///foo-bar).
 226          if (!is_null($this->host)) {
 227              $authority = '';
 228              if (!is_null($this->userinfo)) {
 229                  $authority .= $this->userinfo . '@';
 230              }
 231              $authority .= $this->host;
 232              if (!is_null($this->port)) {
 233                  $authority .= ':' . $this->port;
 234              }
 235          }
 236  
 237          // Reconstruct the result
 238          // One might wonder about parsing quirks from browsers after
 239          // this reconstruction.  Unfortunately, parsing behavior depends
 240          // on what *scheme* was employed (file:///foo is handled *very*
 241          // differently than http:///foo), so unfortunately we have to
 242          // defer to the schemes to do the right thing.
 243          $result = '';
 244          if (!is_null($this->scheme)) {
 245              $result .= $this->scheme . ':';
 246          }
 247          if (!is_null($authority)) {
 248              $result .= '//' . $authority;
 249          }
 250          $result .= $this->path;
 251          if (!is_null($this->query)) {
 252              $result .= '?' . $this->query;
 253          }
 254          if (!is_null($this->fragment)) {
 255              $result .= '#' . $this->fragment;
 256          }
 257  
 258          return $result;
 259      }
 260  
 261      /**
 262       * Returns true if this URL might be considered a 'local' URL given
 263       * the current context.  This is true when the host is null, or
 264       * when it matches the host supplied to the configuration.
 265       *
 266       * Note that this does not do any scheme checking, so it is mostly
 267       * only appropriate for metadata that doesn't care about protocol
 268       * security.  isBenign is probably what you actually want.
 269       * @param HTMLPurifier_Config $config
 270       * @param HTMLPurifier_Context $context
 271       * @return bool
 272       */
 273      public function isLocal($config, $context)
 274      {
 275          if ($this->host === null) {
 276              return true;
 277          }
 278          $uri_def = $config->getDefinition('URI');
 279          if ($uri_def->host === $this->host) {
 280              return true;
 281          }
 282          return false;
 283      }
 284  
 285      /**
 286       * Returns true if this URL should be considered a 'benign' URL,
 287       * that is:
 288       *
 289       *      - It is a local URL (isLocal), and
 290       *      - It has a equal or better level of security
 291       * @param HTMLPurifier_Config $config
 292       * @param HTMLPurifier_Context $context
 293       * @return bool
 294       */
 295      public function isBenign($config, $context)
 296      {
 297          if (!$this->isLocal($config, $context)) {
 298              return false;
 299          }
 300  
 301          $scheme_obj = $this->getSchemeObj($config, $context);
 302          if (!$scheme_obj) {
 303              return false;
 304          } // conservative approach
 305  
 306          $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
 307          if ($current_scheme_obj->secure) {
 308              if (!$scheme_obj->secure) {
 309                  return false;
 310              }
 311          }
 312          return true;
 313      }
 314  }
 315  
 316  // vim: et sw=4 sts=4