Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace GuzzleHttp\Psr7;
   6  
   7  use Psr\Http\Message\UriInterface;
   8  
   9  /**
  10   * Provides methods to normalize and compare URIs.
  11   *
  12   * @author Tobias Schultze
  13   *
  14   * @link https://tools.ietf.org/html/rfc3986#section-6
  15   */
  16  final class UriNormalizer
  17  {
  18      /**
  19       * Default normalizations which only include the ones that preserve semantics.
  20       */
  21      public const PRESERVING_NORMALIZATIONS =
  22          self::CAPITALIZE_PERCENT_ENCODING |
  23          self::DECODE_UNRESERVED_CHARACTERS |
  24          self::CONVERT_EMPTY_PATH |
  25          self::REMOVE_DEFAULT_HOST |
  26          self::REMOVE_DEFAULT_PORT |
  27          self::REMOVE_DOT_SEGMENTS;
  28  
  29      /**
  30       * All letters within a percent-encoding triplet (e.g., "%3A") are case-insensitive, and should be capitalized.
  31       *
  32       * Example: http://example.org/a%c2%b1b → http://example.org/a%C2%B1b
  33       */
  34      public const CAPITALIZE_PERCENT_ENCODING = 1;
  35  
  36      /**
  37       * Decodes percent-encoded octets of unreserved characters.
  38       *
  39       * For consistency, percent-encoded octets in the ranges of ALPHA (%41–%5A and %61–%7A), DIGIT (%30–%39),
  40       * hyphen (%2D), period (%2E), underscore (%5F), or tilde (%7E) should not be created by URI producers and,
  41       * when found in a URI, should be decoded to their corresponding unreserved characters by URI normalizers.
  42       *
  43       * Example: http://example.org/%7Eusern%61me/ → http://example.org/~username/
  44       */
  45      public const DECODE_UNRESERVED_CHARACTERS = 2;
  46  
  47      /**
  48       * Converts the empty path to "/" for http and https URIs.
  49       *
  50       * Example: http://example.org → http://example.org/
  51       */
  52      public const CONVERT_EMPTY_PATH = 4;
  53  
  54      /**
  55       * Removes the default host of the given URI scheme from the URI.
  56       *
  57       * Only the "file" scheme defines the default host "localhost".
  58       * All of `file:/myfile`, `file:///myfile`, and `file://localhost/myfile`
  59       * are equivalent according to RFC 3986. The first format is not accepted
  60       * by PHPs stream functions and thus already normalized implicitly to the
  61       * second format in the Uri class. See `GuzzleHttp\Psr7\Uri::composeComponents`.
  62       *
  63       * Example: file://localhost/myfile → file:///myfile
  64       */
  65      public const REMOVE_DEFAULT_HOST = 8;
  66  
  67      /**
  68       * Removes the default port of the given URI scheme from the URI.
  69       *
  70       * Example: http://example.org:80/ → http://example.org/
  71       */
  72      public const REMOVE_DEFAULT_PORT = 16;
  73  
  74      /**
  75       * Removes unnecessary dot-segments.
  76       *
  77       * Dot-segments in relative-path references are not removed as it would
  78       * change the semantics of the URI reference.
  79       *
  80       * Example: http://example.org/../a/b/../c/./d.html → http://example.org/a/c/d.html
  81       */
  82      public const REMOVE_DOT_SEGMENTS = 32;
  83  
  84      /**
  85       * Paths which include two or more adjacent slashes are converted to one.
  86       *
  87       * Webservers usually ignore duplicate slashes and treat those URIs equivalent.
  88       * But in theory those URIs do not need to be equivalent. So this normalization
  89       * may change the semantics. Encoded slashes (%2F) are not removed.
  90       *
  91       * Example: http://example.org//foo///bar.html → http://example.org/foo/bar.html
  92       */
  93      public const REMOVE_DUPLICATE_SLASHES = 64;
  94  
  95      /**
  96       * Sort query parameters with their values in alphabetical order.
  97       *
  98       * However, the order of parameters in a URI may be significant (this is not defined by the standard).
  99       * So this normalization is not safe and may change the semantics of the URI.
 100       *
 101       * Example: ?lang=en&article=fred → ?article=fred&lang=en
 102       *
 103       * Note: The sorting is neither locale nor Unicode aware (the URI query does not get decoded at all) as the
 104       * purpose is to be able to compare URIs in a reproducible way, not to have the params sorted perfectly.
 105       */
 106      public const SORT_QUERY_PARAMETERS = 128;
 107  
 108      /**
 109       * Returns a normalized URI.
 110       *
 111       * The scheme and host component are already normalized to lowercase per PSR-7 UriInterface.
 112       * This methods adds additional normalizations that can be configured with the $flags parameter.
 113       *
 114       * PSR-7 UriInterface cannot distinguish between an empty component and a missing component as
 115       * getQuery(), getFragment() etc. always return a string. This means the URIs "/?#" and "/" are
 116       * treated equivalent which is not necessarily true according to RFC 3986. But that difference
 117       * is highly uncommon in reality. So this potential normalization is implied in PSR-7 as well.
 118       *
 119       * @param UriInterface $uri   The URI to normalize
 120       * @param int          $flags A bitmask of normalizations to apply, see constants
 121       *
 122       * @link https://tools.ietf.org/html/rfc3986#section-6.2
 123       */
 124      public static function normalize(UriInterface $uri, int $flags = self::PRESERVING_NORMALIZATIONS): UriInterface
 125      {
 126          if ($flags & self::CAPITALIZE_PERCENT_ENCODING) {
 127              $uri = self::capitalizePercentEncoding($uri);
 128          }
 129  
 130          if ($flags & self::DECODE_UNRESERVED_CHARACTERS) {
 131              $uri = self::decodeUnreservedCharacters($uri);
 132          }
 133  
 134          if ($flags & self::CONVERT_EMPTY_PATH && $uri->getPath() === '' &&
 135              ($uri->getScheme() === 'http' || $uri->getScheme() === 'https')
 136          ) {
 137              $uri = $uri->withPath('/');
 138          }
 139  
 140          if ($flags & self::REMOVE_DEFAULT_HOST && $uri->getScheme() === 'file' && $uri->getHost() === 'localhost') {
 141              $uri = $uri->withHost('');
 142          }
 143  
 144          if ($flags & self::REMOVE_DEFAULT_PORT && $uri->getPort() !== null && Uri::isDefaultPort($uri)) {
 145              $uri = $uri->withPort(null);
 146          }
 147  
 148          if ($flags & self::REMOVE_DOT_SEGMENTS && !Uri::isRelativePathReference($uri)) {
 149              $uri = $uri->withPath(UriResolver::removeDotSegments($uri->getPath()));
 150          }
 151  
 152          if ($flags & self::REMOVE_DUPLICATE_SLASHES) {
 153              $uri = $uri->withPath(preg_replace('#//++#', '/', $uri->getPath()));
 154          }
 155  
 156          if ($flags & self::SORT_QUERY_PARAMETERS && $uri->getQuery() !== '') {
 157              $queryKeyValues = explode('&', $uri->getQuery());
 158              sort($queryKeyValues);
 159              $uri = $uri->withQuery(implode('&', $queryKeyValues));
 160          }
 161  
 162          return $uri;
 163      }
 164  
 165      /**
 166       * Whether two URIs can be considered equivalent.
 167       *
 168       * Both URIs are normalized automatically before comparison with the given $normalizations bitmask. The method also
 169       * accepts relative URI references and returns true when they are equivalent. This of course assumes they will be
 170       * resolved against the same base URI. If this is not the case, determination of equivalence or difference of
 171       * relative references does not mean anything.
 172       *
 173       * @param UriInterface $uri1           An URI to compare
 174       * @param UriInterface $uri2           An URI to compare
 175       * @param int          $normalizations A bitmask of normalizations to apply, see constants
 176       *
 177       * @link https://tools.ietf.org/html/rfc3986#section-6.1
 178       */
 179      public static function isEquivalent(UriInterface $uri1, UriInterface $uri2, int $normalizations = self::PRESERVING_NORMALIZATIONS): bool
 180      {
 181          return (string) self::normalize($uri1, $normalizations) === (string) self::normalize($uri2, $normalizations);
 182      }
 183  
 184      private static function capitalizePercentEncoding(UriInterface $uri): UriInterface
 185      {
 186          $regex = '/(?:%[A-Fa-f0-9]{2})++/';
 187  
 188          $callback = function (array $match) {
 189              return strtoupper($match[0]);
 190          };
 191  
 192          return
 193              $uri->withPath(
 194                  preg_replace_callback($regex, $callback, $uri->getPath())
 195              )->withQuery(
 196                  preg_replace_callback($regex, $callback, $uri->getQuery())
 197              );
 198      }
 199  
 200      private static function decodeUnreservedCharacters(UriInterface $uri): UriInterface
 201      {
 202          $regex = '/%(?:2D|2E|5F|7E|3[0-9]|[46][1-9A-F]|[57][0-9A])/i';
 203  
 204          $callback = function (array $match) {
 205              return rawurldecode($match[0]);
 206          };
 207  
 208          return
 209              $uri->withPath(
 210                  preg_replace_callback($regex, $callback, $uri->getPath())
 211              )->withQuery(
 212                  preg_replace_callback($regex, $callback, $uri->getQuery())
 213              );
 214      }
 215  
 216      private function __construct()
 217      {
 218          // cannot be instantiated
 219      }
 220  }