<?php

/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component, lower-cased by the constructor; null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component; null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component; null when absent. An empty string is distinct from
     * null: toString() still emits the "//" authority marker for it.
     * @type string|null
     */
    public $host;

    /**
     * Port component, cast to int by the constructor; null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"); null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading "#"); null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower-case.
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // Keep null as "no port"; anything else is normalized to an integer.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if none could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                if ($def->defaultScheme !== null) {
                    // something funky happened to the default scheme object
                    trigger_error(
                        'Default scheme object "' . $def->defaultScheme . '" was not readable',
                        E_USER_WARNING
                    );
                } // suppress error if it's null
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        // (gen-delims are ":/?#[]@"; no encoder below needs them as a set)
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is parenthesized on purpose: && binds tighter
        // than ||, so without the parentheses a scheme-less URI with an
        // empty host would also enter this branch, where there is no
        // scheme to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if both the host gets stripped
                    // out
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment gets a stricter encoder whose safe set
                // omits ':'; the remainder (from the first '/') is encoded
                // as ordinary path segments.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may not yield an object (getSchemeObj() above
        // already treats a falsy result as possible). Guard the property
        // read so no warning is raised; the outcome is the same as reading
        // a missing "secure" flag, i.e. the security comparison is skipped.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body