See Release Notes
Long Term Support Release
<?php

/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent-encoder it uses to
     * normalize every URI before splitting it.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first passed through the percent-encoder's normalize()
     * and then split into scheme, authority, path, query and fragment.
     * When an authority is present it is split further into userinfo,
     * host and port; otherwise those three components are null.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI, or false if the
     *         component regexp does not match. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. The odd-numbered groups include their
        // delimiter (":", "//", "?", "#"), so testing them with empty()
        // distinguishes "component absent" from "component present but
        // empty" without being fooled by a component whose value is "0".
        $scheme    = !empty($matches[1]) ? $matches[2] : null;
        $authority = !empty($matches[3]) ? $matches[4] : null;
        $path      = $matches[5]; // always present, can be empty
        $query     = !empty($matches[6]) ? $matches[7] : null;
        $fragment  = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);

            // Group 1 carries the trailing "@", so empty() is safe here.
            $userinfo = !empty($matches[1]) ? $matches[2] : null;

            // Group 3 is the bare host with no delimiter attached, so it
            // must NOT be tested with empty(): empty('0') is true in PHP
            // and would silently turn the host "0" into ''.
            $host = isset($matches[3]) ? $matches[3] : '';

            // Group 4 carries the leading ":". A colon followed by no
            // digits yields port 0 because of the (int) cast.
            $port = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body