<?php

/**
 * Percent-encoding helper for URI components.
 *
 * @warning
 *      An instance carries a single table of "do not escape" bytes that
 *      both encode() and normalize() consult. An instance configured for
 *      encode() SHOULD NOT be reused for normalize(), or vice-versa.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values (array keys) that are left unescaped.
     * Every stored value is boolean true; only the keys matter.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of bytes that must not be escaped.
     *
     * The table always holds the unreserved characters (ASCII letters,
     * digits, "-", ".", "_" and "~"); every byte of $preserve is then
     * added on top of those.
     *
     * @param string|bool $preserve Extra characters to leave unescaped,
     *      or false (the default) to add none.
     */
    public function __construct($preserve = false)
    {
        // Unreserved characters, listed as byte values.
        $unreserved = array_merge(
            range(ord('0'), ord('9')),
            range(ord('A'), ord('Z')),
            range(ord('a'), ord('z')),
            array(ord('-'), ord('.'), ord('_'), ord('~'))
        );
        foreach ($unreserved as $code) {
            $this->preserve[$code] = true;
        }

        if ($preserve === false) {
            return;
        }

        // Caller-supplied extras, registered one byte at a time.
        $count = strlen($preserve);
        for ($pos = 0; $pos < $count; $pos++) {
            $this->preserve[ord($preserve[$pos])] = true;
        }
    }

    /**
     * Replacement for urlencode(): escapes every byte that is not in the
     * preserve table as an upper-case %XX sequence.
     *
     * @note
     *      The input is assumed to be normalized already, so that every
     *      percent sign in it starts a valid escape sequence. Percent
     *      signs are therefore always copied through untouched, whether
     *      or not "%" is in the preserve table.
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $encoded = '';
        $length = strlen($string);
        for ($pos = 0; $pos < $length; $pos++) {
            $char = $string[$pos];
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                // Existing escapes and preserved bytes pass through as-is.
                $encoded .= $char;
                continue;
            }
            $encoded .= sprintf('%%%02X', ord($char));
        }
        return $encoded;
    }

    /**
     * Repairs and canonicalizes the percent-escapes in a string.
     *
     * For each "%" in the input:
     *  - if it is not followed by two hex digits, the "%" itself is
     *    escaped as "%25" and the following text is kept;
     *  - if it encodes a byte found in the preserve table, the escape is
     *    replaced by that literal byte;
     *  - otherwise the escape is kept with its hex digits upper-cased.
     *
     * @warning This function is affected by the extra characters given to
     *          the constructor, even though the usual desired behavior is
     *          for those not to be decoded here. Be careful when reusing
     *          instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }

        // Everything before the first "%" is copied verbatim; each later
        // segment is the text that followed one "%" in the input.
        $segments = explode('%', $string);
        $result = array_shift($segments);

        foreach ($segments as $segment) {
            if (strlen($segment) < 2 || !ctype_xdigit(substr($segment, 0, 2))) {
                // Not a valid escape: escape the stray percent sign.
                $result .= '%25' . $segment;
                continue;
            }

            $hex = substr($segment, 0, 2);
            $rest = substr($segment, 2);
            $code = hexdec($hex);

            $result .= isset($this->preserve[$code])
                ? (chr($code) . $rest)
                : ('%' . strtoupper($hex) . $rest);
        }

        return $result;
    }
}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body