Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Geoffrey Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */


/**
 * HTTP Response Parser
 *
 * A small state machine that splits a raw HTTP response into its version,
 * status code, reason phrase, headers and body. Each state is implemented by
 * the method of the same name; parse() keeps dispatching to the method named
 * by $state until the machine reaches 'emit', fails, or runs out of data.
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * Reset to an empty string by parse() when parsing fails.
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * Reset to an empty string by parse() when parsing fails.
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers (keys are lower-cased header names)
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method to call next, or false once parsing
	 * has failed.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * On failure every public result property is reset; note that
	 * $http_version and $status_code are reset to '' (not 0), which is kept
	 * as-is for backward compatibility with existing callers.
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// The state name doubles as the name of the handler method.
			$state = $this->state;
			$this->$state();
		}

		// The raw input is no longer needed once the machine has stopped.
		$this->data = '';

		// 'body' is also a success state: it is what remains when the headers
		// were terminated correctly but no body bytes follow them.
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or space always counts; a line feed only counts when it is
	 * immediately followed by a tab or space (a folded header line).
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}

		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}

		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the data to contain at least one line feed and to start with
	 * "HTTP/" (case-insensitive); otherwise parsing fails.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/')
		{
			$this->state = false;
			return;
		}

		$len = strspn($this->data, '0123456789.', 5);
		$this->http_version = substr($this->data, 5, $len);
		$this->position += 5 + $len;

		// More than one dot cannot be a valid version number.
		if (substr_count($this->http_version, '.') > 1)
		{
			$this->state = false;
			return;
		}

		$this->http_version = (float) $this->http_version;
		$this->position += strspn($this->data, "\x09\x20", $this->position);
		$this->state = 'status';
	}

	/**
	 * Parse the status code
	 *
	 * Fails when no digits are found at the current position.
	 */
	protected function status()
	{
		$len = strspn($this->data, '0123456789', $this->position);
		if ($len)
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, stripped of surrounding
	 * tabs, carriage returns and spaces.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		// Skip the phrase and the line feed that terminated it.
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if any), then decides whether the
	 * next line is another header or the blank line that precedes the body.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// Compare two bytes of the input, not a substring of a single
		// character (which could never equal CRLF).
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. A line without a colon is
	 * skipped; running out of data before either character is a failure.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (!isset($this->data[$this->position + $len]))
		{
			$this->state = false;
			return;
		}

		if ($this->data[$this->position + $len] === "\x0A")
		{
			// No colon on this line: leave the pointer on the line feed so
			// new_line() can handle it.
			$this->position += $len;
			$this->state = 'new_line';
		}
		else
		{
			$this->name = substr($this->data, $this->position, $len);
			// Skip the name and the colon.
			$this->position += $len + 1;
			$this->state = 'value';
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Workaround for ETags: we have to include the quotes as
				// part of the tag.
				if (strtolower($this->name) === 'etag')
				{
					$this->value .= '"';
					$this->position++;
					$this->state = 'value_char';
					break;
				}
				$this->position++;
				$this->state = 'quote';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			default:
				$this->state = 'value_char';
				break;
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Appends everything up to the next tab, space, line feed or double
	 * quote to the current value.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Closing quote: back to the unquoted value state.
				$this->position++;
				$this->state = 'value';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			case '\\':
				$this->position++;
				$this->state = 'quote_escaped';
				break;

			default:
				$this->state = 'quote_char';
				break;
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Appends everything up to the next tab, space, line feed, double quote
	 * or backslash to the current value.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// We are still inside the quoted string, so stay in the quoted state.
		// Returning to 'value' here would make a following backslash literal
		// and would treat the closing quote as an opening one.
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character following a backslash is taken literally.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything from the pointer onwards is the body. When a non-empty
	 * Transfer-Encoding header is present it is removed from the headers and
	 * the body is handed to chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not look chunked it is left untouched. Otherwise the
	 * chunks are concatenated into $body; trailer headers are ignored.
	 */
	protected function chunked()
	{
		// A chunk header: hexadecimal size, optional extensions, CRLF.
		$chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

		if (!preg_match($chunk_header, trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($chunk_header, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec($matches[1]);
			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// Skip the chunk header, the chunk data and its trailing CRLF.
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}

	/**
	 * Prepare headers (take care of proxies headers)
	 *
	 * Splits the raw data on blank lines into at most $count parts, keeps
	 * the last part, and strips any "200 Connection established" preamble
	 * from it.
	 *
	 * @param string  $headers Raw headers
	 * @param integer $count   Redirection count. Default to 1.
	 *
	 * @return string
	 */
	public static function prepareHeaders($headers, $count = 1)
	{
		$parts = explode("\r\n\r\n", $headers, $count);
		$data = array_pop($parts);

		// str_ireplace() leaves the string unchanged when a needle is absent,
		// so no separate stripos() guard is needed.
		return str_ireplace(
			array(
				"HTTP/1.0 200 Connection established\r\n\r\n",
				"HTTP/1.1 200 Connection established\r\n\r\n",
			),
			'',
			$data
		);
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body