<?php

declare(strict_types=1);
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

namespace SimplePie\HTTP;

/**
 * HTTP Response Parser
 *
 * A small state machine over a raw HTTP response string. Each state is the
 * name of a protected method on this class; parse() repeatedly invokes the
 * method named by the current state until the response has been emitted, an
 * error state is reached, or the input is exhausted.
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class Parser
{
    /**
     * HTTP Version
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Status code
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase
     *
     * @var string
     */
    public $reason = '';

    /**
     * Key/value pairs of the headers (keys are lower-cased header names)
     *
     * @var array
     */
    public $headers = [];

    /**
     * Body of the response
     *
     * @var string
     */
    public $body = '';

    // Every string state below is also the name of the method that handles it.
    private const STATE_HTTP_VERSION = 'http_version';

    private const STATE_STATUS = 'status';

    private const STATE_REASON = 'reason';

    private const STATE_NEW_LINE = 'new_line';

    private const STATE_BODY = 'body';

    private const STATE_NAME = 'name';

    private const STATE_VALUE = 'value';

    private const STATE_VALUE_CHAR = 'value_char';

    private const STATE_QUOTE = 'quote';

    private const STATE_QUOTE_ESCAPED = 'quote_escaped';

    private const STATE_QUOTE_CHAR = 'quote_char';

    private const STATE_CHUNKED = 'chunked';

    // Terminal state: parsing finished successfully.
    private const STATE_EMIT = 'emit';

    // Terminal state: falsy on purpose so the loop in parse() stops on it.
    private const STATE_ERROR = false;

    /**
     * Current state of the state machine
     *
     * @var self::STATE_*
     */
    protected $state = self::STATE_HTTP_VERSION;

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() every time this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->data_length = strlen($this->data);
    }

    /**
     * Parse the input data
     *
     * On success the public properties hold the parsed response. On failure
     * they are all reset to their defaults. The input buffer is released in
     * both cases.
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        while ($this->state && $this->state !== self::STATE_EMIT && $this->has_data()) {
            $state = $this->state;
            $this->$state();
        }
        $this->data = '';

        // STATE_BODY also counts as success: the input ended right after the
        // header block, so body() was never invoked and the body stays empty.
        if ($this->state === self::STATE_EMIT || $this->state === self::STATE_BODY) {
            return true;
        }

        // Reset to the declared defaults (http_version is a float, not '').
        $this->http_version = 0.0;
        $this->status_code = 0;
        $this->reason = '';
        $this->headers = [];
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return $this->position < $this->data_length;
    }

    /**
     * See if the next character is LWS
     *
     * A tab or space counts, as does a line feed that is immediately followed
     * by a tab or space (a folded header line).
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        $char = $this->data[$this->position];
        if ($char === "\x09" || $char === "\x20") {
            return true;
        }

        if ($char !== "\x0A" || !isset($this->data[$this->position + 1])) {
            return false;
        }

        $next = $this->data[$this->position + 1];
        return $next === "\x09" || $next === "\x20";
    }

    /**
     * Parse the HTTP version
     *
     * Requires the data to contain at least one line feed and to start with
     * "HTTP/" (case-insensitive); a version with more than one dot is an error.
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/') {
            $this->state = self::STATE_ERROR;
            return;
        }

        $len = strspn($this->data, '0123456789.', 5);
        // Keep the raw text in a local so the float property never holds a string.
        $version = substr($this->data, 5, $len);
        $this->position += 5 + $len;

        if (substr_count($version, '.') > 1) {
            $this->state = self::STATE_ERROR;
            return;
        }

        $this->http_version = (float) $version;
        // Skip the whitespace separating the version from the status code.
        $this->position += strspn($this->data, "\x09\x20", $this->position);
        $this->state = self::STATE_STATUS;
    }

    /**
     * Parse the status code
     *
     * At least one digit is required at the current position.
     */
    protected function status()
    {
        $len = strspn($this->data, '0123456789', $this->position);
        if ($len === 0) {
            $this->state = self::STATE_ERROR;
            return;
        }

        $this->status_code = (int) substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = self::STATE_REASON;
    }

    /**
     * Parse the reason phrase
     *
     * Consumes the rest of the status line, including its line feed.
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        $this->position += $len + 1;
        $this->state = self::STATE_NEW_LINE;
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if any), then decides whether the
     * next line is another header or the blank line that starts the body.
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '') {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
                $this->headers[$this->name] .= ', ' . $this->value;
            } else {
                $this->headers[$this->name] = $this->value;
            }
        }
        $this->name = '';
        $this->value = '';

        // Compare two bytes of the data itself: taking substr() of the single
        // character at the pointer could never equal CRLF.
        if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
            $this->position += 2;
            $this->state = self::STATE_BODY;
        } elseif ($this->data[$this->position] === "\x0A") {
            $this->position++;
            $this->state = self::STATE_BODY;
        } else {
            $this->state = self::STATE_NAME;
        }
    }

    /**
     * Parse a header name
     *
     * Reads up to the next colon or line feed. A line without a colon is
     * skipped; running out of data before either delimiter is an error.
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (!isset($this->data[$this->position + $len])) {
            $this->state = self::STATE_ERROR;
            return;
        }

        if ($this->data[$this->position + $len] === "\x0A") {
            // The pointer is left on the line feed for new_line() to consume.
            $this->position += $len;
            $this->state = self::STATE_NEW_LINE;
            return;
        }

        $this->name = substr($this->data, $this->position, $len);
        $this->position += $len + 1;
        $this->state = self::STATE_VALUE;
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     */
    protected function linear_whitespace()
    {
        do {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
                $this->position += 2;
            } elseif ($this->data[$this->position] === "\x0A") {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     */
    protected function value()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                // Workaround for ETags: we have to include the quotes as
                // part of the tag.
                if (strtolower($this->name) === 'etag') {
                    $this->value .= '"';
                    $this->position++;
                    $this->state = self::STATE_VALUE_CHAR;
                    break;
                }
                $this->position++;
                $this->state = self::STATE_QUOTE;
                break;

            case "\x0A":
                $this->position++;
                $this->state = self::STATE_NEW_LINE;
                break;

            default:
                $this->state = self::STATE_VALUE_CHAR;
                break;
        }
    }

    /**
     * Parse a header value while outside quotes
     *
     * Copies everything up to the next tab, space, line feed or double quote.
     */
    protected function value_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = self::STATE_VALUE;
    }

    /**
     * See what state to move to while within quoted header values
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                // Closing quote: back to unquoted value handling.
                $this->position++;
                $this->state = self::STATE_VALUE;
                break;

            case "\x0A":
                $this->position++;
                $this->state = self::STATE_NEW_LINE;
                break;

            case '\\':
                $this->position++;
                $this->state = self::STATE_QUOTE_ESCAPED;
                break;

            default:
                $this->state = self::STATE_QUOTE_CHAR;
                break;
        }
    }

    /**
     * Parse a header value while within quotes
     *
     * Copies everything up to the next tab, space, line feed, double quote or
     * backslash, then hands back to quote() to interpret that delimiter.
     */
    protected function quote_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        // We are still inside the quoted string. Returning to the unquoted
        // state here would misread the closing quote as an opening one and
        // would never reach quote_escaped() for a backslash.
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse an escaped character within quotes
     *
     * The character after the backslash is taken literally.
     */
    protected function quote_escaped()
    {
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = self::STATE_QUOTE;
    }

    /**
     * Parse the body
     *
     * Any non-empty Transfer-Encoding header routes the body through
     * chunked(), and the header is removed from the parsed headers.
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding'])) {
            unset($this->headers['transfer-encoding']);
            $this->state = self::STATE_CHUNKED;
        } else {
            $this->state = self::STATE_EMIT;
        }
    }

    /**
     * Parse a "Transfer-Encoding: chunked" body
     *
     * If the body does not look chunked, or turns out to be malformed part
     * way through, it is emitted unchanged.
     */
    protected function chunked()
    {
        if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) {
            $this->state = self::STATE_EMIT;
            return;
        }

        $decoded = '';
        $encoded = $this->body;

        while (true) {
            $is_chunked = (bool) preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches);
            if (!$is_chunked) {
                // Looks like it's not chunked after all
                $this->state = self::STATE_EMIT;
                return;
            }

            $length = hexdec(trim($matches[1]));
            if (!is_int($length)) {
                // hexdec() returns a float once the size exceeds PHP_INT_MAX.
                // Such a chunk cannot be valid, and under strict_types the
                // float would make substr() throw, so treat it as malformed.
                $this->state = self::STATE_EMIT;
                return;
            }

            if ($length === 0) {
                // Ignore trailer headers
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }

            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // The extra 2 skips the CRLF that terminates the chunk data.
            $encoded = substr($encoded, $chunk_length + $length + 2);

            // BC for PHP < 8.0: substr() can return bool instead of string
            $encoded = ($encoded === false) ? '' : $encoded;

            if (trim($encoded) === '0' || empty($encoded)) {
                $this->state = self::STATE_EMIT;
                $this->body = $decoded;
                return;
            }
        }
    }

    /**
     * Prepare headers (take care of proxies headers)
     *
     * Keeps the last of at most $count header blocks, then strips a leading
     * "200 Connection established" proxy block for HTTP/1.0 and HTTP/1.1.
     *
     * @param string  $headers Raw headers
     * @param int     $count   Redirection count. Default to 1.
     *
     * @return string
     */
    public static function prepareHeaders($headers, $count = 1)
    {
        $data = explode("\r\n\r\n", $headers, $count);
        $data = array_pop($data);

        foreach (['1.0', '1.1'] as $version) {
            if (false !== stripos($data, 'HTTP/' . $version . " 200 Connection established\r\n")) {
                $exploded = explode("\r\n\r\n", $data, 2);
                $data = end($exploded);
            }
        }

        return $data;
    }
}

class_alias('SimplePie\HTTP\Parser', 'SimplePie_HTTP_Parser');
title
Description
Body
title
Description
Body
title
Description
Body
title
Body