See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]
<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

/**
 * Used for data cleanup and post-processing
 *
 *
 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
 *
 * @package SimplePie
 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
 */
class SimplePie_Sanitize
{
    /**
     * Base URL against which relative URLs are resolved.
     * Assigned by sanitize() and read by replace_urls().
     */
    public $base;

    // Options
    public $remove_div = true;
    public $image_handler = '';
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    public $encode_instead_of_strip = false;
    public $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
    public $rename_attributes = array();
    public $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
    public $strip_comments = false;
    public $output_encoding = 'UTF-8';
    public $enable_cache = true;
    public $cache_location = './cache';
    public $cache_name_function = 'md5';
    public $timeout = 10;
    public $useragent = '';
    public $force_fsockopen = false;
    public $replace_url_attributes = null;
    public $registry;

    /**
     * List of domains for which to force HTTPS.
     * @see SimplePie_Sanitize::set_https_domains()
     * Array is a tree split at DNS levels. Example:
     * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
     */
    public $https_domains = array();

    /**
     * Initialise the sanitizer with the default URL-replacement map.
     */
    public function __construct()
    {
        // Set defaults
        $this->set_url_replacements(null);
    }

    /**
     * Choose whether sanitize() removes the outer <div> wrapper from its output.
     *
     * @param bool $enable True to remove the wrapper, false to keep a bare <div>
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the URL prefix used to rewrite cached <img> sources.
     *
     * @param string|false $page Prefix prepended to the cache key; any falsy value disables image handling
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Inject the registry used to reach the Cache, File and Misc helpers.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Receive cache settings used by the image handler.
     *
     * Falsy $cache_location / $cache_name_function values leave the current
     * setting untouched. $cache_class is accepted for signature compatibility
     * but is not used by this method.
     *
     * @param bool $enable_cache
     * @param string $cache_location
     * @param callable|string $cache_name_function Stored as a string
     * @param string $cache_class Unused here
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Receive HTTP settings used when the image handler fetches remote images.
     *
     * Only truthy arguments overwrite the current setting, and each one is
     * stored cast to string. $file_class is accepted for signature
     * compatibility but is not used by this method.
     *
     * @param string $file_class Unused here
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            $this->force_fsockopen = (string) $force_fsockopen;
        }
    }

    /**
     * Set the list of element names that sanitize() strips from HTML.
     *
     * @param array|string|false $tags Array of tag names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if ($tags)
        {
            if (is_array($tags))
            {
                $this->strip_htmltags = $tags;
            }
            else
            {
                $this->strip_htmltags = explode(',', $tags);
            }
        }
        else
        {
            $this->strip_htmltags = false;
        }
    }

    /**
     * Choose whether stripped tags are emitted as escaped text instead of being dropped.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the attributes that sanitize() renames to "data-sanitized-<name>".
     *
     * @param array|string|false $attribs Array of attribute names, a comma-separated string, or a falsy value to disable renaming
     */
    public function rename_attributes($attribs = array())
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->rename_attributes = $attribs;
            }
            else
            {
                $this->rename_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->rename_attributes = false;
        }
    }

    /**
     * Set the attributes that sanitize() removes from every element.
     *
     * @param array|string|false $attribs Array of attribute names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->strip_attributes = $attribs;
            }
            else
            {
                $this->strip_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->strip_attributes = false;
        }
    }

    /**
     * Set the attributes that sanitize() forces onto specific elements.
     *
     * @param array|string|false $attribs Map of tag name => (attribute => value) pairs; a falsy value disables the feature
     */
    public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->add_attributes = $attribs;
            }
            else
            {
                $this->add_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->add_attributes = false;
        }
    }

    /**
     * Choose whether sanitize() removes comment nodes from HTML.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the character encoding of the data returned by sanitize().
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set element/attribute key/value pairs of HTML attributes
     * containing URLs that need to be resolved relative to the feed
     *
     * Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
     * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
     * |ins|@cite, |q|@cite, |source|@src, |video|@poster, |video|@src
     *
     * @since 1.0
     * @param array|null $element_attribute Element/attribute key/value pairs, null for default
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'audio' => 'src',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite',
                'source' => 'src',
                'video' => array(
                    'poster',
                    'src'
                )
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Set the list of domains for which to force HTTPS.
     * @see SimplePie_Misc::https_url()
     * Example array('biz', 'example.com', 'example.org', 'www.example.net');
     *
     * Each domain is split on "." and stored right-to-left as a nested array
     * whose leaf is true; a leaf covers every sub-domain below it.
     *
     * @param array $domains List of domain names
     */
    public function set_https_domains($domains)
    {
        $this->https_domains = array();
        foreach ($domains as $domain)
        {
            $domain = trim($domain, ". \t\n\r\0\x0B");
            $segments = array_reverse(explode('.', $domain));
            $node =& $this->https_domains;
            foreach ($segments as $segment)
            {//Build a tree
                if ($node === true)
                {
                    // A shorter, already registered domain covers this one.
                    break;
                }
                if (!isset($node[$segment]))
                {
                    $node[$segment] = array();
                }
                $node =& $node[$segment];
            }
            $node = true;
            // Drop the reference so later code cannot write into the tree by accident.
            unset($node);
        }
    }

    /**
     * Check if the domain is in the list of forced HTTPS.
     *
     * @param string $domain Host name; other values (e.g. a failed parse_url() result) are cast to string first
     * @return bool True when the walk through the tree ends on a true leaf
     */
    protected function is_https_domain($domain)
    {
        $domain = trim((string) $domain, '. ');
        $segments = array_reverse(explode('.', $domain));
        // Read-only walk: a plain copy is enough, no reference into the tree needed.
        $node = $this->https_domains;
        foreach ($segments as $segment)
        {//Explore the tree
            if (!isset($node[$segment]))
            {
                break;
            }
            $node = $node[$segment];
        }
        return $node === true;
    }

    /**
     * Force HTTPS for selected Web sites.
     *
     * @param string $url
     * @return string $url with "http://" upgraded to "https://" when its host is a forced-HTTPS domain, otherwise $url unchanged
     */
    public function https_url($url)
    {
        $is_plain_http = strtolower(substr($url, 0, 7)) === 'http://';
        if ($is_plain_http && $this->is_https_domain(parse_url($url, PHP_URL_HOST)))
        {
            //Add the 's' to HTTPS
            return substr_replace($url, 's', 4, 0);
        }
        return $url;
    }

    /**
     * Clean up a piece of feed data according to its construct type.
     *
     * HTML/XHTML data is parsed with DOMDocument, then comments, tags and
     * attributes are stripped/renamed/added per the configured options,
     * URL attributes are made absolute, and images are optionally cached.
     * IRI data is made absolute; TEXT and IRI data is HTML-escaped.
     *
     * @param string $data Raw data
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL for resolving relative URLs
     * @return string Sanitized data, converted to the configured output encoding
     * @throws SimplePie_Exception When HTML must be processed but DOMDocument is unavailable
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);
        if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
            {
                // An entity reference or a closing tag is taken as evidence of HTML.
                if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
                {
                    $type |= SIMPLEPIE_CONSTRUCT_HTML;
                }
                else
                {
                    $type |= SIMPLEPIE_CONSTRUCT_TEXT;
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
            {
                $data = base64_decode($data);
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
            {
                if (!class_exists('DOMDocument'))
                {
                    throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
                }
                $document = new DOMDocument();
                $document->encoding = 'UTF-8';

                $data = $this->preprocess($data, $type);

                // Parser warnings about malformed markup are silenced while loading.
                set_error_handler(array('SimplePie_Misc', 'silence_errors'));
                $document->loadHTML($data);
                restore_error_handler();

                $xpath = new DOMXPath($document);

                // Strip comments
                if ($this->strip_comments)
                {
                    $comments = $xpath->query('//comment()');

                    foreach ($comments as $comment)
                    {
                        $comment->parentNode->removeChild($comment);
                    }
                }

                // Strip out HTML tags and attributes that might cause various security problems.
                // Based on recommendations by Mark Pilgrim at:
                // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
                if ($this->strip_htmltags)
                {
                    foreach ($this->strip_htmltags as $tag)
                    {
                        $this->strip_tag($tag, $document, $xpath, $type);
                    }
                }

                if ($this->rename_attributes)
                {
                    foreach ($this->rename_attributes as $attrib)
                    {
                        $this->rename_attr($attrib, $xpath);
                    }
                }

                if ($this->strip_attributes)
                {
                    foreach ($this->strip_attributes as $attrib)
                    {
                        $this->strip_attr($attrib, $xpath);
                    }
                }

                if ($this->add_attributes)
                {
                    foreach ($this->add_attributes as $tag => $valuePairs)
                    {
                        $this->add_attr($tag, $valuePairs, $document);
                    }
                }

                // Replace relative URLs
                $this->base = $base;
                foreach ($this->replace_url_attributes as $element => $attributes)
                {
                    $this->replace_urls($document, $element, $attributes);
                }

                // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
                if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
                {
                    $images = $document->getElementsByTagName('img');
                    foreach ($images as $img)
                    {
                        if (!$img->hasAttribute('src'))
                        {
                            continue;
                        }

                        $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
                        $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

                        if ($cache->load())
                        {
                            $img->setAttribute('src', $this->image_handler . $image_url);
                            continue;
                        }

                        $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));

                        // The bitmask test must be parenthesised: "===" binds tighter than "&",
                        // so without the parentheses the non-remote check could never be true.
                        $is_not_remote = ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0;
                        $status_ok = $file->status_code === 200 || ($file->status_code > 206 && $file->status_code < 300);

                        if ($file->success && ($is_not_remote || $status_ok))
                        {
                            if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                            {
                                $img->setAttribute('src', $this->image_handler . $image_url);
                            }
                            else
                            {
                                trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                            }
                        }
                    }
                }

                // Get content node
                $div = $document->getElementsByTagName('body')->item(0)->firstChild;
                // Finally, convert to a HTML string
                $data = trim($document->saveHTML($div));

                if ($this->remove_div)
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                    $data = preg_replace('/<\/div>$/', '', $data);
                }
                else
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                }

                $data = str_replace('</source>', '', $data);
            }

            if ($type & SIMPLEPIE_CONSTRUCT_IRI)
            {
                $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
                if ($absolute !== false)
                {
                    $data = $absolute;
                }
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
            {
                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
            }

            if ($this->output_encoding !== 'UTF-8')
            {
                $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
            }
        }
        return $data;
    }

    /**
     * Wrap a markup fragment in a complete UTF-8 document for DOMDocument::loadHTML().
     *
     * Any <html>/<body> tags already present in the fragment are removed first.
     *
     * @param string $html Markup fragment
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Full document: doctype, <head> with a charset <meta>, and the fragment inside <body>
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
        // True whenever $type carries any flag other than XHTML.
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            // Atom XHTML constructs are wrapped with a div by default
            // Note: No protection if $html contains a stray </div>!
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make the given URL attributes of every $tag element absolute.
     *
     * Elements whose tag is in the strip list are skipped. Resolved URLs are
     * additionally passed through https_url().
     *
     * @param DOMDocument $document
     * @param string $tag Element name
     * @param array|string $attributes Attribute name or list of attribute names
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
        {
            return;
        }

        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element)
        {
            foreach ($attributes as $attribute)
            {
                if (!$element->hasAttribute($attribute))
                {
                    continue;
                }

                $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                if ($value !== false)
                {
                    $value = $this->https_url($value);
                    $element->setAttribute($attribute, $value);
                }
            }
        }
    }

    /**
     * Produce the replacement for a regex match of a tag to be stripped.
     *
     * Not called from within this class.
     * NOTE(review): looks like a preg_replace_callback() handler kept for
     * external callers; the meaning of the capture groups is fixed by that
     * caller's pattern - confirm before relying on it.
     *
     * @param array $match Regex match array
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        if ($this->encode_instead_of_strip)
        {
            if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
            }
            else
            {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }
        }
        elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
        {
            return $match[4];
        }
        else
        {
            return '';
        }
    }

    /**
     * Remove every $tag element found below <body>.
     *
     * - With encode_instead_of_strip: the element is replaced by its children,
     *   surrounded by text nodes spelling out the opening and closing tag
     *   (script/style get no tag text, only their children).
     * - Otherwise script/style are removed together with their content.
     * - Otherwise the element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document
     * @param DOMXPath $xpath
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, $document, $xpath, $type)
    {
        $elements = $xpath->query('body//' . $tag);
        $is_script_or_style = in_array($tag, array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // For elements which aren't script or style, include the tag itself
                if (!$is_script_or_style)
                {
                    $text = '<' . $tag;
                    if ($element->hasAttributes())
                    {
                        $attrs = array();
                        foreach ($element->attributes as $name => $attr)
                        {
                            $value = $attr->value;

                            // Strict comparison: a value such as "0" is a real value, not an empty one.
                            if ($value === '')
                            {
                                // In XHTML, empty values should never exist, so we repeat the value
                                if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                                {
                                    $value = $name;
                                }
                                // For HTML, empty is fine
                                elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                                {
                                    $attrs[] = $name;
                                    continue;
                                }
                            }

                            // Standard attribute text (uses the possibly adjusted $value)
                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild(new DOMText($text));
                }

                // Move all children of the element into the fragment, preserving order.
                while ($element->firstChild !== null)
                {
                    $fragment->appendChild($element->firstChild);
                }

                if (!$is_script_or_style)
                {
                    $fragment->appendChild(new DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        }

        if ($is_script_or_style)
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }

        foreach ($elements as $element)
        {
            $fragment = $document->createDocumentFragment();
            // Move all children of the element into the fragment, preserving order.
            while ($element->firstChild !== null)
            {
                $fragment->appendChild($element->firstChild);
            }

            $element->parentNode->replaceChild($fragment, $element);
        }
    }

    /**
     * Remove the attribute $attrib from every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMXPath $xpath
     */
    protected function strip_attr($attrib, $xpath)
    {
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Rename the attribute $attrib to "data-sanitized-$attrib" on every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMXPath $xpath
     */
    protected function rename_attr($attrib, $xpath)
    {
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->setAttribute('data-sanitized-' . $attrib, $element->getAttribute($attrib));
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Set the given attributes on every $tag element, overwriting existing values.
     *
     * @param string $tag Element name
     * @param array $valuePairs Map of attribute name => value
     * @param DOMDocument $document
     */
    protected function add_attr($tag, $valuePairs, $document)
    {
        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element)
        {
            foreach ($valuePairs as $attrib => $value)
            {
                $element->setAttribute($attrib, $value);
            }
        }
    }
}

class_alias('SimplePie_Sanitize', 'SimplePie\Sanitize', false);
title
Description
Body
title
Description
Body
title
Description
Body
title
Body