Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Geoffrey Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

/**
 * Used for data cleanup and post-processing
 *
 *
 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
 *
 * @package SimplePie
 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
 */
class SimplePie_Sanitize
{
    // Private vars
    var $base;

    // Registry used to reach the Misc, Cache and File services. Declared
    // explicitly so that set_registry() does not create a dynamic property.
    var $registry;

    // Options
    var $remove_div = true;
    var $image_handler = '';
    var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    var $encode_instead_of_strip = false;
    var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
    var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
    var $strip_comments = false;
    var $output_encoding = 'UTF-8';
    var $enable_cache = true;
    var $cache_location = './cache';
    var $cache_name_function = 'md5';
    var $timeout = 10;
    var $useragent = '';
    var $force_fsockopen = false;
    var $replace_url_attributes = null;

    /**
     * Initialise the sanitizer with the default URL replacement map.
     */
    public function __construct()
    {
        // Set defaults
        $this->set_url_replacements(null);
    }

    /**
     * Choose whether the wrapping <div> is removed from sanitized markup.
     *
     * @param bool $enable True to drop the wrapper, false to keep a bare <div>
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the prefix that cached image URLs are rewritten to.
     *
     * @param string|false $page Handler prefix; any falsy value disables image handling
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Store the registry used to call the Misc, Cache and File services.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Receive the cache settings used when image handling is enabled.
     *
     * Falsy values for $cache_location and $cache_name_function leave the
     * current setting untouched. $cache_class is accepted but not used here.
     *
     * @param bool $enable_cache
     * @param string $cache_location
     * @param string $cache_name_function Callable name used to derive cache keys
     * @param string $cache_class Unused
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Receive the file-fetching settings used when image handling is enabled.
     *
     * Falsy values leave the current setting untouched. $file_class is
     * accepted but not used here. Values are stored as strings.
     *
     * @param string $file_class Unused
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            $this->force_fsockopen = (string) $force_fsockopen;
        }
    }

    /**
     * Set the list of element names that sanitize() strips.
     *
     * @param array|string|false $tags Array of tag names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if ($tags)
        {
            if (is_array($tags))
            {
                $this->strip_htmltags = $tags;
            }
            else
            {
                $this->strip_htmltags = explode(',', $tags);
            }
        }
        else
        {
            $this->strip_htmltags = false;
        }
    }

    /**
     * Choose whether stripped tags are turned into visible text instead of being removed.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the list of attribute names that sanitize() removes from every element.
     *
     * @param array|string|false $attribs Array of attribute names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->strip_attributes = $attribs;
            }
            else
            {
                $this->strip_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->strip_attributes = false;
        }
    }

    /**
     * Set the attributes that sanitize() forces onto specific elements.
     *
     * @param array|string|false $attribs Map of tag name => (attribute => value) pairs, or a falsy value to disable.
     *                                    A non-array value is split on commas and stored as-is.
     */
    public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->add_attributes = $attribs;
            }
            else
            {
                $this->add_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->add_attributes = false;
        }
    }

    /**
     * Choose whether comment nodes are removed from sanitized markup.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the encoding that sanitize() converts its result to.
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set element/attribute key/value pairs of HTML attributes
     * containing URLs that need to be resolved relative to the feed
     *
     * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
     * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
     * |q|@cite
     *
     * @since 1.0
     * @param array|null $element_attribute Element/attribute key/value pairs, null for default
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Clean a piece of feed data according to its construct type.
     *
     * Depending on the bits set in $type the data is base64-decoded, parsed
     * as (X)HTML and stripped of the configured tags/attributes/comments,
     * has its URLs absolutized against $base, is HTML-escaped, and is finally
     * converted to the configured output encoding.
     *
     * @param string $data Raw data to sanitize
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL used to resolve relative URLs
     * @return string Sanitized data
     * @throws SimplePie_Exception When markup must be parsed but DOMDocument is unavailable
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);
        if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
            {
                // An entity-like sequence or a closing tag makes us treat the data as HTML
                if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
                {
                    $type |= SIMPLEPIE_CONSTRUCT_HTML;
                }
                else
                {
                    $type |= SIMPLEPIE_CONSTRUCT_TEXT;
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
            {
                $data = base64_decode($data);
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
            {

                if (!class_exists('DOMDocument'))
                {
                    throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
                }
                $document = new DOMDocument();
                $document->encoding = 'UTF-8';

                $data = $this->preprocess($data, $type);

                // Parser warnings about malformed feed markup are expected; silence them
                set_error_handler(array('SimplePie_Misc', 'silence_errors'));
                $document->loadHTML($data);
                restore_error_handler();

                $xpath = new DOMXPath($document);

                // Strip comments
                if ($this->strip_comments)
                {
                    $comments = $xpath->query('//comment()');

                    foreach ($comments as $comment)
                    {
                        $comment->parentNode->removeChild($comment);
                    }
                }

                // Strip out HTML tags and attributes that might cause various security problems.
                // Based on recommendations by Mark Pilgrim at:
                // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
                if ($this->strip_htmltags)
                {
                    foreach ($this->strip_htmltags as $tag)
                    {
                        $this->strip_tag($tag, $document, $xpath, $type);
                    }
                }

                if ($this->strip_attributes)
                {
                    foreach ($this->strip_attributes as $attrib)
                    {
                        $this->strip_attr($attrib, $xpath);
                    }
                }

                if ($this->add_attributes)
                {
                    foreach ($this->add_attributes as $tag => $valuePairs)
                    {
                        $this->add_attr($tag, $valuePairs, $document);
                    }
                }

                // Replace relative URLs
                $this->base = $base;
                foreach ($this->replace_url_attributes as $element => $attributes)
                {
                    $this->replace_urls($document, $element, $attributes);
                }

                // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
                if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
                {
                    // REMOTE_ADDR is not defined under CLI; only forward it when present
                    $request_headers = array();
                    if (isset($_SERVER['REMOTE_ADDR']))
                    {
                        $request_headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
                    }

                    $images = $document->getElementsByTagName('img');
                    foreach ($images as $img)
                    {
                        if ($img->hasAttribute('src'))
                        {
                            $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
                            $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

                            if ($cache->load())
                            {
                                $img->setAttribute('src', $this->image_handler . $image_url);
                            }
                            else
                            {
                                $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

                                // The bitwise test must be parenthesised: === binds tighter than &
                                if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
                                {
                                    if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                                    {
                                        $img->setAttribute('src', $this->image_handler . $image_url);
                                    }
                                    else
                                    {
                                        trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                                    }
                                }
                            }
                        }
                    }
                }

                // Get content node
                $div = $document->getElementsByTagName('body')->item(0)->firstChild;
                // Finally, convert to a HTML string
                if (version_compare(PHP_VERSION, '5.3.6', '>='))
                {
                    $data = trim($document->saveHTML($div));
                }
                else
                {
                    $data = trim($document->saveXML($div));
                }

                if ($this->remove_div)
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                    $data = preg_replace('/<\/div>$/', '', $data);
                }
                else
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_IRI)
            {
                $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
                if ($absolute !== false)
                {
                    $data = $absolute;
                }
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
            {
                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
            }

            if ($this->output_encoding !== 'UTF-8')
            {
                $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
            }
        }
        return $data;
    }

    /**
     * Wrap a markup fragment in a complete document so DOMDocument can load it.
     *
     * Any <html>/<body> tags in the fragment are removed first. When $type has
     * any bit set other than SIMPLEPIE_CONSTRUCT_XHTML the fragment is wrapped
     * in a <div> and given an HTML doctype; otherwise an XHTML 1.0 Strict
     * doctype is used and no wrapper is added.
     *
     * @param string $html Markup fragment
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Complete document declaring a UTF-8 charset
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            // Atom XHTML constructs are wrapped with a div by default
            // Note: No protection if $html contains a stray </div>!
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Absolutize the given URL attributes on every $tag element in the document.
     *
     * Uses $this->base as the base URL. Elements whose tag is in the
     * strip list are skipped.
     *
     * @param DOMDocument $document
     * @param string $tag Element name
     * @param string|array $attributes One attribute name or a list of them
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
        {
            $elements = $document->getElementsByTagName($tag);
            foreach ($elements as $element)
            {
                foreach ($attributes as $attribute)
                {
                    if ($element->hasAttribute($attribute))
                    {
                        $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                        if ($value !== false)
                        {
                            $element->setAttribute($attribute, $value);
                        }
                    }
                }
            }
        }
    }

    /**
     * Regex-replacement callback that strips or encodes a matched tag.
     *
     * Not called from within this class.
     *
     * @param array $match Match groups: [0] whole match, [1] tag name, [2] attributes, [3]/[4] inner content
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        if ($this->encode_instead_of_strip)
        {
            if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "<$match[1]$match[2]>$match[3]</$match[1]>";
            }
            else
            {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }
        }
        elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
        {
            return $match[4];
        }
        else
        {
            return '';
        }
    }

    /**
     * Remove (or encode as text) every $tag element found below <body>.
     *
     * - Encode mode: the element is replaced by its children, surrounded by
     *   text nodes spelling out the opening and closing tag (script/style
     *   keep only their children, without the tag text).
     * - Otherwise script/style elements are removed together with their content.
     * - Any other element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document
     * @param DOMXPath $xpath
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, $document, $xpath, $type)
    {
        $elements = $xpath->query('body//' . $tag);
        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // For elements which aren't script or style, include the tag itself
                if (!in_array($tag, array('script', 'style')))
                {
                    $text = '<' . $tag;
                    if ($element->hasAttributes())
                    {
                        $attrs = array();
                        foreach ($element->attributes as $name => $attr)
                        {
                            $value = $attr->value;

                            // Compare against '' rather than empty() so that "0" counts as a real value

                            // In XHTML, empty values should never exist, so we repeat the value
                            if ($value === '' && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
                            {
                                $value = $name;
                            }
                            // For HTML, empty is fine
                            elseif ($value === '' && ($type & SIMPLEPIE_CONSTRUCT_HTML))
                            {
                                $attrs[] = $name;
                                continue;
                            }

                            // Standard attribute text
                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild(new DOMText($text));
                }

                // appendChild() moves the node, so firstChild advances on each pass
                while ($element->firstChild !== null)
                {
                    $fragment->appendChild($element->firstChild);
                }

                if (!in_array($tag, array('script', 'style')))
                {
                    $fragment->appendChild(new DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        }
        elseif (in_array($tag, array('script', 'style')))
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }
        else
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // appendChild() moves the node, so firstChild advances on each pass
                while ($element->firstChild !== null)
                {
                    $fragment->appendChild($element->firstChild);
                }

                $element->parentNode->replaceChild($fragment, $element);
            }
        }
    }

    /**
     * Remove the named attribute from every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMXPath $xpath
     */
    protected function strip_attr($attrib, $xpath)
    {
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Set the given attributes on every $tag element in the document.
     *
     * @param string $tag Element name
     * @param array $valuePairs Map of attribute name => value
     * @param DOMDocument $document
     */
    protected function add_attr($tag, $valuePairs, $document)
    {
        $elements = $document->getElementsByTagName($tag);
        foreach ($elements as $element)
        {
            foreach ($valuePairs as $attrib => $value)
            {
                $element->setAttribute($attrib, $value);
            }
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body