Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  namespace core;
  18  
  19  /**
  20   * HTMLPurifier test case
  21   *
  22   * @package    core
  23   * @category   test
  24   * @copyright  2012 Petr Skoda {@link http://skodak.org}
  25   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  26   */
  27  class htmlpurifier_test extends \basic_testcase {
  28  
  29      /**
  30       * Verify _blank target is allowed.
  31       */
  32      public function test_allow_blank_target() {
  33          // See MDL-52651 for an explanation as to why the rel="noreferrer" attribute is expected here.
  34          // Also note we do not need to test links with an existing rel attribute as the HTML Purifier is configured to remove
  35          // the rel attribute.
  36          $text = '<a href="http://moodle.org" target="_blank">Some link</a>';
  37          $expected = '<a href="http://moodle.org" target="_blank" rel="noreferrer noopener">Some link</a>';
  38          $result = format_text($text, FORMAT_HTML);
  39          $this->assertSame($expected, $result);
  40  
  41          $result = format_text('<a href="http://moodle.org" target="some">Some link</a>', FORMAT_HTML);
  42          $this->assertSame('<a href="http://moodle.org">Some link</a>', $result);
  43      }
  44  
  45      /**
  46       * Verify our nolink tag accepted.
  47       */
  48      public function test_nolink() {
  49          // We can not use format text because nolink changes result.
  50          $text = '<nolink><div>no filters</div></nolink>';
  51          $result = purify_html($text, array());
  52          $this->assertSame($text, $result);
  53  
  54          $text = '<nolink>xxx<em>xx</em><div>xxx</div></nolink>';
  55          $result = purify_html($text, array());
  56          $this->assertSame($text, $result);
  57  
  58          // Ensure nolink doesn't force open tags to be closed, so can be virtually everywhere.
  59          $text = '<p><nolink><div>no filters</div></nolink></p>';
  60          $result = purify_html($text, array());
  61          $this->assertSame($text, $result);
  62      }
  63  
  64      /**
  65       * Verify our tex tag accepted.
  66       */
  67      public function test_tex() {
  68          $text = '<tex>a+b=c</tex>';
  69          $result = purify_html($text, array());
  70          $this->assertSame($text, $result);
  71      }
  72  
  73      /**
  74       * Verify our algebra tag accepted.
  75       */
  76      public function test_algebra() {
  77          $text = '<algebra>a+b=c</algebra>';
  78          $result = purify_html($text, array());
  79          $this->assertSame($text, $result);
  80      }
  81  
  82      /**
  83       * Verify our hacky multilang works.
  84       */
  85      public function test_multilang() {
  86          $text = '<lang lang="en">hmmm</lang><lang lang="anything">hm</lang>';
  87          $result = purify_html($text, array());
  88          $this->assertSame($text, $result);
  89  
  90          $text = '<span lang="en" class="multilang">hmmm</span><span lang="anything" class="multilang">hm</span>';
  91          $result = purify_html($text, array());
  92          $this->assertSame($text, $result);
  93  
  94          $text = '<span lang="en">hmmm</span>';
  95          $result = purify_html($text, array());
  96          $this->assertNotSame($text, $result);
  97  
  98          // Keep standard lang tags.
  99  
 100          $text = '<span lang="de_DU" class="multilang">asas</span>';
 101          $result = purify_html($text, array());
 102          $this->assertSame($text, $result);
 103  
 104          $text = '<lang lang="de_DU">xxxxxx</lang>';
 105          $result = purify_html($text, array());
 106          $this->assertSame($text, $result);
 107      }
 108  
 109      /**
 110       * Tests the 'allowid' option for format_text.
 111       */
 112      public function test_format_text_allowid() {
 113          // Start off by not allowing ids (default).
 114          $options = array(
 115              'nocache' => true
 116          );
 117          $result = format_text('<div id="example">Frog</div>', FORMAT_HTML, $options);
 118          $this->assertSame('<div>Frog</div>', $result);
 119  
 120          // Now allow ids.
 121          $options['allowid'] = true;
 122          $result = format_text('<div id="example">Frog</div>', FORMAT_HTML, $options);
 123          $this->assertSame('<div id="example">Frog</div>', $result);
 124      }
 125  
 126      public function test_allowobjectembed() {
 127          global $CFG;
 128  
 129          $this->assertSame('0', $CFG->allowobjectembed);
 130  
 131          $text = '<object width="425" height="350">
 132  <param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
 133  <param name="wmode" value="transparent" />
 134  <embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" />
 135  </object>hmmm';
 136          $result = purify_html($text, array());
 137          $this->assertSame('hmmm', trim($result));
 138  
 139          $CFG->allowobjectembed = '1';
 140  
 141          $expected = '<object width="425" height="350" data="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash">
 142  <param name="allowScriptAccess" value="never" />
 143  <param name="allowNetworking" value="internal" />
 144  <param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
 145  <param name="wmode" value="transparent" />
 146  <embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" allowscriptaccess="never" allownetworking="internal" />
 147  </object>hmmm';
 148          $result = purify_html($text, array());
 149          $this->assertSame(str_replace("\n", '', $expected), str_replace("\n", '', $result));
 150  
 151          $CFG->allowobjectembed = '0';
 152  
 153          $result = purify_html($text, array());
 154          $this->assertSame('hmmm', trim($result));
 155      }
 156  
 157      /**
 158       * Test if linebreaks kept unchanged.
 159       */
 160      public function test_line_breaking() {
 161          $text = "\n\raa\rsss\nsss\r";
 162          $this->assertSame($text, purify_html($text));
 163      }
 164  
 165      /**
 166       * Test fixing of strict problems.
 167       */
 168      public function test_tidy() {
 169          $text = "<p>xx";
 170          $this->assertSame('<p>xx</p>', purify_html($text));
 171  
 172          $text = "<P>xx</P>";
 173          $this->assertSame('<p>xx</p>', purify_html($text));
 174  
 175          $text = "xx<br>";
 176          $this->assertSame('xx<br />', purify_html($text));
 177      }
 178  
 179      /**
 180       * Test nesting - this used to cause problems in earlier versions.
 181       */
 182      public function test_nested_lists() {
 183          $text = "<ul><li>One<ul><li>Two</li></ul></li><li>Three</li></ul>";
 184          $this->assertSame($text, purify_html($text));
 185      }
 186  
 187      /**
 188       * Test that XSS protection works, complete smoke tests are in htmlpurifier itself.
 189       */
 190      public function test_cleaning_nastiness() {
 191          $text = "x<SCRIPT>alert('XSS')</SCRIPT>x";
 192          $this->assertSame('xx', purify_html($text));
 193  
 194          $text = '<DIV STYLE="background-image:url(javascript:alert(\'XSS\'))">xx</DIV>';
 195          $this->assertSame('<div>xx</div>', purify_html($text));
 196  
 197          $text = '<DIV STYLE="width:expression(alert(\'XSS\'));">xx</DIV>';
 198          $this->assertSame('<div>xx</div>', purify_html($text));
 199  
 200          $text = 'x<IFRAME SRC="javascript:alert(\'XSS\');"></IFRAME>x';
 201          $this->assertSame('xx', purify_html($text));
 202  
 203          $text = 'x<OBJECT TYPE="text/x-scriptlet" DATA="http://ha.ckers.org/scriptlet.html"></OBJECT>x';
 204          $this->assertSame('xx', purify_html($text));
 205  
 206          $text = 'x<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>x';
 207          $this->assertSame('xx', purify_html($text));
 208  
 209          $text = 'x<form></form>x';
 210          $this->assertSame('xx', purify_html($text));
 211      }
 212  
 213      /**
 214       * Test internal function used for clean_text() speedup.
 215       */
 216      public function test_is_purify_html_necessary() {
 217          // First our shortcuts.
 218          $text = "";
 219          $this->assertFalse(is_purify_html_necessary($text));
 220          $this->assertSame($text, purify_html($text));
 221  
 222          $text = "666";
 223          $this->assertFalse(is_purify_html_necessary($text));
 224          $this->assertSame($text, purify_html($text));
 225  
 226          $text = "abc\ndef \" ' ";
 227          $this->assertFalse(is_purify_html_necessary($text));
 228          $this->assertSame($text, purify_html($text));
 229  
 230          $text = "abc\n<p>def</p>efg<p>hij</p>";
 231          $this->assertFalse(is_purify_html_necessary($text));
 232          $this->assertSame($text, purify_html($text));
 233  
 234          $text = "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>";
 235          $this->assertFalse(is_purify_html_necessary($text));
 236          $this->assertSame($text, purify_html($text));
 237  
 238          // Now failures.
 239          $text = "&nbsp;";
 240          $this->assertTrue(is_purify_html_necessary($text));
 241  
 242          $text = "Gin & Tonic";
 243          $this->assertTrue(is_purify_html_necessary($text));
 244  
 245          $text = "Gin > Tonic";
 246          $this->assertTrue(is_purify_html_necessary($text));
 247  
 248          $text = "Gin < Tonic";
 249          $this->assertTrue(is_purify_html_necessary($text));
 250  
 251          $text = "<div>abc</div>";
 252          $this->assertTrue(is_purify_html_necessary($text));
 253  
 254          $text = "<span>abc</span>";
 255          $this->assertTrue(is_purify_html_necessary($text));
 256  
 257          $text = "<br>abc";
 258          $this->assertTrue(is_purify_html_necessary($text));
 259  
 260          $text = "<p class='xxx'>abc</p>";
 261          $this->assertTrue(is_purify_html_necessary($text));
 262  
 263          $text = "<p>abc<em></p></em>";
 264          $this->assertTrue(is_purify_html_necessary($text));
 265  
 266          $text = "<p>abc";
 267          $this->assertTrue(is_purify_html_necessary($text));
 268      }
 269  
 270      public function test_allowed_schemes() {
 271          // First standard schemas.
 272          $text = '<a href="http://www.example.com/course/view.php?id=5">link</a>';
 273          $this->assertSame($text, purify_html($text));
 274  
 275          $text = '<a href="https://www.example.com/course/view.php?id=5">link</a>';
 276          $this->assertSame($text, purify_html($text));
 277  
 278          $text = '<a href="ftp://user@ftp.example.com/some/file.txt">link</a>';
 279          $this->assertSame($text, purify_html($text));
 280  
 281          $text = '<a href="nntp://example.com/group/123">link</a>';
 282          $this->assertSame($text, purify_html($text));
 283  
 284          $text = '<a href="news:groupname">link</a>';
 285          $this->assertSame($text, purify_html($text));
 286  
 287          $text = '<a href="mailto:user@example.com">link</a>';
 288          $this->assertSame($text, purify_html($text));
 289  
 290          // Extra schemes allowed in moodle.
 291          $text = '<a href="irc://irc.example.com/3213?pass">link</a>';
 292          $this->assertSame($text, purify_html($text));
 293  
 294          $text = '<a href="rtsp://www.example.com/movie.mov">link</a>';
 295          $this->assertSame($text, purify_html($text));
 296  
 297          $text = '<a href="rtmp://www.example.com/video.f4v">link</a>';
 298          $this->assertSame($text, purify_html($text));
 299  
 300          $text = '<a href="teamspeak://speak.example.com/?par=val?par2=val2">link</a>';
 301          $this->assertSame($text, purify_html($text));
 302  
 303          $text = '<a href="gopher://gopher.example.com/resource">link</a>';
 304          $this->assertSame($text, purify_html($text));
 305  
 306          $text = '<a href="mms://www.example.com/movie.mms">link</a>';
 307          $this->assertSame($text, purify_html($text));
 308  
 309          // Now some borked or dangerous schemes.
 310          $text = '<a href="javascript://www.example.com">link</a>';
 311          $this->assertSame('<a>link</a>', purify_html($text));
 312  
 313          $text = '<a href="hmmm://www.example.com">link</a>';
 314          $this->assertSame('<a>link</a>', purify_html($text));
 315      }
 316  
 317      /**
 318       * Test non-ascii domain names
 319       */
 320      public function test_idn() {
 321  
 322          // Example of domain that gives the same result in IDNA2003 and IDNA2008 .
 323          $text = '<a href="http://правительство.рф">правительство.рф</a>';
 324          $expected = '<a href="http://xn--80aealotwbjpid2k.xn--p1ai">правительство.рф</a>';
 325          $this->assertSame($expected, purify_html($text));
 326  
 327          // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters .
 328          $text = '<a href="http://teßt.de">teßt.de</a>';
 329          $expected = '<a href="http://xn--tet-6ka.de">teßt.de</a>';
 330          $this->assertSame($expected, purify_html($text));
 331  
 332          $text = '<a href="http://βόλος.com">http://βόλος.com</a>';
 333          $expected = '<a href="http://xn--nxasmm1c.com">http://βόλος.com</a>';
 334          $this->assertSame($expected, purify_html($text));
 335  
 336          $text = '<a href="http://نامه‌ای.com">http://نامه‌ای.com</a>';
 337          $expected = '<a href="http://xn--mgba3gch31f060k.com">http://نامه‌ای.com</a>';
 338          $this->assertSame($expected, purify_html($text));
 339      }
 340  
 341      /**
 342       * Tests media tags.
 343       *
 344       * @dataProvider media_tags_provider
 345       * @param string $mediatag HTML media tag
 346       * @param string $expected expected result
 347       */
 348      public function test_media_tags($mediatag, $expected) {
 349          $actual = format_text($mediatag, FORMAT_MOODLE, ['filter' => false]);
 350          $this->assertEquals($expected, $actual);
 351      }
 352  
 353      /**
 354       * Test cases for the test_media_tags test.
 355       */
 356      public function media_tags_provider() {
 357          // Takes an array of attributes, then generates a test for each of them.
 358          $generatetestcases = function($prefix, array $attrs, array $templates) {
 359              return array_reduce($attrs, function($carry, $attr) use ($prefix, $templates) {
 360                  $testcase = [$prefix . '/' . $attr => [
 361                      sprintf($templates[0], $attr),
 362                      sprintf($templates[1], $attr)
 363                  ]];
 364                  return empty(array_values($carry)[0]) ? $testcase : $carry + $testcase;
 365              }, [[]]);
 366          };
 367  
 368          $audioattrs = [
 369              'preload="auto"', 'autoplay=""', 'loop=""', 'muted=""', 'controls=""',
 370              'crossorigin="anonymous"', 'crossorigin="use-credentials"'
 371          ];
 372          $videoattrs = [
 373              'crossorigin="anonymous"', 'crossorigin="use-credentials"',
 374              'poster="https://upload.wikimedia.org/wikipedia/en/1/14/Space_jam.jpg"',
 375              'preload="auto"', 'autoplay=""', 'playsinline=""', 'loop=""', 'muted=""',
 376              'controls=""', 'width="420"', 'height="69"'
 377          ];
 378          return $generatetestcases('Plain audio', $audioattrs + ['src="http://example.com/jam.wav"'], [
 379                  '<audio %1$s>Looks like you can\'t slam the jams.</audio>',
 380                  '<div class="text_to_html"><audio %1$s>Looks like you can\'t slam the jams.</audio></div>'
 381              ]) + $generatetestcases('Audio with one source', $audioattrs, [
 382                  '<audio %1$s><source src="http://example.com/getup.wav">No tasty jams for you.</audio>',
 383                  '<div class="text_to_html">' .
 384                      '<audio %1$s>' .
 385                          '<source src="http://example.com/getup.wav" />' .
 386                          'No tasty jams for you.' .
 387                      '</audio>' .
 388                  '</div>'
 389              ]) + $generatetestcases('Audio with multiple sources', $audioattrs, [
 390                  '<audio %1$s>' .
 391                      '<source src="http://example.com/getup.wav" type="audio/wav">' .
 392                      '<source src="http://example.com/getup.mp3" type="audio/mpeg">' .
 393                      '<source src="http://example.com/getup.ogg" type="audio/ogg">' .
 394                      'No tasty jams for you.' .
 395                  '</audio>',
 396                  '<div class="text_to_html">' .
 397                      '<audio %1$s>' .
 398                          '<source src="http://example.com/getup.wav" type="audio/wav" />' .
 399                          '<source src="http://example.com/getup.mp3" type="audio/mpeg" />' .
 400                          '<source src="http://example.com/getup.ogg" type="audio/ogg" />' .
 401                          'No tasty jams for you.' .
 402                      '</audio>' .
 403                  '</div>'
 404              ]) + $generatetestcases('Audio with sources and tracks', $audioattrs, [
 405                  '<audio %1$s>' .
 406                      '<source src="http://example.com/getup.wav" type="audio/wav">' .
 407                      '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
 408                      '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
 409                      'No tasty jams for you.' .
 410                  '</audio>',
 411                  '<div class="text_to_html">' .
 412                      '<audio %1$s>' .
 413                          '<source src="http://example.com/getup.wav" type="audio/wav" />' .
 414                          '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
 415                          '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
 416                          'No tasty jams for you.' .
 417                      '</audio>' .
 418                  '</div>'
 419              ]) + $generatetestcases('Plain video', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
 420                  '<video %1$s>Oh, that\'s pretty bad 😦</video>',
 421                  '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
 422              ]) + $generatetestcases('Video with illegal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
 423                  '<video %1$s><subtag></subtag>Oh, that\'s pretty bad 😦</video>',
 424                  '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
 425              ]) + $generatetestcases('Video with legal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
 426                  '<video %1$s>Did not work <a href="http://example.com/prettygood.mp4">click here to download</a></video>',
 427                  '<div class="text_to_html"><video %1$s>Did not work <a href="http://example.com/prettygood.mp4">' .
 428                  'click here to download</a></video></div>'
 429              ]) + $generatetestcases('Source tag without video or audio', $videoattrs, [
 430                  'some text <source src="http://example.com/getup.wav" type="audio/wav"> the end',
 431                  '<div class="text_to_html">some text  the end</div>'
 432              ]) + $generatetestcases('Video with one source', $videoattrs, [
 433                  '<video %1$s><source src="http://example.com/prettygood.mp4">Oh, that\'s pretty bad 😦</video>',
 434                  '<div class="text_to_html">' .
 435                      '<video %1$s>' .
 436                          '<source src="http://example.com/prettygood.mp4" />' .
 437                          'Oh, that\'s pretty bad 😦' .
 438                      '</video>' .
 439                  '</div>'
 440              ]) + $generatetestcases('Video with multiple sources', $videoattrs, [
 441                  '<video %1$s>' .
 442                      '<source src="http://example.com/prettygood.mp4" type="video/mp4">' .
 443                      '<source src="http://example.com/eljefe.mp4" type="video/mp4">' .
 444                      '<source src="http://example.com/turnitup.mov" type="video/mov">' .
 445                      'Oh, that\'s pretty bad 😦' .
 446                  '</video>',
 447                  '<div class="text_to_html">' .
 448                      '<video %1$s>' .
 449                          '<source src="http://example.com/prettygood.mp4" type="video/mp4" />' .
 450                          '<source src="http://example.com/eljefe.mp4" type="video/mp4" />' .
 451                          '<source src="http://example.com/turnitup.mov" type="video/mov" />' .
 452                          'Oh, that\'s pretty bad 😦' .
 453                      '</video>' .
 454                  '</div>'
 455              ]) + $generatetestcases('Video with sources and tracks', $audioattrs, [
 456                  '<video %1$s>' .
 457                      '<source src="http://example.com/getup.wav" type="audio/wav">' .
 458                      '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
 459                      '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
 460                      'No tasty jams for you.' .
 461                  '</video>',
 462                  '<div class="text_to_html">' .
 463                      '<video %1$s>' .
 464                          '<source src="http://example.com/getup.wav" type="audio/wav" />' .
 465                          '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
 466                          '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
 467                      'No tasty jams for you.' .
 468                      '</video>' .
 469                  '</div>'
 470              ]) + ['Video with invalid crossorigin' => [
 471                      '<video src="http://example.com/turnitup.mov" crossorigin="can i pls hab?">' .
 472                          'Oh, that\'s pretty bad 😦' .
 473                      '</video>',
 474                      '<div class="text_to_html">' .
 475                          '<video src="http://example.com/turnitup.mov">' .
 476                             'Oh, that\'s pretty bad 😦' .
 477                          '</video>' .
 478                      '</div>'
 479              ]] + ['Audio with invalid crossorigin' => [
 480                      '<audio src="http://example.com/getup.wav" crossorigin="give me. the jams.">' .
 481                          'nyemnyemnyem' .
 482                      '</audio>',
 483                      '<div class="text_to_html">' .
 484                          '<audio src="http://example.com/getup.wav">' .
 485                              'nyemnyemnyem' .
 486                          '</audio>' .
 487                      '</div>'
 488              ]] + ['Other attributes' => [
 489                  '<video src="http://example.com/turnitdown.mov" class="nofilter" data-something="data attribute" someattribute="somevalue" onclick="boom">' .
 490                      '<source src="http://example.com/getup.wav" type="audio/wav" class="shouldberemoved" data-sourcedata="source data" onmouseover="kill session" />' .
 491                      '<track src="http://example.com/subtitles_en.vtt" class="shouldberemoved" data-trackdata="track data" onmouseover="removeme" />' .
 492                      'Do not remove attribute class but remove other attributes' .
 493                  '</video>',
 494                  '<div class="text_to_html">' .
 495                      '<video src="http://example.com/turnitdown.mov" class="nofilter">' .
 496                          '<source src="http://example.com/getup.wav" type="audio/wav" />' .
 497                          '<track src="http://example.com/subtitles_en.vtt" />' .
 498                          'Do not remove attribute class but remove other attributes' .
 499                      '</video>' .
 500                  '</div>'
 501              ]];
 502      }
 503  }