Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 39 and 310] [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401] [Versions 39 and 402] [Versions 39 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Tests our html2text hacks
  19   *
  20   * Note: includes original tests from testweblib.php
  21   *
  22   * @package    core
  23   * @category   phpunit
  24   * @copyright  2012 Petr Skoda {@link http://skodak.org}
  25   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  26   */
  27  
  28  
  29  defined('MOODLE_INTERNAL') || die();
  30  
  31  
  32  class core_html2text_testcase extends basic_testcase {
  33  
  34      /**
  35       * ALT as image replacements.
  36       */
  37      public function test_images() {
  38          $this->assertSame('[edit]', html_to_text('<img src="edit.png" alt="edit" />'));
  39  
  40          $text = 'xx<img src="gif.gif" alt="some gif" />xx';
  41          $result = html_to_text($text, null, false, false);
  42          $this->assertSame($result, 'xx[some gif]xx');
  43      }
  44  
  45      /**
  46       * No magic quotes messing.
  47       */
  48      public function test_no_strip_slashes() {
  49          $this->assertSame('[\edit]', html_to_text('<img src="edit.png" alt="\edit" />'));
  50  
  51          $text = '\\magic\\quotes\\are\\\\horrible';
  52          $result = html_to_text($text, null, false, false);
  53          $this->assertSame($result, $text);
  54      }
  55  
  56      /**
  57       * core_text integration.
  58       */
  59      public function test_core_text() {
  60          $text = '<strong>Žluťoučký koníček</strong>';
  61          $result = html_to_text($text, null, false, false);
  62          $this->assertSame($result, 'ŽLUŤOUČKÝ KONÍČEK');
  63      }
  64  
  65      /**
  66       * Protect 0.
  67       */
  68      public function test_zero() {
  69          $text = '0';
  70          $result = html_to_text($text, null, false, false);
  71          $this->assertSame($result, $text);
  72  
  73          $this->assertSame('0', html_to_text('0'));
  74      }
  75  
  76      /**
  77       * Test the links list enumeration.
  78       */
  79      public function test_build_link_list() {
  80  
  81          // Note the trailing whitespace left intentionally in the text.
  82          $text = 'Total of <a title="List of integrated issues"
  83              href="http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D">     
  84              <strong>27 issues</strong></a> and <a href="http://another.url/?f=a&amp;b=2">some</a> other
  85  have been fixed <strong><a href="http://third.url/view.php">last week</a></strong>';
  86  
  87          // Do not collect links.
  88          $result = html_to_text($text, 5000, false);
  89          $this->assertSame('Total of 27 ISSUES and some other have been fixed LAST WEEK', $result);
  90  
  91          // Collect and enumerate links.
  92          $result = html_to_text($text, 5000, true);
  93          $this->assertSame(0, strpos($result, 'Total of 27 ISSUES [1] and some [2] other have been fixed LAST WEEK [3]'));
  94          $this->assertSame(false, strpos($result, '[0]'));
  95          $this->assertSame(1, preg_match('|^'.preg_quote('[1] http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D').'$|m', $result));
  96          $this->assertSame(1, preg_match('|^'.preg_quote('[2] http://another.url/?f=a&amp;b=2').'$|m', $result));
  97          $this->assertSame(1, preg_match('|^'.preg_quote('[3] http://third.url/view.php').'$|m', $result));
  98          $this->assertSame(false, strpos($result, '[4]'));
  99  
 100          // Test multiple occurrences of the same URL.
 101          $text = '<p>See <a href="http://moodle.org">moodle.org</a>,
 102              <a href="http://www.google.fr">google</a>, <a href="http://www.univ-lemans.fr">univ-lemans</a>
 103              and <a href="http://www.google.fr">google</a>.
 104              Also try <a href="https://www.google.fr">google via HTTPS</a>.';
 105          $result = html_to_text($text, 5000, true);
 106          $this->assertSame(0, strpos($result, 'See moodle.org [1], google [2], univ-lemans [3] and google [2]. Also try google via HTTPS [4].'));
 107          $this->assertSame(false, strpos($result, '[0]'));
 108          $this->assertSame(1, preg_match('|^'.preg_quote('[1] http://moodle.org').'$|m', $result));
 109          $this->assertSame(1, preg_match('|^'.preg_quote('[2] http://www.google.fr').'$|m', $result));
 110          $this->assertSame(1, preg_match('|^'.preg_quote('[3] http://www.univ-lemans.fr').'$|m', $result));
 111          $this->assertSame(1, preg_match('|^'.preg_quote('[4] https://www.google.fr').'$|m', $result));
 112          $this->assertSame(false, strpos($result, '[5]'));
 113      }
 114  
 115      /**
 116       * Various invalid HTML typed by users that ignore html strict.
 117       **/
 118      public function test_invalid_html() {
 119          $text = 'Gin & Tonic';
 120          $result = html_to_text($text, null, false, false);
 121          $this->assertSame($result, $text);
 122  
 123          $text = 'Gin > Tonic';
 124          $result = html_to_text($text, null, false, false);
 125          $this->assertSame($result, $text);
 126  
 127          $text = 'Gin < Tonic';
 128          $result = html_to_text($text, null, false, false);
 129          $this->assertSame($result, $text);
 130      }
 131  
 132      /**
 133       * Basic text formatting.
 134       */
 135      public function test_simple() {
 136          $this->assertSame("_Hello_ WORLD!\n", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
 137          $this->assertSame("All the WORLD’S a stage.\n\n-- William Shakespeare\n", html_to_text('<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>'));
 138          $this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>'));
 139          $this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!'));
 140      }
 141  
 142      /**
 143       * Test line wrapping.
 144       */
 145      public function test_text_nowrap() {
 146          $long = "Here is a long string, more than 75 characters long, since by default html_to_text wraps text at 75 chars.";
 147          $wrapped = "Here is a long string, more than 75 characters long, since by default\nhtml_to_text wraps text at 75 chars.";
 148          $this->assertSame($long, html_to_text($long, 0));
 149          $this->assertSame($wrapped, html_to_text($long));
 150      }
 151  
 152      /**
 153       * Whitespace removal.
 154       */
 155      public function test_trailing_whitespace() {
 156          $this->assertSame('With trailing whitespace and some more text', html_to_text("With trailing whitespace   \nand some   more text", 0));
 157      }
 158  
 159      /**
 160       * PRE parsing.
 161       */
 162      public function test_html_to_text_pre_parsing_problem() {
 163          $strorig = 'Consider the following function:<br /><pre><span style="color: rgb(153, 51, 102);">void FillMeUp(char* in_string) {'.
 164              '<br />  int i = 0;<br />  while (in_string[i] != \'\0\') {<br />    in_string[i] = \'X\';<br />    i++;<br />  }<br />'.
 165              '}</span></pre>What would happen if a non-terminated string were input to this function?<br /><br />';
 166  
 167          // Note, the spaces in the <pre> section are Unicode NBSPs - they may not be displayed in your editor.
 168          $strconv = 'Consider the following function:
 169  
 170  void FillMeUp(char* in_string) {
 171    int i = 0;
 172    while (in_string[i] != \'\0\') {
 173      in_string[i] = \'X\';
 174      i++;
 175    }
 176  }
 177  What would happen if a non-terminated string were input to this function?
 178  
 179  ';
 180  
 181          $this->assertSame($strconv, html_to_text($strorig));
 182      }
 183  
 184      /**
 185       * Scripts should be stripped.
 186       */
 187      public function test_strip_scripts() {
 188          $this->assertSame('Interesting text',
 189                  html_to_text('Interesting <script type="text/javascript">var what_a_mess = "Yuck!";</script> text', 0));
 190      }
 191  }