Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is supported too.
   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * Filter converting URLs in the text to HTML links
  20   *
  21   * @package    filter
  22   * @subpackage urltolink
  23   * @copyright  2010 David Mudrak <david@moodle.com>
  24   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  class filter_urltolink extends moodle_text_filter {
  30  
  31      /**
  32       * @var array global configuration for this filter
  33       *
  34       * This might be eventually moved into parent class if we found it
  35       * useful for other filters, too.
  36       */
  37      protected static $globalconfig;
  38  
  39      /**
  40       * Apply the filter to the text
  41       *
  42       * @see filter_manager::apply_filter_chain()
  43       * @param string $text to be processed by the text
  44       * @param array $options filter options
  45       * @return string text after processing
  46       */
  47      public function filter($text, array $options = array()) {
  48          if (!isset($options['originalformat'])) {
  49              // if the format is not specified, we are probably called by {@see format_string()}
  50              // in that case, it would be dangerous to replace URL with the link because it could
  51              // be stripped. therefore, we do nothing
  52              return $text;
  53          }
  54          if (in_array($options['originalformat'], explode(',', get_config('filter_urltolink', 'formats')))) {
  55              $this->convert_urls_into_links($text);
  56          }
  57          return $text;
  58      }
  59  
  60      ////////////////////////////////////////////////////////////////////////////
  61      // internal implementation starts here
  62      ////////////////////////////////////////////////////////////////////////////
  63  
  64      /**
  65       * Given some text this function converts any URLs it finds into HTML links
  66       *
  67       * @param string $text Passed in by reference. The string to be searched for urls.
  68       */
  69      protected function convert_urls_into_links(&$text) {
  70          //I've added img tags to this list of tags to ignore.
  71          //See MDL-21168 for more info. A better way to ignore tags whether or not
  72          //they are escaped partially or completely would be desirable. For example:
  73          //<a href="blah">
  74          //&lt;a href="blah"&gt;
  75          //&lt;a href="blah">
  76          $filterignoretagsopen  = array('<a\s[^>]+?>', '<span[^>]+?class="nolink"[^>]*?>');
  77          $filterignoretagsclose = array('</a>', '</span>');
  78          $ignoretags = [];
  79          filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
  80  
  81          // Check if we support unicode modifiers in regular expressions. Cache it.
  82          // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
  83          // chars are going to arrive to URLs officially really soon (2010?)
  84          // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
  85          // Various ideas from: http://alanstorm.com/url_regex_explained
  86          // Unicode check, negative assertion and other bits from Moodle.
  87          static $unicoderegexp;
  88          if (!isset($unicoderegexp)) {
  89              $unicoderegexp = @preg_match('/\pL/u', 'a'); // This will fail silently, returning false,
  90          }
  91  
  92          // TODO MDL-21296 - use of unicode modifiers may cause a timeout
  93          $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
  94          $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
  95          $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
  96          $port = '(?::\d*)';
  97          $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
  98          $path = "(?:/$pathchar*)*";
  99          $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
 100          $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
 101  
 102          // Lookbehind assertions.
 103          // Is not HTML attribute or CSS URL property. Unfortunately legit text like "url(http://...)" will not be a link.
 104          $lookbehindend = "(?<![]),.;])";
 105  
 106          $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
 107                  "($port?$path$querystring?$fragment?)$lookbehindend";
 108          if ($unicoderegexp) {
 109              $regex = '#' . $regex . '#ui';
 110          } else {
 111              $regex = '#' . preg_replace(array('\pLl', '\PL'), 'a-z', $regex) . '#i';
 112          }
 113  
 114          // Locate any HTML tags.
 115          $matches = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
 116  
 117          // Iterate through the tokenized text to handle chunks (html and content).
 118          foreach ($matches as $idx => $chunk) {
 119              // Nothing to do. We skip completely any html chunk.
 120              if (strpos(trim($chunk), '<') === 0) {
 121                  continue;
 122              }
 123  
 124              // Nothing to do. We skip any content chunk having any of these attributes.
 125              if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
 126                  continue;
 127              }
 128  
 129              // Arrived here, we want to process every word in this chunk.
 130              $text = $chunk;
 131              $words = explode(' ', $text);
 132  
 133              foreach ($words as $idx2 => $word) {
 134                  // ReDoS protection. Stop processing if a word is too large.
 135                  if (strlen($word) < 4096) {
 136                      $words[$idx2] = preg_replace($regex, '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>', $word);
 137                  }
 138              }
 139              $text = implode(' ', $words);
 140  
 141              // Copy the result back to the array.
 142              $matches[$idx] = $text;
 143          }
 144  
 145          $text = implode('', $matches);
 146  
 147          if (!empty($ignoretags)) {
 148              $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
 149              $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
 150          }
 151  
 152          if (get_config('filter_urltolink', 'embedimages')) {
 153              // now try to inject the images, this code was originally in the mediapluing filter
 154              // this may be useful only if somebody relies on the fact the links in FORMAT_MOODLE get converted
 155              // to URLs which in turn change to real images
 156              $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
 157              $text = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
 158          }
 159      }
 160  }
 161  
 162  
 163  /**
 164   * Change links to images into embedded images.
 165   *
 166   * This plugin is intended for automatic conversion of image URLs when FORMAT_MOODLE used.
 167   *
 168   * @param  $link
 169   * @return string
 170   */
 171  function filter_urltolink_img_callback($link) {
 172      if ($link[1] !== $link[3]) {
 173          // this is not a link created by this filter, because the url does not match the text
 174          return $link[0];
 175      }
 176      return '<img class="filter_urltolink_image" alt="" src="'.$link[1].'" />';
 177  }