<?php

// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
 * Filter converting URLs in the text to HTML links
 *
 * @package    filter
 * @subpackage urltolink
 * @copyright  2010 David Mudrak <david@moodle.com>
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/**
 * Text filter that wraps plain-text URLs found in content into HTML anchors
 * and, when configured, turns links to images into embedded images.
 */
class filter_urltolink extends moodle_text_filter {

    /**
     * @var array global configuration for this filter
     *
     * This might be eventually moved into parent class if we found it
     * useful for other filters, too.
     */
    protected static $globalconfig;

    /**
     * Apply the filter to the text
     *
     * The text is only processed when the caller states the original format
     * and that format is listed in the 'formats' setting of filter_urltolink.
     *
     * @see filter_manager::apply_filter_chain()
     * @param string $text to be processed by the text
     * @param array $options filter options
     * @return string text after processing
     */
    public function filter($text, array $options = array()) {
        if (!isset($options['originalformat'])) {
            // If the format is not specified, we are probably called by {@see format_string()}.
            // In that case it would be dangerous to replace the URL with a link because it
            // could be stripped. Therefore, we do nothing.
            return $text;
        }

        // Loose comparison is intentional: the configured formats are strings taken from
        // a comma separated setting, while the caller may pass the format as an integer.
        $enabledformats = explode(',', get_config('filter_urltolink', 'formats'));
        if (in_array($options['originalformat'], $enabledformats)) {
            $this->convert_urls_into_links($text);
        }

        return $text;
    }

    ////////////////////////////////////////////////////////////////////////////
    // internal implementation starts here
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Given some text this function converts any URLs it finds into HTML links
     *
     * Existing anchors and spans with class "nolink" are excluded from processing.
     * When the 'embedimages' setting is on, links produced here that point to
     * jpg/png/gif files are replaced by image tags.
     *
     * @param string $text Passed in by reference. The string to be searched for urls.
     */
    protected function convert_urls_into_links(&$text) {
        // Regions that must not be touched: existing anchors and spans marked "nolink".
        // See MDL-21168 for more info. A better way to ignore tags whether or not
        // they are escaped partially or completely would be desirable, for example
        // <a href="blah"> written plainly, or with some or all of its special
        // characters replaced by HTML entities.
        $filterignoretagsopen = array('<a\s[^>]+?>', '<span[^>]+?class="nolink"[^>]*?>');
        $filterignoretagsclose = array('</a>', '</span>');
        // Filled by filter_save_ignore_tags(); keys are the placeholders left in $text,
        // values the original markup (they are swapped back with str_replace() below).
        $ignoretags = [];
        filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

        // Check if we support unicode modifiers in regular expressions. Cache it.
        // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
        // chars are going to arrive to URLs officially really soon (2010?)
        // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
        // Various ideas from: http://alanstorm.com/url_regex_explained
        // Unicode check, negative assertion and other bits from Moodle.
        static $unicoderegexp;
        if (!isset($unicoderegexp)) {
            // This will fail silently, returning false, when unicode properties are unsupported.
            $unicoderegexp = @preg_match('/\pL/u', 'a');
        }

        // TODO MDL-21296 - use of unicode modifiers may cause a timeout
        // Capture group 1 is the optional "s" of https, group 2 the "www." of scheme-less URLs.
        $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
        $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
        $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
        $port = '(?::\d*)';
        $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
        $path = "(?:/$pathchar*)*";
        $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
        $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';

        // Lookbehind assertion: the match must not end with "]", ")", ",", "." or ";",
        // so punctuation that follows a URL in running text stays outside of the link.
        $lookbehindend = "(?<![]),.;])";

        // Group 3 is the host (domain name or numeric IP), group 4 everything after it.
        $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
                "($port?$path$querystring?$fragment?)$lookbehindend";
        if ($unicoderegexp) {
            $regex = '#' . $regex . '#ui';
        } else {
            // Without unicode support, fall back to plain ASCII letters. This has to be a
            // literal string replacement: '\pLl' and '\pL' are not valid delimited patterns,
            // so preg_replace() would fail here and leave us with an empty regex.
            $regex = '#' . str_replace(array('\pLl', '\pL'), 'a-z', $regex) . '#i';
        }

        // Locate any HTML tags.
        $chunks = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($chunks === false) {
            // Tokenizing failed; treat the whole text as a single chunk rather than losing it.
            $chunks = array($text);
        }

        $replacement = '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>';

        // Iterate through the tokenized text to handle chunks (html and content).
        foreach ($chunks as $idx => $chunk) {
            // Nothing to do. We skip completely any html chunk.
            if (strpos(trim($chunk), '<') === 0) {
                continue;
            }

            // Nothing to do. We skip any content chunk having any of these attributes.
            // NOTE(review): this looks like the "is not HTML attribute or CSS URL property"
            // guard; as a side effect legit text containing such strings is never linked.
            if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
                continue;
            }

            // Arrived here, we want to process every word in this chunk.
            $words = explode(' ', $chunk);

            foreach ($words as $idx2 => $word) {
                // ReDoS protection. Stop processing if a word is too large.
                if (strlen($word) >= 4096) {
                    continue;
                }

                $linked = preg_replace($regex, $replacement, $word);
                // preg_replace() returns null on a PCRE error; keep the original word then.
                if ($linked !== null) {
                    $words[$idx2] = $linked;
                }
            }

            // Copy the result back to the array.
            $chunks[$idx] = implode(' ', $words);
        }

        $text = implode('', $chunks);

        if (!empty($ignoretags)) {
            // Reversed so "progressive" str_replace() will solve some nesting problems.
            $ignoretags = array_reverse($ignoretags);
            $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
        }

        if (get_config('filter_urltolink', 'embedimages')) {
            // Now try to inject the images, this code was originally in the mediaplugin filter.
            // This may be useful only if somebody relies on the fact the links in FORMAT_MOODLE
            // get converted to URLs which in turn change to real images.
            $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
            $embedded = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
            // Keep the linked text if the replacement failed with a PCRE error.
            if ($embedded !== null) {
                $text = $embedded;
            }
        }
    }
}


/**
 * Change links to images into embedded images.
 *
 * This plugin is intended for automatic conversion of image URLs when FORMAT_MOODLE used.
 *
 * @param array $link regex match: [0] the whole anchor, [1] the href value, [3] the link text
 * @return string the image tag, or the unchanged anchor when href and link text differ
 */
function filter_urltolink_img_callback($link) {
    if ($link[1] !== $link[3]) {
        // This is not a link created by this filter, because the url does not match the text.
        return $link[0];
    }
    return '<img class="filter_urltolink_image" alt="" src="'.$link[1].'" />';
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body