See Release Notes
Long Term Support Release
<?php

/**
 * Strategy that filters a token list down to elements the HTML definition
 * knows about.
 *
 * A tag whose name has an entry in the definition's tag-transform table is
 * transformed before anything else. A tag that is still unknown afterwards
 * is either turned into a text token (Core.EscapeInvalidTags) or dropped;
 * when its name is listed in Core.HiddenElements, the tokens that follow are
 * dropped too, until a tag with the same name is seen again. Comments are
 * kept, turned into text, or removed depending on configuration. Text tokens
 * pass through; any other token kind is discarded.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Runs the filter over a list of tokens.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $html_generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_foreign_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // The next four values only influence whether comments survive.
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $allowed_comments !== array() || $allowed_comments_regexp !== null;

        $strip_script_body = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents, when explicitly true or false, overrides
        // whatever Core.HiddenElements says about 'script'.
        if ($strip_script_body === true) {
            $hidden['script'] = true;
        } elseif ($strip_script_body === false) {
            if (isset($hidden['script'])) {
                unset($hidden['script']);
            }
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Name of the hidden foreign element currently being skipped, or
        // false when nothing is being skipped.
        $skip_until = false;

        // Name of the recognized hidden element we are inside of, or false;
        // while set, comments are emitted as text tokens.
        $comment_text_scope = false;

        // NOTE(review): $token is presumably taken by reference here so the
        // context follows the loop variable below - confirm against
        // HTMLPurifier_Context::register. The variable is deliberately never
        // renamed or replaced by a copy.
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            $is_tag = !empty($token->is_tag);

            // While skipping, only a tag carrying the awaited name gets through.
            if ($skip_until && !($is_tag && $token->name === $skip_until)) {
                continue;
            }

            if ($is_tag) {
                // Give a registered tag transform the first shot at the token.
                if (isset($html_def->info_tag_transform[$token->name])) {
                    $name_before = $token->name;
                    $transform = $html_def->info_tag_transform[$name_before];
                    $token = $transform->transform($token, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (!isset($html_def->info[$token->name])) {
                    if (!$escape_foreign_tags) {
                        // Unknown element that is not escaped: drop it, and
                        // for hidden elements update the skip state first.
                        if (isset($hidden[$token->name])) {
                            if ($token instanceof HTMLPurifier_Token_Start) {
                                $skip_until = $token->name;
                            } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                                // Neither a start nor an empty tag: stop skipping.
                                $skip_until = false;
                            }
                            $removal_message = 'Strategy_RemoveForeignElements: Foreign meta element removed';
                        } else {
                            $removal_message = 'Strategy_RemoveForeignElements: Foreign element removed';
                        }
                        if ($errors) {
                            $errors->send(E_ERROR, $removal_message);
                        }
                        continue;
                    }

                    // Unknown element, escaping enabled: replace the tag with
                    // a text token holding its generated HTML.
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $html_generator->generateFromToken($token)
                    );
                } else {
                    // Known element: start/empty tags must still carry every
                    // required attribute after validation. 'img' is only
                    // checked when Core.RemoveInvalidImg is enabled.
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    if ($opens_element &&
                        $html_def->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img)
                    ) {
                        $validator->validateToken($token, $config, $context);
                        $lacks_required = false;
                        foreach ($html_def->info[$token->name]->required_attr as $required_name) {
                            if (!isset($token->attr[$required_name])) {
                                $lacks_required = true;
                                break;
                            }
                        }
                        if ($lacks_required) {
                            if ($errors) {
                                // $required_name is the first attribute found missing.
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $required_name
                                );
                            }
                            continue;
                        }
                        // Flag the token as already validated.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track entry into / exit from a recognized hidden element.
                    if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $comment_text_scope = $token->name;
                    } elseif ($token->name === $comment_text_scope && $token instanceof HTMLPurifier_Token_End) {
                        $comment_text_scope = false;
                    }
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($comment_text_scope !== false) {
                    // Inside a recognized hidden element: the comment becomes text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } else {
                    $keep_comment = false;
                    if ($trusted || $filter_comments) {
                        // The trailing-hyphen check only feeds an error
                        // notice, so it is skipped without a collector.
                        $had_trailing_hyphen = $errors && substr($token->data, -1) == '-';

                        // Normalize: strip trailing hyphens, then collapse
                        // every run of hyphens to a single one.
                        $token->data = rtrim($token->data, '-');
                        $hyphens_collapsed = false;
                        while (strpos($token->data, '--') !== false) {
                            $hyphens_collapsed = true;
                            $token->data = str_replace('--', '-', $token->data);
                        }

                        $keep_comment = $trusted;
                        if (!$keep_comment) {
                            $comment_body = trim($token->data);
                            $keep_comment = !empty($allowed_comments[$comment_body]) ||
                                ($allowed_comments_regexp !== null &&
                                    preg_match($allowed_comments_regexp, $comment_body));
                        }

                        if ($keep_comment && $errors) {
                            if ($had_trailing_hyphen) {
                                $errors->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($hyphens_collapsed) {
                                $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    }

                    if (!$keep_comment) {
                        // Comments disabled, or this one is not on the allow list.
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Not a tag, comment or text token: discard.
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $errors) {
            // The skipped hidden element never ended, so everything up to
            // the end of input was removed.
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }
        $context->destroy('CurrentToken');
        return $kept;
    }
}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body