Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.
   1  <?php
   2  
   3  /**
   4   * Removes all unrecognized tags from the list of tokens.
   5   *
   6   * This strategy iterates through all the tokens and removes unrecognized
   7   * tokens. If a token is not recognized but a TagTransform is defined for
   8   * that element, the element will be transformed accordingly.
   9   */
  10  
  11  class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
  12  {
  13  
  14      /**
  15       * @param HTMLPurifier_Token[] $tokens
  16       * @param HTMLPurifier_Config $config
  17       * @param HTMLPurifier_Context $context
  18       * @return array|HTMLPurifier_Token[]
  19       */
  20      public function execute($tokens, $config, $context)
  21      {
  22          $definition = $config->getHTMLDefinition();
  23          $generator = new HTMLPurifier_Generator($config, $context);
  24          $result = array();
  25  
  26          $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
  27          $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
  28  
  29          // currently only used to determine if comments should be kept
  30          $trusted = $config->get('HTML.Trusted');
  31          $comment_lookup = $config->get('HTML.AllowedComments');
  32          $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
  33          $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
  34  
  35          $remove_script_contents = $config->get('Core.RemoveScriptContents');
  36          $hidden_elements = $config->get('Core.HiddenElements');
  37  
  38          // remove script contents compatibility
  39          if ($remove_script_contents === true) {
  40              $hidden_elements['script'] = true;
  41          } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
  42              unset($hidden_elements['script']);
  43          }
  44  
  45          $attr_validator = new HTMLPurifier_AttrValidator();
  46  
  47          // removes tokens until it reaches a closing tag with its value
  48          $remove_until = false;
  49  
  50          // converts comments into text tokens when this is equal to a tag name
  51          $textify_comments = false;
  52  
  53          $token = false;
  54          $context->register('CurrentToken', $token);
  55  
  56          $e = false;
  57          if ($config->get('Core.CollectErrors')) {
  58              $e =& $context->get('ErrorCollector');
  59          }
  60  
  61          foreach ($tokens as $token) {
  62              if ($remove_until) {
  63                  if (empty($token->is_tag) || $token->name !== $remove_until) {
  64                      continue;
  65                  }
  66              }
  67              if (!empty($token->is_tag)) {
  68                  // DEFINITION CALL
  69  
  70                  // before any processing, try to transform the element
  71                  if (isset($definition->info_tag_transform[$token->name])) {
  72                      $original_name = $token->name;
  73                      // there is a transformation for this tag
  74                      // DEFINITION CALL
  75                      $token = $definition->
  76                          info_tag_transform[$token->name]->transform($token, $config, $context);
  77                      if ($e) {
  78                          $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
  79                      }
  80                  }
  81  
  82                  if (isset($definition->info[$token->name])) {
  83                      // mostly everything's good, but
  84                      // we need to make sure required attributes are in order
  85                      if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
  86                          $definition->info[$token->name]->required_attr &&
  87                          ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
  88                      ) {
  89                          $attr_validator->validateToken($token, $config, $context);
  90                          $ok = true;
  91                          foreach ($definition->info[$token->name]->required_attr as $name) {
  92                              if (!isset($token->attr[$name])) {
  93                                  $ok = false;
  94                                  break;
  95                              }
  96                          }
  97                          if (!$ok) {
  98                              if ($e) {
  99                                  $e->send(
 100                                      E_ERROR,
 101                                      'Strategy_RemoveForeignElements: Missing required attribute',
 102                                      $name
 103                                  );
 104                              }
 105                              continue;
 106                          }
 107                          $token->armor['ValidateAttributes'] = true;
 108                      }
 109  
 110                      if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
 111                          $textify_comments = $token->name;
 112                      } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
 113                          $textify_comments = false;
 114                      }
 115  
 116                  } elseif ($escape_invalid_tags) {
 117                      // invalid tag, generate HTML representation and insert in
 118                      if ($e) {
 119                          $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
 120                      }
 121                      $token = new HTMLPurifier_Token_Text(
 122                          $generator->generateFromToken($token)
 123                      );
 124                  } else {
 125                      // check if we need to destroy all of the tag's children
 126                      // CAN BE GENERICIZED
 127                      if (isset($hidden_elements[$token->name])) {
 128                          if ($token instanceof HTMLPurifier_Token_Start) {
 129                              $remove_until = $token->name;
 130                          } elseif ($token instanceof HTMLPurifier_Token_Empty) {
 131                              // do nothing: we're still looking
 132                          } else {
 133                              $remove_until = false;
 134                          }
 135                          if ($e) {
 136                              $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
 137                          }
 138                      } else {
 139                          if ($e) {
 140                              $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
 141                          }
 142                      }
 143                      continue;
 144                  }
 145              } elseif ($token instanceof HTMLPurifier_Token_Comment) {
 146                  // textify comments in script tags when they are allowed
 147                  if ($textify_comments !== false) {
 148                      $data = $token->data;
 149                      $token = new HTMLPurifier_Token_Text($data);
 150                  } elseif ($trusted || $check_comments) {
 151                      // always cleanup comments
 152                      $trailing_hyphen = false;
 153                      if ($e) {
 154                          // perform check whether or not there's a trailing hyphen
 155                          if (substr($token->data, -1) == '-') {
 156                              $trailing_hyphen = true;
 157                          }
 158                      }
 159                      $token->data = rtrim($token->data, '-');
 160                      $found_double_hyphen = false;
 161                      while (strpos($token->data, '--') !== false) {
 162                          $found_double_hyphen = true;
 163                          $token->data = str_replace('--', '-', $token->data);
 164                      }
 165                      if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
 166                          ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
 167                          // OK good
 168                          if ($e) {
 169                              if ($trailing_hyphen) {
 170                                  $e->send(
 171                                      E_NOTICE,
 172                                      'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
 173                                  );
 174                              }
 175                              if ($found_double_hyphen) {
 176                                  $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
 177                              }
 178                          }
 179                      } else {
 180                          if ($e) {
 181                              $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
 182                          }
 183                          continue;
 184                      }
 185                  } else {
 186                      // strip comments
 187                      if ($e) {
 188                          $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
 189                      }
 190                      continue;
 191                  }
 192              } elseif ($token instanceof HTMLPurifier_Token_Text) {
 193              } else {
 194                  continue;
 195              }
 196              $result[] = $token;
 197          }
 198          if ($remove_until && $e) {
 199              // we removed tokens until the end, throw error
 200              $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
 201          }
 202          $context->destroy('CurrentToken');
 203          return $result;
 204      }
 205  }
 206  
 207  // vim: et sw=4 sts=4