Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
   1  <?php
   2  
   3  // why is this a top level function? Because PHP 5.2.0 doesn't seem to

   4  // understand how to interpret this filter if it's a static method.

   5  // It's all really silly, but if we go this route it might be reasonable

   6  // to coalesce all of these methods into one.

   7  function htmlpurifier_filter_extractstyleblocks_muteerrorhandler()
   8  {
   9  }
  10  
  11  /**

  12   * This filter extracts <style> blocks from input HTML, cleans them up

  13   * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')

  14   * so they can be used elsewhere in the document.

  15   *

  16   * @note

  17   *      See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for

  18   *      sample usage.

  19   *

  20   * @note

  21   *      This filter can also be used on stylesheets not included in the

  22   *      document--something purists would probably prefer. Just directly

  23   *      call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS()

  24   */
  25  class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
  26  {
  27      /**

  28       * @type string

  29       */
  30      public $name = 'ExtractStyleBlocks';
  31  
  32      /**

  33       * @type array

  34       */
  35      private $_styleMatches = array();
  36  
  37      /**

  38       * @type csstidy

  39       */
  40      private $_tidy;
  41  
  42      /**

  43       * @type HTMLPurifier_AttrDef_HTML_ID

  44       */
  45      private $_id_attrdef;
  46  
  47      /**

  48       * @type HTMLPurifier_AttrDef_CSS_Ident

  49       */
  50      private $_class_attrdef;
  51  
  52      /**

  53       * @type HTMLPurifier_AttrDef_Enum

  54       */
  55      private $_enum_attrdef;
  56  
  57      public function __construct()
  58      {
  59          $this->_tidy = new csstidy();
  60          $this->_tidy->set_cfg('lowercase_s', false);
  61          $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
  62          $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
  63          $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(
  64              array(
  65                  'first-child',
  66                  'link',
  67                  'visited',
  68                  'active',
  69                  'hover',
  70                  'focus'
  71              )
  72          );
  73      }
  74  
  75      /**

  76       * Save the contents of CSS blocks to style matches

  77       * @param array $matches preg_replace style $matches array

  78       */
  79      protected function styleCallback($matches)
  80      {
  81          $this->_styleMatches[] = $matches[1];
  82      }
  83  
  84      /**

  85       * Removes inline <style> tags from HTML, saves them for later use

  86       * @param string $html

  87       * @param HTMLPurifier_Config $config

  88       * @param HTMLPurifier_Context $context

  89       * @return string

  90       * @todo Extend to indicate non-text/css style blocks

  91       */
  92      public function preFilter($html, $config, $context)
  93      {
  94          $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl');
  95          if ($tidy !== null) {
  96              $this->_tidy = $tidy;
  97          }
  98          // NB: this must be NON-greedy because if we have

  99          // <style>foo</style>  <style>bar</style>

 100          // we must not grab foo</style>  <style>bar

 101          $html = preg_replace_callback('#<style(?:\s.*)?>(.*)<\/style>#isU', array($this, 'styleCallback'), $html);
 102          $style_blocks = $this->_styleMatches;
 103          $this->_styleMatches = array(); // reset

 104          $context->register('StyleBlocks', $style_blocks); // $context must not be reused

 105          if ($this->_tidy) {
 106              foreach ($style_blocks as &$style) {
 107                  $style = $this->cleanCSS($style, $config, $context);
 108              }
 109          }
 110          return $html;
 111      }
 112  
 113      /**

 114       * Takes CSS (the stuff found in <style>) and cleans it.

 115       * @warning Requires CSSTidy <http://csstidy.sourceforge.net/>

 116       * @param string $css CSS styling to clean

 117       * @param HTMLPurifier_Config $config

 118       * @param HTMLPurifier_Context $context

 119       * @throws HTMLPurifier_Exception

 120       * @return string Cleaned CSS

 121       */
 122      public function cleanCSS($css, $config, $context)
 123      {
 124          // prepare scope

 125          $scope = $config->get('Filter.ExtractStyleBlocks.Scope');
 126          if ($scope !== null) {
 127              $scopes = array_map('trim', explode(',', $scope));
 128          } else {
 129              $scopes = array();
 130          }
 131          // remove comments from CSS

 132          $css = trim($css);
 133          if (strncmp('<!--', $css, 4) === 0) {
 134              $css = substr($css, 4);
 135          }
 136          if (strlen($css) > 3 && substr($css, -3) == '-->') {
 137              $css = substr($css, 0, -3);
 138          }
 139          $css = trim($css);
 140          set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
 141          $this->_tidy->parse($css);
 142          restore_error_handler();
 143          $css_definition = $config->getDefinition('CSS');
 144          $html_definition = $config->getDefinition('HTML');
 145          $new_css = array();
 146          foreach ($this->_tidy->css as $k => $decls) {
 147              // $decls are all CSS declarations inside an @ selector

 148              $new_decls = array();
 149              foreach ($decls as $selector => $style) {
 150                  $selector = trim($selector);
 151                  if ($selector === '') {
 152                      continue;
 153                  } // should not happen

 154                  // Parse the selector

 155                  // Here is the relevant part of the CSS grammar:

 156                  //

 157                  // ruleset

 158                  //   : selector [ ',' S* selector ]* '{' ...

 159                  // selector

 160                  //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?

 161                  // combinator

 162                  //   : '+' S*

 163                  //   : '>' S*

 164                  // simple_selector

 165                  //   : element_name [ HASH | class | attrib | pseudo ]*

 166                  //   | [ HASH | class | attrib | pseudo ]+

 167                  // element_name

 168                  //   : IDENT | '*'

 169                  //   ;

 170                  // class

 171                  //   : '.' IDENT

 172                  //   ;

 173                  // attrib

 174                  //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*

 175                  //     [ IDENT | STRING ] S* ]? ']'

 176                  //   ;

 177                  // pseudo

 178                  //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]

 179                  //   ;

 180                  //

 181                  // For reference, here are the relevant tokens:

 182                  //

 183                  // HASH         #{name}

 184                  // IDENT        {ident}

 185                  // INCLUDES     ==

 186                  // DASHMATCH    |=

 187                  // STRING       {string}

 188                  // FUNCTION     {ident}\(

 189                  //

 190                  // And the lexical scanner tokens

 191                  //

 192                  // name         {nmchar}+

 193                  // nmchar       [_a-z0-9-]|{nonascii}|{escape}

 194                  // nonascii     [\240-\377]

 195                  // escape       {unicode}|\\[^\r\n\f0-9a-f]

 196                  // unicode      \\{h}}{1,6}(\r\n|[ \t\r\n\f])?

 197                  // ident        -?{nmstart}{nmchar*}

 198                  // nmstart      [_a-z]|{nonascii}|{escape}

 199                  // string       {string1}|{string2}

 200                  // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"

 201                  // string2      \'([^\n\r\f\\"]|\\{nl}|{escape})*\'

 202                  //

 203                  // We'll implement a subset (in order to reduce attack

 204                  // surface); in particular:

 205                  //

 206                  //      - No Unicode support

 207                  //      - No escapes support

 208                  //      - No string support (by proxy no attrib support)

 209                  //      - element_name is matched against allowed

 210                  //        elements (some people might find this

 211                  //        annoying...)

 212                  //      - Pseudo-elements one of :first-child, :link,

 213                  //        :visited, :active, :hover, :focus

 214  
 215                  // handle ruleset

 216                  $selectors = array_map('trim', explode(',', $selector));
 217                  $new_selectors = array();
 218                  foreach ($selectors as $sel) {
 219                      // split on +, > and spaces

 220                      $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
 221                      // even indices are chunks, odd indices are

 222                      // delimiters

 223                      $nsel = null;
 224                      $delim = null; // guaranteed to be non-null after

 225                      // two loop iterations

 226                      for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
 227                          $x = $basic_selectors[$i];
 228                          if ($i % 2) {
 229                              // delimiter

 230                              if ($x === ' ') {
 231                                  $delim = ' ';
 232                              } else {
 233                                  $delim = ' ' . $x . ' ';
 234                              }
 235                          } else {
 236                              // simple selector

 237                              $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
 238                              $sdelim = null;
 239                              $nx = null;
 240                              for ($j = 0, $cc = count($components); $j < $cc; $j++) {
 241                                  $y = $components[$j];
 242                                  if ($j === 0) {
 243                                      if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
 244                                          $nx = $y;
 245                                      } else {
 246                                          // $nx stays null; this matters

 247                                          // if we don't manage to find

 248                                          // any valid selector content,

 249                                          // in which case we ignore the

 250                                          // outer $delim

 251                                      }
 252                                  } elseif ($j % 2) {
 253                                      // set delimiter

 254                                      $sdelim = $y;
 255                                  } else {
 256                                      $attrdef = null;
 257                                      if ($sdelim === '#') {
 258                                          $attrdef = $this->_id_attrdef;
 259                                      } elseif ($sdelim === '.') {
 260                                          $attrdef = $this->_class_attrdef;
 261                                      } elseif ($sdelim === ':') {
 262                                          $attrdef = $this->_enum_attrdef;
 263                                      } else {
 264                                          throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
 265                                      }
 266                                      $r = $attrdef->validate($y, $config, $context);
 267                                      if ($r !== false) {
 268                                          if ($r !== true) {
 269                                              $y = $r;
 270                                          }
 271                                          if ($nx === null) {
 272                                              $nx = '';
 273                                          }
 274                                          $nx .= $sdelim . $y;
 275                                      }
 276                                  }
 277                              }
 278                              if ($nx !== null) {
 279                                  if ($nsel === null) {
 280                                      $nsel = $nx;
 281                                  } else {
 282                                      $nsel .= $delim . $nx;
 283                                  }
 284                              } else {
 285                                  // delimiters to the left of invalid

 286                                  // basic selector ignored

 287                              }
 288                          }
 289                      }
 290                      if ($nsel !== null) {
 291                          if (!empty($scopes)) {
 292                              foreach ($scopes as $s) {
 293                                  $new_selectors[] = "$s $nsel";
 294                              }
 295                          } else {
 296                              $new_selectors[] = $nsel;
 297                          }
 298                      }
 299                  }
 300                  if (empty($new_selectors)) {
 301                      continue;
 302                  }
 303                  $selector = implode(', ', $new_selectors);
 304                  foreach ($style as $name => $value) {
 305                      if (!isset($css_definition->info[$name])) {
 306                          unset($style[$name]);
 307                          continue;
 308                      }
 309                      $def = $css_definition->info[$name];
 310                      $ret = $def->validate($value, $config, $context);
 311                      if ($ret === false) {
 312                          unset($style[$name]);
 313                      } else {
 314                          $style[$name] = $ret;
 315                      }
 316                  }
 317                  $new_decls[$selector] = $style;
 318              }
 319              $new_css[$k] = $new_decls;
 320          }
 321          // remove stuff that shouldn't be used, could be reenabled

 322          // after security risks are analyzed

 323          $this->_tidy->css = $new_css;
 324          $this->_tidy->import = array();
 325          $this->_tidy->charset = null;
 326          $this->_tidy->namespace = null;
 327          $css = $this->_tidy->print->plain();
 328          // we are going to escape any special characters <>& to ensure

 329          // that no funny business occurs (i.e. </style> in a font-family prop).

 330          if ($config->get('Filter.ExtractStyleBlocks.Escaping')) {
 331              $css = str_replace(
 332                  array('<', '>', '&'),
 333                  array('\3C ', '\3E ', '\26 '),
 334                  $css
 335              );
 336          }
 337          return $css;
 338      }
 339  }
 340  
 341  // vim: et sw=4 sts=4