Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.
   1  <?php
   2  
   3  /**
   4   * Definition of the purified HTML that describes allowed children,
   5   * attributes, and many other things.
   6   *
   7   * Conventions:
   8   *
   9   * All member variables that are prefixed with info
  10   * (including the main $info array) are used by HTML Purifier internals
  11   * and should not be directly edited when customizing the HTMLDefinition.
  12   * They can usually be set via configuration directives or custom
  13   * modules.
  14   *
  15   * On the other hand, member variables without the info prefix are used
  16   * internally by the HTMLDefinition and MUST NOT be used by other HTML
  17   * Purifier internals. Many of them, however, are public, and may be
  18   * edited by userspace code to tweak the behavior of HTMLDefinition.
  19   *
  20   * @note This class is inspected by Printer_HTMLDefinition; please
  21   *       update that class if things here change.
  22   *
  23   * @warning Directives that change this object's structure must be in
  24   *          the HTML or Attr namespace!
  25   */
  26  class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
  27  {
  28  
  29      // FULLY-PUBLIC VARIABLES ---------------------------------------------
  30  
  31      /**
  32       * Associative array of element names to HTMLPurifier_ElementDef.
  33       * @type HTMLPurifier_ElementDef[]
  34       */
  35      public $info = array();
  36  
  37      /**
  38       * Associative array of global attribute name to attribute definition.
  39       * @type array
  40       */
  41      public $info_global_attr = array();
  42  
  43      /**
  44       * String name of parent element HTML will be going into.
  45       * @type string
  46       */
  47      public $info_parent = 'div';
  48  
  49      /**
  50       * Definition for parent element, allows parent element to be a
  51       * tag that's not allowed inside the HTML fragment.
  52       * @type HTMLPurifier_ElementDef
  53       */
  54      public $info_parent_def;
  55  
  56      /**
  57       * String name of element used to wrap inline elements in block context.
  58       * @type string
  59       * @note This is rarely used except for BLOCKQUOTEs in strict mode
  60       */
  61      public $info_block_wrapper = 'p';
  62  
  63      /**
  64       * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
  65       * @type array
  66       */
  67      public $info_tag_transform = array();
  68  
  69      /**
  70       * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
  71       * @type HTMLPurifier_AttrTransform[]
  72       */
  73      public $info_attr_transform_pre = array();
  74  
  75      /**
  76       * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
  77       * @type HTMLPurifier_AttrTransform[]
  78       */
  79      public $info_attr_transform_post = array();
  80  
  81      /**
  82       * Nested lookup array of content set name (Block, Inline) to
  83       * element name to whether or not it belongs in that content set.
  84       * @type array
  85       */
  86      public $info_content_sets = array();
  87  
  88      /**
  89       * Indexed list of HTMLPurifier_Injector to be used.
  90       * @type HTMLPurifier_Injector[]
  91       */
  92      public $info_injector = array();
  93  
  94      /**
  95       * Doctype object
  96       * @type HTMLPurifier_Doctype
  97       */
  98      public $doctype;
  99  
 100  
 101  
 102      // RAW CUSTOMIZATION STUFF --------------------------------------------
 103  
 104      /**
 105       * Adds a custom attribute to a pre-existing element
 106       * @note This is strictly convenience, and does not have a corresponding
 107       *       method in HTMLPurifier_HTMLModule
 108       * @param string $element_name Element name to add attribute to
 109       * @param string $attr_name Name of attribute
 110       * @param mixed $def Attribute definition, can be string or object, see
 111       *             HTMLPurifier_AttrTypes for details
 112       */
 113      public function addAttribute($element_name, $attr_name, $def)
 114      {
 115          $module = $this->getAnonymousModule();
 116          if (!isset($module->info[$element_name])) {
 117              $element = $module->addBlankElement($element_name);
 118          } else {
 119              $element = $module->info[$element_name];
 120          }
 121          $element->attr[$attr_name] = $def;
 122      }
 123  
 124      /**
 125       * Adds a custom element to your HTML definition
 126       * @see HTMLPurifier_HTMLModule::addElement() for detailed
 127       *       parameter and return value descriptions.
 128       */
 129      public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
 130      {
 131          $module = $this->getAnonymousModule();
 132          // assume that if the user is calling this, the element
 133          // is safe. This may not be a good idea
 134          $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
 135          return $element;
 136      }
 137  
 138      /**
 139       * Adds a blank element to your HTML definition, for overriding
 140       * existing behavior
 141       * @param string $element_name
 142       * @return HTMLPurifier_ElementDef
 143       * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
 144       *       parameter and return value descriptions.
 145       */
 146      public function addBlankElement($element_name)
 147      {
 148          $module  = $this->getAnonymousModule();
 149          $element = $module->addBlankElement($element_name);
 150          return $element;
 151      }
 152  
 153      /**
 154       * Retrieves a reference to the anonymous module, so you can
 155       * bust out advanced features without having to make your own
 156       * module.
 157       * @return HTMLPurifier_HTMLModule
 158       */
 159      public function getAnonymousModule()
 160      {
 161          if (!$this->_anonModule) {
 162              $this->_anonModule = new HTMLPurifier_HTMLModule();
 163              $this->_anonModule->name = 'Anonymous';
 164          }
 165          return $this->_anonModule;
 166      }
 167  
 168      private $_anonModule = null;
 169  
 170      // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
 171  
 172      /**
 173       * @type string
 174       */
 175      public $type = 'HTML';
 176  
 177      /**
 178       * @type HTMLPurifier_HTMLModuleManager
 179       */
 180      public $manager;
 181  
 182      /**
 183       * Performs low-cost, preliminary initialization.
 184       */
 185      public function __construct()
 186      {
 187          $this->manager = new HTMLPurifier_HTMLModuleManager();
 188      }
 189  
 190      /**
 191       * @param HTMLPurifier_Config $config
 192       */
 193      protected function doSetup($config)
 194      {
 195          $this->processModules($config);
 196          $this->setupConfigStuff($config);
 197          unset($this->manager);
 198  
 199          // cleanup some of the element definitions
 200          foreach ($this->info as $k => $v) {
 201              unset($this->info[$k]->content_model);
 202              unset($this->info[$k]->content_model_type);
 203          }
 204      }
 205  
 206      /**
 207       * Extract out the information from the manager
 208       * @param HTMLPurifier_Config $config
 209       */
 210      protected function processModules($config)
 211      {
 212          if ($this->_anonModule) {
 213              // for user specific changes
 214              // this is late-loaded so we don't have to deal with PHP4
 215              // reference wonky-ness
 216              $this->manager->addModule($this->_anonModule);
 217              unset($this->_anonModule);
 218          }
 219  
 220          $this->manager->setup($config);
 221          $this->doctype = $this->manager->doctype;
 222  
 223          foreach ($this->manager->modules as $module) {
 224              foreach ($module->info_tag_transform as $k => $v) {
 225                  if ($v === false) {
 226                      unset($this->info_tag_transform[$k]);
 227                  } else {
 228                      $this->info_tag_transform[$k] = $v;
 229                  }
 230              }
 231              foreach ($module->info_attr_transform_pre as $k => $v) {
 232                  if ($v === false) {
 233                      unset($this->info_attr_transform_pre[$k]);
 234                  } else {
 235                      $this->info_attr_transform_pre[$k] = $v;
 236                  }
 237              }
 238              foreach ($module->info_attr_transform_post as $k => $v) {
 239                  if ($v === false) {
 240                      unset($this->info_attr_transform_post[$k]);
 241                  } else {
 242                      $this->info_attr_transform_post[$k] = $v;
 243                  }
 244              }
 245              foreach ($module->info_injector as $k => $v) {
 246                  if ($v === false) {
 247                      unset($this->info_injector[$k]);
 248                  } else {
 249                      $this->info_injector[$k] = $v;
 250                  }
 251              }
 252          }
 253          $this->info = $this->manager->getElements();
 254          $this->info_content_sets = $this->manager->contentSets->lookup;
 255      }
 256  
    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * In order, this method:
     *  - validates HTML.BlockWrapper and HTML.Parent,
     *  - prunes $info down to HTML.AllowedElements / HTML.AllowedAttributes
     *    (or to the parsed HTML.Allowed string when neither is an array),
     *  - removes HTML.ForbiddenElements / HTML.ForbiddenAttributes,
     *  - drops every injector whose checkNeeded() returns anything other
     *    than false.
     *
     * Configuration problems are reported through trigger_error().
     * This method reads $this->manager.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        // The block wrapper is only accepted when it is a member of the
        // 'Block' content set; otherwise the current value is kept.
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        // NOTE(review): the second argument (true) presumably lets the
        // manager return elements that are not allowed inside the fragment
        // itself -- confirm against HTMLPurifier_HTMLModuleManager::getElement().
        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // If execution continues past the error (e.g. a custom error
            // handler is installed), fall back to the definition of the
            // parent name currently stored in $info_parent.
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text (appended to the warnings emitted below)
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two dedicated
        // directives yielded an array.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            // Remove every element that is not listed. Names that matched
            // are removed from the local list, so whatever remains after
            // the loop names elements this definition does not know.
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // The copy tracks which allow-list entries never matched anything;
        // the original array stays intact for the isset() lookups.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                // accepted spellings for a global attribute
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // accepted spellings: element-specific (tag@attr,
                    // tag.attr) and global (attr, *@attr, *.attr)
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors for allow-list entries that matched no attribute
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                // split "element.attr" / "element@attr" into at most two parts
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // NOTE(review): no error level is passed
                                // here, so trigger_error() uses its default
                                // (E_USER_NOTICE), unlike the sibling
                                // branch below -- confirm this is intended.
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support"
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                        // NOTE(review): on this path $bits[0] is '*', and
                        // case 1 below reports $bits[0], so the message
                        // names '*' rather than the attribute -- confirm
                        // whether that is intended.
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                // supported spellings here: tag@attr, *@attr and attr
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // Warn about "*.attr" entries, which are not a supported spelling
        // for HTML.ForbiddenAttributes.
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        // NOTE(review): presumably checkNeeded() returns false when
        // everything the injector requires is present -- confirm against
        // HTMLPurifier_Injector.
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }
 448  
 449      /**
 450       * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 451       * separate lists for processing. Format is element[attr1|attr2],element2...
 452       * @warning Although it's largely drawn from TinyMCE's implementation,
 453       *      it is different, and you'll probably have to modify your lists
 454       * @param array $list String list to parse
 455       * @return array
 456       * @todo Give this its own class, probably static interface
 457       */
 458      public function parseTinyMCEAllowedList($list)
 459      {
 460          $list = str_replace(array(' ', "\t"), '', $list);
 461  
 462          $elements = array();
 463          $attributes = array();
 464  
 465          $chunks = preg_split('/(,|[\n\r]+)/', $list);
 466          foreach ($chunks as $chunk) {
 467              if (empty($chunk)) {
 468                  continue;
 469              }
 470              // remove TinyMCE element control characters
 471              if (!strpos($chunk, '[')) {
 472                  $element = $chunk;
 473                  $attr = false;
 474              } else {
 475                  list($element, $attr) = explode('[', $chunk);
 476              }
 477              if ($element !== '*') {
 478                  $elements[$element] = true;
 479              }
 480              if (!$attr) {
 481                  continue;
 482              }
 483              $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
 484              $attr = explode('|', $attr);
 485              foreach ($attr as $key) {
 486                  $attributes["$element.$key"] = true;
 487              }
 488          }
 489          return array($elements, $attributes);
 490      }
 491  }
 492  
 493  // vim: et sw=4 sts=4