Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.
/lib/ -> xmlize.php (source)

Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Code for parsing xml files.
  19   *
  20   * Handles functionality for:
  21   *
  22   * Import of xml files in questionbank and course import.
  23   * Can handle xml files larger than 10MB through chunking the input file.
  24   * Replaces the original xmlize by Hans Anderson, {@link http://www.hansanderson.com/contact/}
  25   * with equal interface.
  26   *
  27   * @package    core
  28   * @subpackage lib
  29   * @copyright  Kilian Singer
  30   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  31   */
  32  
  33  /**
  34   * Exception thrown when there is an error parsing an XML file.
  35   *
  36   * @copyright 2010 The Open University
  37   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  38   */
  39  
  40  defined('MOODLE_INTERNAL') || die();
  41  
  42  class xml_format_exception extends moodle_exception {
  43      /** @var string */
  44      public $errorstring;
  45      /** @var char */
  46      public $char;
  47      /**
  48       * Constructor function
  49       *
  50       * @param string $errorstring Errorstring
  51       * @param int $line Linenumber
  52       * @param char $char Errorcharacter
  53       * @param string $link Link
  54       */
  55      public function __construct($errorstring, $line, $char, $link = '') {
  56          $this->errorstring = $errorstring;
  57          $this->line = $line;
  58          $this->char = $char;
  59  
  60          $a = new stdClass();
  61          $a->errorstring = $errorstring;
  62          $a->errorline = $line;
  63          $a->errorchar = $char;
  64          parent::__construct('errorparsingxml', 'error', $link, $a);
  65      }
  66  }
  67  
  68  /**
  69   * Class for parsing xml files.
  70   *
  71   * Handles functionality for:
  72   *
  73   * Import of xml files in questionbank and course import.
  74   * Can handle xml files larger than 10MB through chunking the input file.
  75   * Uses a similar interface to the original version xmlize() by Hans Anderson.
  76   *
  77   * @package    core
  78   * @subpackage lib
  79   * @copyright  Kilian Singer
  80   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  81   */
  82  class core_xml_parser {
  83      /** @var array resulting $xml tree */
  84      private $xml;
  85      /** @var array stores references to constructed $xml tree */
  86      private $current;
  87      /** @var int tores the level in the XML tree */
  88      private $level;
  89      /**
  90       * Is called when tags are opened.
  91       *
  92       * Note: Used by xml element handler as callback.
  93       *
  94       * @author Kilian Singer
  95       * @param resource $parser The XML parser resource.
  96       * @param string $name The XML source to parse.
  97       * @param array $attrs Stores attributes of XML tag.
  98       */
  99      private function startelement($parser, $name, $attrs) {
 100          $current = &$this->current;
 101          $level = &$this->level;
 102          if (!empty($name)) {
 103              if ($level == 0) {
 104                  $current[$level][$name] = array();
 105                  $current[$level][$name]["@"] = $attrs; // Attribute.
 106                  $current[$level][$name]["#"] = array(); // Other tags.
 107                  $current[$level + 1] = & $current[$level][$name]["#"];
 108                  $level++;
 109              } else {
 110                  if (empty($current[$level][$name])) {
 111                      $current[$level][$name] = array();
 112                  }
 113                  $siz = count($current[$level][$name]);
 114                  if (!empty($attrs)) {
 115                      $current[$level][$name][$siz]["@"] = $attrs; // Attribute.
 116                  }
 117                  $current[$level][$name][$siz]["#"] = array(); // Other tags.
 118                  $current[$level + 1] = & $current[$level][$name][$siz]["#"];
 119                  $level++;
 120              }
 121          }
 122      }
 123  
 124      /**
 125       * Is called when tags are closed.
 126       *
 127       * Note: Used by xml element handler as callback.
 128       *
 129       * @author Kilian Singer
 130       * @param resource $parser The XML parser resource.
 131       * @param string $name The XML source to parse.
 132       */
 133      private function endelement($parser, $name) {
 134          $current = &$this->current;
 135          $level = &$this->level;
 136          if (!empty($name)) {
 137              if (empty($current[$level])) {
 138                  $current[$level] = '';
 139              } else if (array_key_exists(0, $current[$level])) {
 140                  if (count($current[$level]) == 1) {
 141                      $current[$level] = $current[$level][0]; // We remove array index if we only have a single entry.
 142                  }
 143              }
 144  
 145              $level--;
 146          }
 147      }
 148      /**
 149       * Is called for text between the start and the end of tags.
 150       *
 151       * Note: Used by xml element handler as callback.
 152       *
 153       * @author Kilian Singer
 154       * @param resource $parser The XML parser resource.
 155       * @param string $data The XML source to parse.
 156       */
 157      private function characterdata($parser, $data) {
 158          $current = &$this->current;
 159          $level = &$this->level;
 160          if (($data == "0") || (!empty($data) && trim($data) != "")) {
 161              $siz = count($current[$level]);
 162              if ($siz == 0) {
 163                  $current[$level][0] = $data;
 164              } else {
 165                  $key = max(array_keys($current[$level]));
 166                  if (is_int($key)) {
 167                      end($current[$level]);
 168                      if (is_int(key($current[$level]))) { // If last index is nummeric we have CDATA and concat.
 169                          $current[$level][$key] = $current[$level][$key] . $data;
 170                      } else {
 171                          $current[$level][$key + 1] = $data; // Otherwise we make a new key.
 172                      }
 173                  } else {
 174                      $current[$level][0] = $data;
 175                  }
 176              }
 177          }
 178      }
 179  
 180      /**
 181       * Parses XML string.
 182       *
 183       * Note: Interface is kept equal to previous version.
 184       *
 185       * @author Kilian Singer
 186       * @param string $data the XML source to parse.
 187       * @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
 188       * @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
 189       * @param bool $reporterrors if set to true, then a {@link xml_format_exception}
 190       *      exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
 191       * @return array representation of the parsed XML.
 192       */
 193      public function parse($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
 194          $data = trim($data);
 195          $this->xml = array();
 196          $this->current = array();
 197          $this->level = 0;
 198          $this->current[0] = & $this->xml;
 199          $parser = xml_parser_create($encoding);
 200          xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
 201          xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, $whitespace);
 202          xml_set_element_handler($parser, [$this, "startelement"], [$this, "endelement"]);
 203          xml_set_character_data_handler($parser, [$this, "characterdata"]);
 204          // Start parsing an xml document.
 205          for ($i = 0; $i < strlen($data); $i += 4096) {
 206              if (!xml_parse($parser, substr($data, $i, 4096))) {
 207                  break;
 208              }
 209          }
 210          if ($reporterrors) {
 211              $errorcode = xml_get_error_code($parser);
 212              if ($errorcode) {
 213                  $exception = new xml_format_exception(xml_error_string($errorcode),
 214                          xml_get_current_line_number($parser),
 215                          xml_get_current_column_number($parser));
 216                  xml_parser_free($parser);
 217                  throw $exception;
 218              }
 219          }
 220          xml_parser_free($parser); // Deletes the parser.
 221          if (empty($this->xml)) { // XML file is invalid or empty, return false.
 222              return false;
 223          }
 224          return $this->xml;
 225      }
 226  }
 227  
 228  /**
 229   * XML parsing function calles into class.
 230   *
 231   * Note: Used by xml element handler as callback.
 232   *
 233   * @param string $data the XML source to parse.
 234   * @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
 235   * @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
 236   * @param bool $reporterrors if set to true, then a {@link xml_format_exception}
 237   *      exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
 238   * @return array representation of the parsed XML.
 239   */
 240  function xmlize($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
 241      $hxml = new core_xml_parser();
 242      return $hxml->parse($data, $whitespace, $encoding, $reporterrors);
 243  }