Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
/lib/ -> xmlize.php (source)

Differences Between: [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Code for parsing xml files.
  19   *
  20   * Handles functionality for:
  21   *
  22   * Import of xml files in questionbank and course import.
  23   * Can handle xml files larger than 10MB through chunking the input file.
  24   * Replaces the original xmlize by Hans Anderson, {@link http://www.hansanderson.com/contact/}
  25   * with equal interface.
  26   *
  27   * @package    core
  28   * @subpackage lib
  29   * @copyright  Kilian Singer
  30   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  31   */
  32  
  33  /**
  34   * Exception thrown when there is an error parsing an XML file.
  35   *
  36   * @copyright 2010 The Open University
  37   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  38   */
  39  
  40  defined('MOODLE_INTERNAL') || die();
  41  
  /**
   * Exception raised when an XML document cannot be parsed.
   *
   * Carries the parser's error description together with the position in the
   * XML input at which the problem was reported.
   */
  class xml_format_exception extends moodle_exception {
      /** @var string Description of the parse error. */
      public $errorstring;
      /**
       * @var int Line of the XML input where the error was reported.
       *
       * NOTE(review): if moodle_exception derives from PHP's built-in Exception, this
       * property overrides the inherited $line, so getLine() would report the XML line
       * rather than the PHP source line - confirm before relying on either.
       */
      public $line;
      /** @var int Position within the line where the error was reported (core_xml_parser::parse() passes the column number). */
      public $char;

      /**
       * Builds the exception and hands the error details to the parent as the
       * placeholder object for the 'errorparsingxml' string.
       *
       * @param string $errorstring Description of the parse error.
       * @param int $line Line number in the XML input.
       * @param int $char Position within that line.
       * @param string $link Link passed through unchanged to the parent constructor.
       */
      public function __construct($errorstring, $line, $char, $link = '') {
          // Placeholder values for the language string; keys differ from the property names.
          $details = (object) [
              'errorstring' => $errorstring,
              'errorline' => $line,
              'errorchar' => $char,
          ];

          $this->errorstring = $errorstring;
          $this->line = $line;
          $this->char = $char;

          parent::__construct('errorparsingxml', 'error', $link, $details);
      }
  }
  69  
  70  /**
  71   * Class for parsing xml files.
  72   *
  73   * Handles functionality for:
  74   *
  75   * Import of xml files in questionbank and course import.
  76   * Can handle xml files larger than 10MB through chunking the input file.
  77   * Uses a similar interface to the original version xmlize() by Hans Anderson.
  78   *
  79   * @package    core
  80   * @subpackage lib
  81   * @copyright  Kilian Singer
  82   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  83   */
  class core_xml_parser {
      /** Number of bytes handed to the XML parser per xml_parse() call. */
      private const CHUNK_SIZE = 4096;

      /** @var array resulting $xml tree */
      private $xml;
      /** @var array references into the $xml tree under construction, indexed by nesting level */
      private $current;
      /** @var int stores the current level in the XML tree */
      private $level;

      /**
       * Is called when tags are opened.
       *
       * Note: Used by xml element handler as callback.
       *
       * @author Kilian Singer
       * @param resource|XMLParser $parser The XML parser.
       * @param string $name Name of the element being opened.
       * @param array $attrs Stores attributes of XML tag.
       */
      private function startelement($parser, $name, $attrs) {
          if (empty($name)) {
              return;
          }

          $level = $this->level;
          if ($level == 0) {
              // Top level: the element is stored directly under its name and always gets an "@" entry.
              $this->current[$level][$name] = [
                  '@' => $attrs, // Attribute.
                  '#' => [], // Other tags.
              ];
              $this->current[$level + 1] = & $this->current[$level][$name]['#'];
          } else {
              // Nested level: elements sharing a name are collected in a list under that name.
              if (empty($this->current[$level][$name])) {
                  $this->current[$level][$name] = [];
              }
              $index = count($this->current[$level][$name]);
              if (!empty($attrs)) {
                  $this->current[$level][$name][$index]['@'] = $attrs; // Attribute.
              }
              $this->current[$level][$name][$index]['#'] = []; // Other tags.
              $this->current[$level + 1] = & $this->current[$level][$name][$index]['#'];
          }
          $this->level++;
      }

      /**
       * Is called when tags are closed.
       *
       * Note: Used by xml element handler as callback.
       *
       * @author Kilian Singer
       * @param resource|XMLParser $parser The XML parser.
       * @param string $name Name of the element being closed.
       */
      private function endelement($parser, $name) {
          if (empty($name)) {
              return;
          }

          $level = $this->level;
          if (empty($this->current[$level])) {
              // Element without content is represented by an empty string.
              $this->current[$level] = '';
          } else if (array_key_exists(0, $this->current[$level]) && count($this->current[$level]) == 1) {
              // We remove array index if we only have a single entry.
              $this->current[$level] = $this->current[$level][0];
          }

          $this->level--;
      }

      /**
       * Is called for text between the start and the end of tags.
       *
       * Text fragments are stored under integer keys of the current node, while child
       * elements are stored under their (string) names. Consecutive fragments are
       * concatenated; a fragment following a child element is stored under a new integer key.
       * Whitespace-only fragments are ignored.
       *
       * Note: Used by xml element handler as callback.
       *
       * @author Kilian Singer
       * @param resource|XMLParser $parser The XML parser.
       * @param string $data The text reported by the parser.
       */
      private function characterdata($parser, $data) {
          // A literal "0" is kept as well, since trim("0") is not the empty string.
          if (trim($data) === '') {
              return;
          }

          $node = &$this->current[$this->level];

          // Only integer keys hold text. Calling max() over all keys would compare integers with
          // element names, and the result of that comparison differs between PHP versions.
          $textkeys = array_filter(array_keys($node), 'is_int');
          if (empty($textkeys)) {
              $node[0] = $data;
              return;
          }

          $lasttext = max($textkeys);
          end($node);
          if (is_int(key($node))) {
              // If last index is numeric we have CDATA and concat.
              $node[$lasttext] = $node[$lasttext] . $data;
          } else {
              // Otherwise we make a new key.
              $node[$lasttext + 1] = $data;
          }
      }

      /**
       * Parses XML string.
       *
       * The input is fed to the parser in chunks of CHUNK_SIZE bytes; the last chunk is
       * flagged as final so that a truncated document is reported as an error.
       *
       * Note: Interface is kept equal to previous version.
       *
       * @author Kilian Singer
       * @param string $data the XML source to parse.
       * @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
       * @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
       * @param bool $reporterrors if set to true, then a {@link xml_format_exception}
       *      exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
       * @return array|false representation of the parsed XML, or false if nothing could be parsed.
       * @throws xml_format_exception if $reporterrors is true and the parser reported an error.
       */
      public function parse($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
          $data = trim($data);
          $this->xml = [];
          $this->current = [];
          $this->level = 0;
          $this->current[0] = & $this->xml;

          $parser = xml_parser_create($encoding);
          xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
          xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, $whitespace);
          xml_set_element_handler($parser, [$this, "startelement"], [$this, "endelement"]);
          xml_set_character_data_handler($parser, [$this, "characterdata"]);

          // Start parsing an xml document.
          $length = strlen($data);
          for ($offset = 0; $offset < $length; $offset += self::CHUNK_SIZE) {
              // Tell the parser when it receives the last chunk, otherwise unclosed tags go unnoticed.
              $isfinal = ($offset + self::CHUNK_SIZE >= $length);
              if (!xml_parse($parser, substr($data, $offset, self::CHUNK_SIZE), $isfinal)) {
                  break;
              }
          }

          // Collect the error details before the parser is released.
          $errorcode = xml_get_error_code($parser);
          $errorline = xml_get_current_line_number($parser);
          $errorcolumn = xml_get_current_column_number($parser);
          xml_parser_free($parser); // Deletes the parser.

          if ($reporterrors && $errorcode) {
              throw new xml_format_exception(xml_error_string($errorcode), $errorline, $errorcolumn);
          }

          if (empty($this->xml)) { // XML file is invalid or empty, return false.
              return false;
          }
          return $this->xml;
      }
  }
 229  
 /**
  * Parses an XML string into a nested array.
  *
  * Convenience wrapper that creates a new {@link core_xml_parser} for each call
  * and forwards all arguments to its parse() method.
  *
  * @param string $data the XML source to parse.
  * @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
  * @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
  * @param bool $reporterrors if set to true, then a {@link xml_format_exception}
  *      exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
  * @return array|false representation of the parsed XML, or false if the parser produced no data.
  */
 function xmlize($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
     return (new core_xml_parser())->parse($data, $whitespace, $encoding, $reporterrors);
 }