Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * @package moodlecore
  20   * @subpackage backup-xml
  21   * @copyright 2010 onwards Eloy Lafuente (stronk7) {@link http://stronk7.com}
  22   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  /**
  26   * Class implementing one SAX progressive push parser.
  27   *
  28   * SAX parser able to process XML content from files/variables. It supports
  29   * attributes and case folding and works only with UTF-8 content. It's one
  30   * progressive push parser because, intead of loading big crunchs of information
  31   * in memory, it "publishes" (pushes) small information in a "propietary array format" througt
  32   * the corresponding @progressive_parser_processor, that will be the responsibe for
  33   * returning information into handy formats to higher levels.
  34   *
  35   * Note that, while this progressive parser is able to process any XML file, it is
  36   * 100% progressive so it publishes the information in the original order it's parsed (that's
  37   * the expected behaviour) so information belonging to the same path can be returned in
  38   * different chunks if there are inner levels/paths in the middle. Be warned!
  39   *
  40   * The "propietary array format" that the parser publishes to the @progressive_parser_processor
  41   * is this:
  42   *    array (
  43   *        'path' => path where the tags belong to,
  44   *        'level'=> level (1-based) of the tags
  45   *        'tags  => array (
  46   *            'name' => name of the tag,
  47   *            'attrs'=> array( name of the attr => value of the attr),
  48   *            'cdata => cdata of the tag
  49   *        )
  50   *    )
  51   *
  52   * TODO: Finish phpdocs
  53   */
  54  class progressive_parser {
  55  
  56      protected $xml_parser; // PHP's low level XML SAX parser
  57      protected $file;       // full path to file being progressively parsed | => mutually exclusive
  58      protected $contents;   // contents being progressively parsed          |
  59  
  60      /**
  61       * @var progressive_parser_processor to be used to publish processed information
  62       */
  63      protected $processor;
  64  
  65      protected $level;      // level of the current tag
  66      protected $path;       // path of the current tag
  67      protected $accum;      // accumulated char data of the current tag
  68      protected $attrs;      // attributes of the current tag
  69  
  70      protected $topush;     // array containing current level information being parsed to be "pushed"
  71      protected $prevlevel;  // level of the previous tag processed - to detect pushing places
  72      protected $currtag;    // name/value/attributes of the tag being processed
  73  
  74      /**
  75       * @var \core\progress\base Progress tracker called for each action
  76       */
  77      protected $progress;
  78  
  79      public function __construct($case_folding = false) {
  80          $this->xml_parser = xml_parser_create('UTF-8');
  81          xml_parser_set_option($this->xml_parser, XML_OPTION_CASE_FOLDING, $case_folding);
  82          xml_set_object($this->xml_parser, $this);
  83          xml_set_element_handler($this->xml_parser, array($this, 'start_tag'), array($this, 'end_tag'));
  84          xml_set_character_data_handler($this->xml_parser, array($this, 'char_data'));
  85  
  86          $this->file     = null;
  87          $this->contents = null;
  88          $this->procesor = null;
  89          $this->level    = 0;
  90          $this->path     = '';
  91          $this->accum    = '';
  92          $this->attrs    = array();
  93          $this->topush  = array();
  94          $this->prevlevel = 0;
  95          $this->currtag   = array();
  96      }
  97  
  98      /*
  99       * Sets the XML file to be processed by the parser
 100       */
 101      public function set_file($file) {
 102          if (!file_exists($file) || (!is_readable($file))) {
 103              throw new progressive_parser_exception('invalid_file_to_parse');
 104          }
 105          $this->file = $file;
 106          $this->contents = null;
 107      }
 108  
 109      /*
 110       * Sets the XML contents to be processed by the parser
 111       */
 112      public function set_contents($contents) {
 113          if (empty($contents)) {
 114              throw new progressive_parser_exception('invalid_contents_to_parse');
 115          }
 116          $this->contents = $contents;
 117          $this->file = null;
 118      }
 119  
 120      /*
 121       * Define the @progressive_parser_processor in charge of processing the parsed chunks
 122       */
 123      public function set_processor($processor) {
 124          if (!$processor instanceof progressive_parser_processor) {
 125              throw new progressive_parser_exception('invalid_parser_processor');
 126          }
 127          $this->processor = $processor;
 128      }
 129  
 130      /**
 131       * Sets the progress tracker for the parser. If set, the tracker will be
 132       * called to report indeterminate progress for each chunk of XML.
 133       *
 134       * The caller should have already called start_progress on the progress tracker.
 135       *
 136       * @param \core\progress\base $progress Progress tracker
 137       */
 138      public function set_progress(\core\progress\base $progress) {
 139          $this->progress = $progress;
 140      }
 141  
 142      /*
 143       * Process the XML, delegating found chunks to the @progressive_parser_processor
 144       */
 145      public function process() {
 146          if (empty($this->processor)) {
 147              throw new progressive_parser_exception('undefined_parser_processor');
 148          }
 149          if (empty($this->file) && empty($this->contents)) {
 150              throw new progressive_parser_exception('undefined_xml_to_parse');
 151          }
 152          if (is_null($this->xml_parser)) {
 153              throw new progressive_parser_exception('progressive_parser_already_used');
 154          }
 155          if ($this->file) {
 156              $fh = fopen($this->file, 'r');
 157              while ($buffer = fread($fh, 8192)) {
 158                  $this->parse($buffer, feof($fh));
 159              }
 160              fclose($fh);
 161          } else {
 162              $this->parse($this->contents, true);
 163          }
 164          xml_parser_free($this->xml_parser);
 165          $this->xml_parser = null;
 166      }
 167  
 168      /**
 169       * Provides one cross-platform dirname function for
 170       * handling parser paths, see MDL-24381
 171       */
 172      public static function dirname($path) {
 173          return str_replace('\\', '/', dirname($path));
 174      }
 175  
 176  // Protected API starts here
 177  
 178      protected function parse($data, $eof) {
 179          if (!xml_parse($this->xml_parser, $data, $eof)) {
 180              throw new progressive_parser_exception(
 181                  'xml_parsing_error', null,
 182                  sprintf('XML error: %s at line %d, column %d',
 183                          xml_error_string(xml_get_error_code($this->xml_parser)),
 184                          xml_get_current_line_number($this->xml_parser),
 185                          xml_get_current_column_number($this->xml_parser)));
 186          }
 187      }
 188  
 189      protected function publish($data) {
 190          $this->processor->receive_chunk($data);
 191          if (!empty($this->progress)) {
 192              // Report indeterminate progress.
 193              $this->progress->progress();
 194          }
 195      }
 196  
 197      /**
 198       * Inform to the processor that we have started parsing one path
 199       */
 200      protected function inform_start($path) {
 201          $this->processor->before_path($path);
 202      }
 203  
 204      /**
 205       * Inform to the processor that we have finished parsing one path
 206       */
 207      protected function inform_end($path) {
 208          $this->processor->after_path($path);
 209      }
 210  
 211      protected function postprocess_cdata($data) {
 212          return $this->processor->process_cdata($data);
 213      }
 214  
 215      protected function start_tag($parser, $tag, $attributes) {
 216  
 217          // Normal update of parser internals
 218          $this->level++;
 219          $this->path .= '/' . $tag;
 220          $this->accum = '';
 221          $this->attrs = !empty($attributes) ? $attributes : array();
 222  
 223          // Inform processor we are about to start one tag
 224          $this->inform_start($this->path);
 225  
 226          // Entering a new inner level, publish all the information available
 227          if ($this->level > $this->prevlevel) {
 228              if (!empty($this->currtag) && (!empty($this->currtag['attrs']) || !empty($this->currtag['cdata']))) {
 229                  // We always add the last not-empty repetition. Empty ones are ignored.
 230                  if (isset($this->topush['tags'][$this->currtag['name']]) && trim($this->currtag['cdata']) === '') {
 231                      // Do nothing, the tag already exists and the repetition is empty
 232                  } else {
 233                      $this->topush['tags'][$this->currtag['name']] = $this->currtag;
 234                  }
 235              }
 236              if (!empty($this->topush['tags'])) {
 237                  $this->publish($this->topush);
 238              }
 239              $this->currtag = array();
 240              $this->topush = array();
 241          }
 242  
 243          // If not set, build to push common header
 244          if (empty($this->topush)) {
 245              $this->topush['path']  = progressive_parser::dirname($this->path);
 246              $this->topush['level'] = $this->level;
 247              $this->topush['tags']  = array();
 248          }
 249  
 250          // Handling a new tag, create it
 251          $this->currtag['name'] = $tag;
 252          // And add attributes if present
 253          if ($this->attrs) {
 254              $this->currtag['attrs'] = $this->attrs;
 255          }
 256  
 257          // For the records
 258          $this->prevlevel = $this->level;
 259      }
 260  
 261      protected function end_tag($parser, $tag) {
 262  
 263          // Ending rencently started tag, add value to current tag
 264          if ($this->level == $this->prevlevel) {
 265              $this->currtag['cdata'] = $this->postprocess_cdata($this->accum);
 266              // We always add the last not-empty repetition. Empty ones are ignored.
 267              if (isset($this->topush['tags'][$this->currtag['name']]) && trim($this->currtag['cdata']) === '') {
 268                  // Do nothing, the tag already exists and the repetition is empty
 269              } else {
 270                  $this->topush['tags'][$this->currtag['name']] = $this->currtag;
 271              }
 272              $this->currtag = array();
 273          }
 274  
 275          // Leaving one level, publish all the information available
 276          if ($this->level < $this->prevlevel) {
 277              if (!empty($this->topush['tags'])) {
 278                  $this->publish($this->topush);
 279              }
 280              $this->currtag = array();
 281              $this->topush = array();
 282          }
 283  
 284          // For the records
 285          $this->prevlevel = $this->level;
 286  
 287          // Inform processor we have finished one tag
 288          $this->inform_end($this->path);
 289  
 290          // Normal update of parser internals
 291          $this->level--;
 292          $this->path = progressive_parser::dirname($this->path);
 293      }
 294  
 295      protected function char_data($parser, $data) {
 296          $this->accum .= $data;
 297      }
 298  }
 299  
 300  /*
 301   * Exception class used by all the @progressive_parser stuff
 302   */
 303  class progressive_parser_exception extends moodle_exception {
 304  
 305      public function __construct($errorcode, $a=NULL, $debuginfo=null) {
 306          parent::__construct($errorcode, 'error', '', $a, $debuginfo);
 307      }
 308  }