Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0 Note: minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is supported too.

Differences Between: [Versions 310 and 311] [Versions 311 and 401] [Versions 39 and 311]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX;
   4  
   5  use Box\Spout\Common\Entity\Cell;
   6  use Box\Spout\Common\Entity\Row;
   7  use Box\Spout\Common\Exception\IOException;
   8  use Box\Spout\Reader\Common\Manager\RowManager;
   9  use Box\Spout\Reader\Common\XMLProcessor;
  10  use Box\Spout\Reader\Exception\InvalidValueException;
  11  use Box\Spout\Reader\Exception\XMLProcessingException;
  12  use Box\Spout\Reader\IteratorInterface;
  13  use Box\Spout\Reader\Wrapper\XMLReader;
  14  use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
  15  use Box\Spout\Reader\XLSX\Helper\CellHelper;
  16  use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
  17  
  18  /**
  19   * Class RowIterator
  20   */
  21  class RowIterator implements IteratorInterface
  22  {
  23      /** Definition of XML nodes names used to parse data */
  24      const XML_NODE_DIMENSION = 'dimension';
  25      const XML_NODE_WORKSHEET = 'worksheet';
  26      const XML_NODE_ROW = 'row';
  27      const XML_NODE_CELL = 'c';
  28  
  29      /** Definition of XML attributes used to parse data */
  30      const XML_ATTRIBUTE_REF = 'ref';
  31      const XML_ATTRIBUTE_SPANS = 'spans';
  32      const XML_ATTRIBUTE_ROW_INDEX = 'r';
  33      const XML_ATTRIBUTE_CELL_INDEX = 'r';
  34  
  35      /** @var string Path of the XLSX file being read */
  36      protected $filePath;
  37  
  38      /** @var string Path of the sheet data XML file as in [Content_Types].xml */
  39      protected $sheetDataXMLFilePath;
  40  
  41      /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
  42      protected $xmlReader;
  43  
  44      /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
  45      protected $xmlProcessor;
  46  
  47      /** @var Helper\CellValueFormatter Helper to format cell values */
  48      protected $cellValueFormatter;
  49  
  50      /** @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows */
  51      protected $rowManager;
  52  
  53      /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
  54      protected $entityFactory;
  55  
  56      /**
  57       * TODO: This variable can be deleted when row indices get preserved
  58       * @var int Number of read rows
  59       */
  60      protected $numReadRows = 0;
  61  
  62      /** @var Row Contains the row currently processed */
  63      protected $currentlyProcessedRow;
  64  
  65      /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
  66      protected $rowBuffer;
  67  
  68      /** @var bool Indicates whether all rows have been read */
  69      protected $hasReachedEndOfFile = false;
  70  
  71      /** @var int The number of columns the sheet has (0 meaning undefined) */
  72      protected $numColumns = 0;
  73  
  74      /** @var bool Whether empty rows should be returned or skipped */
  75      protected $shouldPreserveEmptyRows;
  76  
  77      /** @var int Last row index processed (one-based) */
  78      protected $lastRowIndexProcessed = 0;
  79  
  80      /** @var int Row index to be processed next (one-based) */
  81      protected $nextRowIndexToBeProcessed = 0;
  82  
  83      /** @var int Last column index processed (zero-based) */
  84      protected $lastColumnIndexProcessed = -1;
  85  
  86      /**
  87       * @param string $filePath Path of the XLSX file being read
  88       * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
  89       * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
  90       * @param XMLReader $xmlReader XML Reader
  91       * @param XMLProcessor $xmlProcessor Helper to process XML files
  92       * @param CellValueFormatter $cellValueFormatter Helper to format cell values
  93       * @param RowManager $rowManager Manages rows
  94       * @param InternalEntityFactory $entityFactory Factory to create entities
  95       */
  96      public function __construct(
  97          $filePath,
  98          $sheetDataXMLFilePath,
  99          $shouldPreserveEmptyRows,
 100          $xmlReader,
 101          XMLProcessor $xmlProcessor,
 102          CellValueFormatter $cellValueFormatter,
 103          RowManager $rowManager,
 104          InternalEntityFactory $entityFactory
 105      ) {
 106          $this->filePath = $filePath;
 107          $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
 108          $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
 109          $this->xmlReader = $xmlReader;
 110          $this->cellValueFormatter = $cellValueFormatter;
 111          $this->rowManager = $rowManager;
 112          $this->entityFactory = $entityFactory;
 113  
 114          // Register all callbacks to process different nodes when reading the XML file
 115          $this->xmlProcessor = $xmlProcessor;
 116          $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
 117          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
 118          $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
 119          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
 120          $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
 121      }
 122  
 123      /**
 124       * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 125       * @return string Path of the XML file containing the sheet data,
 126       *                without the leading slash.
 127       */
 128      protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
 129      {
 130          return \ltrim($sheetDataXMLFilePath, '/');
 131      }
 132  
 133      /**
 134       * Rewind the Iterator to the first element.
 135       * Initializes the XMLReader object that reads the associated sheet data.
 136       * The XMLReader is configured to be safe from billion laughs attack.
 137       * @see http://php.net/manual/en/iterator.rewind.php
 138       *
 139       * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
 140       * @return void
 141       */
 142      public function rewind()
 143      {
 144          $this->xmlReader->close();
 145  
 146          if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
 147              throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
 148          }
 149  
 150          $this->numReadRows = 0;
 151          $this->lastRowIndexProcessed = 0;
 152          $this->nextRowIndexToBeProcessed = 0;
 153          $this->rowBuffer = null;
 154          $this->hasReachedEndOfFile = false;
 155          $this->numColumns = 0;
 156  
 157          $this->next();
 158      }
 159  
 160      /**
 161       * Checks if current position is valid
 162       * @see http://php.net/manual/en/iterator.valid.php
 163       *
 164       * @return bool
 165       */
 166      public function valid()
 167      {
 168          return (!$this->hasReachedEndOfFile);
 169      }
 170  
 171      /**
 172       * Move forward to next element. Reads data describing the next unprocessed row.
 173       * @see http://php.net/manual/en/iterator.next.php
 174       *
 175       * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 176       * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
 177       * @return void
 178       */
 179      public function next()
 180      {
 181          $this->nextRowIndexToBeProcessed++;
 182  
 183          if ($this->doesNeedDataForNextRowToBeProcessed()) {
 184              $this->readDataForNextRow();
 185          }
 186      }
 187  
 188      /**
 189       * Returns whether we need data for the next row to be processed.
 190       * We don't need to read data if:
 191       *   we have already read at least one row
 192       *     AND
 193       *   we need to preserve empty rows
 194       *     AND
 195       *   the last row that was read is not the row that need to be processed
 196       *   (i.e. if we need to return empty rows)
 197       *
 198       * @return bool Whether we need data for the next row to be processed.
 199       */
 200      protected function doesNeedDataForNextRowToBeProcessed()
 201      {
 202          $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);
 203  
 204          return (
 205              !$hasReadAtLeastOneRow ||
 206              !$this->shouldPreserveEmptyRows ||
 207              $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
 208          );
 209      }
 210  
 211      /**
 212       * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 213       * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
 214       * @return void
 215       */
 216      protected function readDataForNextRow()
 217      {
 218          $this->currentlyProcessedRow = $this->entityFactory->createRow();
 219  
 220          try {
 221              $this->xmlProcessor->readUntilStopped();
 222          } catch (XMLProcessingException $exception) {
 223              throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
 224          }
 225  
 226          $this->rowBuffer = $this->currentlyProcessedRow;
 227      }
 228  
 229      /**
 230       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
 231       * @return int A return code that indicates what action should the processor take next
 232       */
 233      protected function processDimensionStartingNode($xmlReader)
 234      {
 235          // Read dimensions of the sheet
 236          $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
 237          if (\preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
 238              $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
 239          }
 240  
 241          return XMLProcessor::PROCESSING_CONTINUE;
 242      }
 243  
 244      /**
 245       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
 246       * @return int A return code that indicates what action should the processor take next
 247       */
 248      protected function processRowStartingNode($xmlReader)
 249      {
 250          // Reset index of the last processed column
 251          $this->lastColumnIndexProcessed = -1;
 252  
 253          // Mark the last processed row as the one currently being read
 254          $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);
 255  
 256          // Read spans info if present
 257          $numberOfColumnsForRow = $this->numColumns;
 258          $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
 259          if ($spans) {
 260              list(, $numberOfColumnsForRow) = \explode(':', $spans);
 261              $numberOfColumnsForRow = (int) $numberOfColumnsForRow;
 262          }
 263  
 264          $cells = \array_fill(0, $numberOfColumnsForRow, $this->entityFactory->createCell(''));
 265          $this->currentlyProcessedRow->setCells($cells);
 266  
 267          return XMLProcessor::PROCESSING_CONTINUE;
 268      }
 269  
 270      /**
 271       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
 272       * @return int A return code that indicates what action should the processor take next
 273       */
 274      protected function processCellStartingNode($xmlReader)
 275      {
 276          $currentColumnIndex = $this->getColumnIndex($xmlReader);
 277  
 278          // NOTE: expand() will automatically decode all XML entities of the child nodes
 279          $node = $xmlReader->expand();
 280          $cell = $this->getCell($node);
 281  
 282          $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex);
 283          $this->lastColumnIndexProcessed = $currentColumnIndex;
 284  
 285          return XMLProcessor::PROCESSING_CONTINUE;
 286      }
 287  
 288      /**
 289       * @return int A return code that indicates what action should the processor take next
 290       */
 291      protected function processRowEndingNode()
 292      {
 293          // if the fetched row is empty and we don't want to preserve it..,
 294          if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
 295              // ... skip it
 296              return XMLProcessor::PROCESSING_CONTINUE;
 297          }
 298  
 299          $this->numReadRows++;
 300  
 301          // If needed, we fill the empty cells
 302          if ($this->numColumns === 0) {
 303              $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
 304          }
 305  
 306          // at this point, we have all the data we need for the row
 307          // so that we can populate the buffer
 308          return XMLProcessor::PROCESSING_STOP;
 309      }
 310  
 311      /**
 312       * @return int A return code that indicates what action should the processor take next
 313       */
 314      protected function processWorksheetEndingNode()
 315      {
 316          // The closing "</worksheet>" marks the end of the file
 317          $this->hasReachedEndOfFile = true;
 318  
 319          return XMLProcessor::PROCESSING_STOP;
 320      }
 321  
 322      /**
 323       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
 324       * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
 325       * @return int Row index
 326       */
 327      protected function getRowIndex($xmlReader)
 328      {
 329          // Get "r" attribute if present (from something like <row r="3"...>
 330          $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);
 331  
 332          return ($currentRowIndex !== null) ?
 333                  (int) $currentRowIndex :
 334                  $this->lastRowIndexProcessed + 1;
 335      }
 336  
 337      /**
 338       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
 339       * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
 340       * @return int Column index
 341       */
 342      protected function getColumnIndex($xmlReader)
 343      {
 344          // Get "r" attribute if present (from something like <c r="A1"...>
 345          $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
 346  
 347          return ($currentCellIndex !== null) ?
 348                  CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
 349                  $this->lastColumnIndexProcessed + 1;
 350      }
 351  
 352      /**
 353       * Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node.
 354       *
 355       * @param \DOMNode $node
 356       * @return Cell The cell set with the associated with the cell
 357       */
 358      protected function getCell($node)
 359      {
 360          try {
 361              $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
 362              $cell = $this->entityFactory->createCell($cellValue);
 363          } catch (InvalidValueException $exception) {
 364              $cell = $this->entityFactory->createCell($exception->getInvalidValue());
 365              $cell->setType(Cell::TYPE_ERROR);
 366          }
 367  
 368          return $cell;
 369      }
 370  
 371      /**
 372       * Return the current element, either an empty row or from the buffer.
 373       * @see http://php.net/manual/en/iterator.current.php
 374       *
 375       * @return Row|null
 376       */
 377      public function current()
 378      {
 379          $rowToBeProcessed = $this->rowBuffer;
 380  
 381          if ($this->shouldPreserveEmptyRows) {
 382              // when we need to preserve empty rows, we will either return
 383              // an empty row or the last row read. This depends whether the
 384              // index of last row that was read matches the index of the last
 385              // row whose value should be returned.
 386              if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
 387                  // return empty row if mismatch between last processed row
 388                  // and the row that needs to be returned
 389                  $rowToBeProcessed = $this->entityFactory->createRow();
 390              }
 391          }
 392  
 393          return $rowToBeProcessed;
 394      }
 395  
 396      /**
 397       * Return the key of the current element. Here, the row index.
 398       * @see http://php.net/manual/en/iterator.key.php
 399       *
 400       * @return int
 401       */
 402      public function key()
 403      {
 404          // TODO: This should return $this->nextRowIndexToBeProcessed
 405          //       but to avoid a breaking change, the return value for
 406          //       this function has been kept as the number of rows read.
 407          return $this->shouldPreserveEmptyRows ?
 408                  $this->nextRowIndexToBeProcessed :
 409                  $this->numReadRows;
 410      }
 411  
 412      /**
 413       * Cleans up what was created to iterate over the object.
 414       *
 415       * @return void
 416       */
 417      public function end()
 418      {
 419          $this->xmlReader->close();
 420      }
 421  }