Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 311 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\ODS;
   4  
   5  use Box\Spout\Common\Entity\Cell;
   6  use Box\Spout\Common\Entity\Row;
   7  use Box\Spout\Common\Exception\IOException;
   8  use Box\Spout\Common\Manager\OptionsManagerInterface;
   9  use Box\Spout\Reader\Common\Entity\Options;
  10  use Box\Spout\Reader\Common\Manager\RowManager;
  11  use Box\Spout\Reader\Common\XMLProcessor;
  12  use Box\Spout\Reader\Exception\InvalidValueException;
  13  use Box\Spout\Reader\Exception\IteratorNotRewindableException;
  14  use Box\Spout\Reader\Exception\XMLProcessingException;
  15  use Box\Spout\Reader\IteratorInterface;
  16  use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
  17  use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
  18  use Box\Spout\Reader\Wrapper\XMLReader;
  19  
  /**
   * Class RowIterator
   *
   * Streams the rows of a single ODS sheet. The XML is read forward-only through an
   * XMLProcessor: callbacks registered in the constructor build up the row currently
   * being read, and reading stops each time a complete row (or the end of the table)
   * has been reached.
   */
  class RowIterator implements IteratorInterface
  {
      /** Definition of XML nodes names used to parse data */
      const XML_NODE_TABLE = 'table:table';
      const XML_NODE_ROW = 'table:table-row';
      const XML_NODE_CELL = 'table:table-cell';
      const MAX_COLUMNS_EXCEL = 16384;

      /** Definition of XML attribute used to parse data */
      const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
      const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

      /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
      protected $xmlReader;

      /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
      protected $xmlProcessor;

      /** @var bool Whether empty rows should be returned or skipped */
      protected $shouldPreserveEmptyRows;

      /** @var Helper\CellValueFormatter Helper to format cell values */
      protected $cellValueFormatter;

      /** @var RowManager Manages rows */
      protected $rowManager;

      /** @var InternalEntityFactory Factory to create entities */
      protected $entityFactory;

      /** @var bool Whether the iterator has already been rewound once */
      protected $hasAlreadyBeenRewound = false;

      /** @var Row The currently processed row */
      protected $currentlyProcessedRow;

      /** @var Row Buffer used to store the current row, while checking if there are more rows to read */
      protected $rowBuffer;

      /** @var bool Indicates whether all rows have been read */
      protected $hasReachedEndOfFile = false;

      /** @var int Last row index processed (one-based) */
      protected $lastRowIndexProcessed = 0;

      /** @var int Row index to be processed next (one-based) */
      protected $nextRowIndexToBeProcessed = 1;

      /** @var Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
      protected $lastProcessedCell;

      /** @var int Number of times the last processed row should be repeated */
      protected $numRowsRepeated = 1;

      /** @var int Number of times the last cell value should be copied to the cells on its right */
      protected $numColumnsRepeated = 1;

      /** @var bool Whether at least one cell has been read for the row currently being processed */
      protected $hasAlreadyReadOneCellInCurrentRow = false;

      /**
       * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
       * @param OptionsManagerInterface $optionsManager Reader's options manager
       * @param CellValueFormatter $cellValueFormatter Helper to format cell values
       * @param XMLProcessor $xmlProcessor Helper to process XML files
       * @param RowManager $rowManager Manages rows
       * @param InternalEntityFactory $entityFactory Factory to create entities
       */
      public function __construct(
          XMLReader $xmlReader,
          OptionsManagerInterface $optionsManager,
          CellValueFormatter $cellValueFormatter,
          XMLProcessor $xmlProcessor,
          RowManager $rowManager,
          InternalEntityFactory $entityFactory
      ) {
          $this->xmlReader = $xmlReader;
          $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
          $this->cellValueFormatter = $cellValueFormatter;
          $this->entityFactory = $entityFactory;
          $this->rowManager = $rowManager;

          // Register all callbacks to process different nodes when reading the XML file
          $this->xmlProcessor = $xmlProcessor;
          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
      }

      /**
       * Rewind the Iterator to the first element.
       * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
       * @see http://php.net/manual/en/iterator.rewind.php
       *
       * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
       * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
       * @return void
       */
      public function rewind()
      {
          // Because sheet and row data is located in the file, we can't rewind both the
          // sheet iterator and the row iterator, as XML file cannot be read backwards.
          // Therefore, rewinding the row iterator has been disabled.
          if ($this->hasAlreadyBeenRewound) {
              throw new IteratorNotRewindableException();
          }

          $this->hasAlreadyBeenRewound = true;
          $this->lastRowIndexProcessed = 0;
          $this->nextRowIndexToBeProcessed = 1;
          $this->rowBuffer = null;
          $this->hasReachedEndOfFile = false;

          // Load the first row so that current()/valid() are meaningful right away
          $this->next();
      }

      /**
       * Checks if current position is valid
       * @see http://php.net/manual/en/iterator.valid.php
       *
       * @return bool
       */
      public function valid()
      {
          return (!$this->hasReachedEndOfFile);
      }

      /**
       * Move forward to next element. Empty rows are skipped unless the
       * SHOULD_PRESERVE_EMPTY_ROWS option is enabled.
       * @see http://php.net/manual/en/iterator.next.php
       *
       * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
       * @return void
       */
      public function next()
      {
          // When the last read row is repeated ("number-rows-repeated"), the buffered
          // row is served again and no new data is read from the XML.
          if ($this->doesNeedDataForNextRowToBeProcessed()) {
              $this->readDataForNextRow();
          }

          $this->lastRowIndexProcessed++;
      }

      /**
       * Returns whether we need data for the next row to be processed.
       * We DO need to read data if:
       *   - we have not read any rows yet
       *      OR
       *   - the next row to be processed immediately follows the last read row
       *
       * @return bool Whether we need data for the next row to be processed.
       */
      protected function doesNeedDataForNextRowToBeProcessed()
      {
          $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

          return (
              !$hasReadAtLeastOneRow ||
              $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
          );
      }

      /**
       * Reads the XML until a full row (or the end of the table) has been processed
       * and stores the resulting row in the buffer returned by current().
       *
       * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
       * @return void
       */
      protected function readDataForNextRow()
      {
          $this->currentlyProcessedRow = $this->entityFactory->createRow();

          try {
              $this->xmlProcessor->readUntilStopped();
          } catch (XMLProcessingException $exception) {
              // Keep the original exception attached so the root cause is not lost.
              // NOTE(review): assumes IOException keeps the standard \Exception constructor - confirm.
              throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
          }

          $this->rowBuffer = $this->currentlyProcessedRow;
      }

      /**
       * Resets all per-row state when a new "<table:table-row>" element starts.
       *
       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
       * @return int A return code that indicates what action should the processor take next
       */
      protected function processRowStartingNode($xmlReader)
      {
          // Start from a fresh row: when the previous row was skipped for being empty,
          // reading continued without going back through readDataForNextRow(), so any
          // (empty) cells already added to the row object would otherwise be carried
          // over and shift the cells of this row to the right.
          $this->currentlyProcessedRow = $this->entityFactory->createRow();

          // Reset data from current row
          $this->hasAlreadyReadOneCellInCurrentRow = false;
          $this->lastProcessedCell = null;
          $this->numColumnsRepeated = 1;
          $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

          return XMLProcessor::PROCESSING_CONTINUE;
      }

      /**
       * Reads the cell at the current node and adds the PREVIOUSLY read cell to the row.
       *
       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
       * @return int A return code that indicates what action should the processor take next
       */
      protected function processCellStartingNode($xmlReader)
      {
          $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

          // NOTE: expand() will automatically decode all XML entities of the child nodes
          $node = $xmlReader->expand();
          $currentCell = $this->getCell($node);

          // Process cell N only after having read cell N+1: the last cell of a row gets
          // special treatment in processRowEndingNode() (trailing empty cell repeater),
          // so a cell can only be added once we know it is not the last one.
          if ($this->hasAlreadyReadOneCellInCurrentRow) {
              for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                  $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
              }
          }

          $this->hasAlreadyReadOneCellInCurrentRow = true;
          $this->lastProcessedCell = $currentCell;
          $this->numColumnsRepeated = $currentNumColumnsRepeated;

          return XMLProcessor::PROCESSING_CONTINUE;
      }

      /**
       * Finalizes the row being read: adds its last cell (when relevant) and tells
       * the processor to stop, unless the row is empty and must be skipped.
       *
       * @return int A return code that indicates what action should the processor take next
       */
      protected function processRowEndingNode()
      {
          $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);

          // if the fetched row is empty and we don't want to preserve it...
          if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
              // ... skip it
              return XMLProcessor::PROCESSING_CONTINUE;
          }

          // if the row is empty, we don't want to return more than one cell
          $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
          $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

          // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
          // The current count of read columns is the number of cells in "$this->currentlyProcessedRow".
          // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
          // with a number-columns-repeated value equal to the number of (supported columns - used columns).
          // In Excel, the number of supported columns is 16384, but we don't want to return rows with
          // always 16384 cells.
          if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
              for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                  $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
              }
          }

          // If we are processing row N and the row is repeated M times,
          // then the next row to be processed will be row (N+M).
          $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

          // at this point, we have all the data we need for the row
          // so that we can populate the buffer
          return XMLProcessor::PROCESSING_STOP;
      }

      /**
       * @return int A return code that indicates what action should the processor take next
       */
      protected function processTableEndingNode()
      {
          // The closing "</table:table>" marks the end of the file
          $this->hasReachedEndOfFile = true;

          return XMLProcessor::PROCESSING_STOP;
      }

      /**
       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
       * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
       */
      protected function getNumRowsRepeatedForCurrentNode($xmlReader)
      {
          $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

          return ($numRowsRepeated !== null) ? (int) $numRowsRepeated : 1;
      }

      /**
       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
       * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
       */
      protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
      {
          $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

          return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
      }

      /**
       * Returns the cell holding the formatted value extracted from the given XML node.
       * If the value cannot be formatted, an error cell holding the invalid value is returned instead.
       *
       * @param \DOMNode $node
       * @return Cell The cell created from the node's value
       */
      protected function getCell($node)
      {
          try {
              $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
              $cell = $this->entityFactory->createCell($cellValue);
          } catch (InvalidValueException $exception) {
              // Keep the raw value but flag the cell so that callers can detect the problem
              $cell = $this->entityFactory->createCell($exception->getInvalidValue());
              $cell->setType(Cell::TYPE_ERROR);
          }

          return $cell;
      }

      /**
       * After finishing processing each cell, a row is considered empty if it contains
       * no non-empty cells and if the last read cell is missing or empty.
       * After finishing processing each cell, the last read cell is not part of the
       * row data yet (as we still need to apply the "num-columns-repeated" attribute).
       *
       * @param Row $currentRow
       * @param Cell|null $lastReadCell The last read cell, if any
       * @return bool Whether the row is empty
       */
      protected function isEmptyRow($currentRow, $lastReadCell)
      {
          return (
              $this->rowManager->isEmpty($currentRow) &&
              (!isset($lastReadCell) || $lastReadCell->isEmpty())
          );
      }

      /**
       * Return the current element, from the buffer.
       * @see http://php.net/manual/en/iterator.current.php
       *
       * @return Row
       */
      public function current()
      {
          return $this->rowBuffer;
      }

      /**
       * Return the key of the current element
       * @see http://php.net/manual/en/iterator.key.php
       *
       * @return int
       */
      public function key()
      {
          return $this->lastRowIndexProcessed;
      }

      /**
       * Cleans up what was created to iterate over the object.
       *
       * @return void
       */
      public function end()
      {
          $this->xmlReader->close();
      }
  }