Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\ODS;
   6  
   7  use DOMElement;
   8  use OpenSpout\Common\Entity\Cell;
   9  use OpenSpout\Common\Entity\Row;
  10  use OpenSpout\Reader\Common\XMLProcessor;
  11  use OpenSpout\Reader\Exception\InvalidValueException;
  12  use OpenSpout\Reader\Exception\IteratorNotRewindableException;
  13  use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
  14  use OpenSpout\Reader\RowIteratorInterface;
  15  use OpenSpout\Reader\Wrapper\XMLReader;
  16  
  /**
   * Forward-only iterator over the rows of one ODS sheet.
   *
   * A row is obtained by calling XMLProcessor::readUntilStopped() and letting the
   * callbacks registered in the constructor assemble it; the callbacks stop the
   * processor once a row is complete or the closing "</table:table>" is reached.
   */
  final class RowIterator implements RowIteratorInterface
  {
      /**
       * Definition of XML nodes names used to parse data.
       */
      public const XML_NODE_TABLE = 'table:table';
      public const XML_NODE_ROW = 'table:table-row';
      public const XML_NODE_CELL = 'table:table-cell';

      /**
       * Number of columns supported by Excel; used to recognise the trailing
       * empty cell that Excel repeats up to the end of the sheet.
       */
      public const MAX_COLUMNS_EXCEL = 16384;

      /**
       * Definition of XML attribute used to parse data.
       */
      public const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
      public const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

      private Options $options;

      /** @var XMLProcessor Helper Object to process XML nodes */
      private XMLProcessor $xmlProcessor;

      /** @var Helper\CellValueFormatter Helper to format cell values */
      private Helper\CellValueFormatter $cellValueFormatter;

      /** @var bool Whether the iterator has already been rewound once */
      private bool $hasAlreadyBeenRewound = false;

      /** @var Row The currently processed row */
      private Row $currentlyProcessedRow;

      /**
       * @var null|Row Buffer used to store the current row, while checking if there are more rows to read.
       *               Null until the first row has been read.
       */
      private ?Row $rowBuffer = null;

      /** @var bool Indicates whether all rows have been read */
      private bool $hasReachedEndOfFile = false;

      /** @var int Last row index processed (one-based) */
      private int $lastRowIndexProcessed = 0;

      /** @var int Row index to be processed next (one-based) */
      private int $nextRowIndexToBeProcessed = 1;

      /**
       * @var null|Cell Last processed cell (because when reading cell at column N+1, cell N is processed).
       *                Null while the current row has not yielded any cell.
       */
      private ?Cell $lastProcessedCell = null;

      /** @var int Number of times the last processed row should be repeated */
      private int $numRowsRepeated = 1;

      /** @var int Number of times the last cell value should be copied to the cells on its right */
      private int $numColumnsRepeated = 1;

      /** @var bool Whether at least one cell has been read for the row currently being processed */
      private bool $hasAlreadyReadOneCellInCurrentRow = false;

      /**
       * Stores the collaborators and registers this iterator's node callbacks
       * on the given XML processor.
       *
       * @param Options            $options            Reader options (SHOULD_PRESERVE_EMPTY_ROWS is read here)
       * @param CellValueFormatter $cellValueFormatter Helper used to turn a cell node into a value
       * @param XMLProcessor       $xmlProcessor       Processor driving the XML reading
       */
      public function __construct(
          Options $options,
          CellValueFormatter $cellValueFormatter,
          XMLProcessor $xmlProcessor
      ) {
          $this->options = $options;
          $this->cellValueFormatter = $cellValueFormatter;
          $this->xmlProcessor = $xmlProcessor;

          // Register all callbacks to process different nodes when reading the XML file.
          // They are handed over as [object, method name] pairs, exactly as the processor receives them today.
          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
          $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
      }

      /**
       * Rewind the Iterator to the first element.
       * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
       *
       * @see http://php.net/manual/en/iterator.rewind.php
       *
       * @throws \OpenSpout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
       */
      public function rewind(): void
      {
          // Because sheet and row data is located in the file, we can't rewind both the
          // sheet iterator and the row iterator, as XML file cannot be read backwards.
          // Therefore, rewinding the row iterator has been disabled.
          if ($this->hasAlreadyBeenRewound) {
              throw new IteratorNotRewindableException();
          }

          $this->hasAlreadyBeenRewound = true;
          $this->lastRowIndexProcessed = 0;
          $this->nextRowIndexToBeProcessed = 1;
          $this->rowBuffer = null;
          $this->hasReachedEndOfFile = false;

          // Load the first row right away so that valid()/current() can be used.
          $this->next();
      }

      /**
       * Checks if current position is valid.
       *
       * @see http://php.net/manual/en/iterator.valid.php
       */
      public function valid(): bool
      {
          return !$this->hasReachedEndOfFile;
      }

      /**
       * Move forward to next element. Empty rows will be skipped.
       *
       * When the buffered row is repeated, no new data is read: the same buffered
       * row is served again and only the row index advances.
       *
       * @see http://php.net/manual/en/iterator.next.php
       *
       * @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
       * @throws \OpenSpout\Common\Exception\IOException                   If unable to read the sheet data XML
       */
      public function next(): void
      {
          if ($this->doesNeedDataForNextRowToBeProcessed()) {
              $this->readDataForNextRow();
          }

          ++$this->lastRowIndexProcessed;
      }

      /**
       * Return the current element, from the buffer.
       *
       * The buffer is only filled once rewind() has been called; asking for the
       * current row before that is a usage error.
       *
       * @see http://php.net/manual/en/iterator.current.php
       */
      public function current(): Row
      {
          return $this->rowBuffer;
      }

      /**
       * Return the key of the current element.
       *
       * @see http://php.net/manual/en/iterator.key.php
       */
      public function key(): int
      {
          return $this->lastRowIndexProcessed;
      }

      /**
       * Returns whether we need data for the next row to be processed.
       * We DO need to read data if:
       *   - we have not read any rows yet
       *      OR
       *   - the next row to be processed immediately follows the last read row.
       *
       * @return bool whether we need data for the next row to be processed
       */
      private function doesNeedDataForNextRowToBeProcessed(): bool
      {
          if (0 === $this->lastRowIndexProcessed) {
              // nothing has been read yet
              return true;
          }

          return $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1;
      }

      /**
       * Starts a fresh row, lets the XML processor run until a callback stops it,
       * then publishes the assembled row into the buffer.
       *
       * @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
       * @throws \OpenSpout\Common\Exception\IOException                   If unable to read the sheet data XML
       */
      private function readDataForNextRow(): void
      {
          $this->currentlyProcessedRow = new Row([], null);

          $this->xmlProcessor->readUntilStopped();

          $this->rowBuffer = $this->currentlyProcessedRow;
      }

      /**
       * Resets the per-row state and records how many times the row is repeated.
       *
       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
       *
       * @return int A return code that indicates what action should the processor take next
       */
      private function processRowStartingNode(XMLReader $xmlReader): int
      {
          // Reset data from current row
          $this->hasAlreadyReadOneCellInCurrentRow = false;
          $this->lastProcessedCell = null;
          $this->numColumnsRepeated = 1;
          $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

          return XMLProcessor::PROCESSING_CONTINUE;
      }

      /**
       * Reads the cell under the cursor and flushes the previously read cell into the row.
       *
       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
       *
       * @return int A return code that indicates what action should the processor take next
       */
      private function processCellStartingNode(XMLReader $xmlReader): int
      {
          $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

          // NOTE: expand() will automatically decode all XML entities of the child nodes
          /** @var DOMElement $node */
          $node = $xmlReader->expand();
          $currentCell = $this->getCell($node);

          // Cell N is only added once cell N+1 has been read: the last cell of a row
          // gets special treatment in processRowEndingNode(), so it must stay pending.
          if ($this->hasAlreadyReadOneCellInCurrentRow) {
              for ($i = 0; $i < $this->numColumnsRepeated; ++$i) {
                  $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
              }
          }

          $this->hasAlreadyReadOneCellInCurrentRow = true;
          $this->lastProcessedCell = $currentCell;
          $this->numColumnsRepeated = $currentNumColumnsRepeated;

          return XMLProcessor::PROCESSING_CONTINUE;
      }

      /**
       * Finalises the row being processed: decides whether it is skipped, flushes
       * the pending last cell and advances the index of the next row to read.
       *
       * @return int A return code that indicates what action should the processor take next
       */
      private function processRowEndingNode(): int
      {
          $pendingCell = $this->lastProcessedCell;
          $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $pendingCell);

          // if the fetched row is empty and we don't want to preserve it...
          if (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyRow) {
              // ... skip it
              return XMLProcessor::PROCESSING_CONTINUE;
          }

          // if the row is empty, we don't want to return more than one cell
          $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
          $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

          // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
          // The current count of read columns is the number of cells already in "$this->currentlyProcessedRow".
          // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
          // with a number-columns-repeated value equal to the number of (supported columns - used columns).
          // In Excel, the number of supported columns is 16384, but we don't want to return rows with
          // always 16384 cells.
          //
          // A row element that contained no cell at all has nothing pending: in that case the row is
          // returned without cells instead of handing null to addCell().
          if (
              null !== $pendingCell
              && ($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL
          ) {
              for ($i = 0; $i < $actualNumColumnsRepeated; ++$i) {
                  $this->currentlyProcessedRow->addCell($pendingCell);
              }
          }

          // If we are processing row N and the row is repeated M times,
          // then the next row to be processed will be row (N+M).
          $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

          // at this point, we have all the data we need for the row
          // so that we can populate the buffer
          return XMLProcessor::PROCESSING_STOP;
      }

      /**
       * Flags the end of the data once the table is closed.
       *
       * @return int A return code that indicates what action should the processor take next
       */
      private function processTableEndingNode(): int
      {
          // The closing "</table:table>" marks the end of the file
          $this->hasReachedEndOfFile = true;

          return XMLProcessor::PROCESSING_STOP;
      }

      /**
       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
       *
       * @return int The value of "table:number-rows-repeated" attribute of the current node,
       *             or 1 if the attribute is missing or is not a positive integer
       */
      private function getNumRowsRepeatedForCurrentNode(XMLReader $xmlReader): int
      {
          $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

          if (null === $numRowsRepeated) {
              return 1;
          }

          // A count below 1 would never advance the index of the next row to read,
          // so next() would keep serving the same row forever.
          return max(1, (int) $numRowsRepeated);
      }

      /**
       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
       *
       * @return int The value of "table:number-columns-repeated" attribute of the current node,
       *             or 1 if the attribute is missing or is not a positive integer
       */
      private function getNumColumnsRepeatedForCurrentNode(XMLReader $xmlReader): int
      {
          $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

          if (null === $numColumnsRepeated) {
              return 1;
          }

          // A cell element that is present stands for at least one cell.
          return max(1, (int) $numColumnsRepeated);
      }

      /**
       * Builds the cell for the given XML node from its extracted and formatted value.
       *
       * @return Cell The cell holding the node's value, or an error cell carrying the
       *              invalid value when the value could not be formatted
       */
      private function getCell(DOMElement $node): Cell
      {
          try {
              $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
              $cell = Cell::fromValue($cellValue);
          } catch (InvalidValueException $exception) {
              $cell = new Cell\ErrorCell($exception->getInvalidValue(), null);
          }

          return $cell;
      }

      /**
       * After finishing processing each cell, a row is considered empty if it contains
       * no cells and the last read cell is either missing or empty.
       * After finishing processing each cell, the last read cell is not part of the
       * row data yet (as we still need to apply the "num-columns-repeated" attribute).
       *
       * @param null|Cell $lastReadCell The last read cell
       *
       * @return bool Whether the row is empty
       */
      private function isEmptyRow(Row $currentRow, ?Cell $lastReadCell): bool
      {
          if (!$currentRow->isEmpty()) {
              return false;
          }

          return null === $lastReadCell || $lastReadCell instanceof Cell\EmptyCell;
      }
  }