Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is also supported.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\ODS;
   6  
   7  use DOMElement;
   8  use OpenSpout\Common\Entity\Cell;
   9  use OpenSpout\Common\Entity\Row;
  10  use OpenSpout\Reader\Common\XMLProcessor;
  11  use OpenSpout\Reader\Exception\InvalidValueException;
  12  use OpenSpout\Reader\Exception\IteratorNotRewindableException;
  13  use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
  14  use OpenSpout\Reader\RowIteratorInterface;
  15  use OpenSpout\Reader\Wrapper\XMLReader;
  16  
  17  final class RowIterator implements RowIteratorInterface
  18  {
  19      /**
  20       * Definition of XML nodes names used to parse data.
  21       */
  22      public const XML_NODE_TABLE = 'table:table';
  23      public const XML_NODE_ROW = 'table:table-row';
  24      public const XML_NODE_CELL = 'table:table-cell';
  25      public const MAX_COLUMNS_EXCEL = 16384;
  26  
  27      /**
  28       * Definition of XML attribute used to parse data.
  29       */
  30      public const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
  31      public const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
  32  
  33      private Options $options;
  34  
  35      /** @var XMLProcessor Helper Object to process XML nodes */
  36      private XMLProcessor $xmlProcessor;
  37  
  38      /** @var Helper\CellValueFormatter Helper to format cell values */
  39      private Helper\CellValueFormatter $cellValueFormatter;
  40  
  41      /** @var bool Whether the iterator has already been rewound once */
  42      private bool $hasAlreadyBeenRewound = false;
  43  
  44      /** @var Row The currently processed row */
  45      private Row $currentlyProcessedRow;
  46  
  47      /** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
  48      private ?Row $rowBuffer;
  49  
  50      /** @var bool Indicates whether all rows have been read */
  51      private bool $hasReachedEndOfFile = false;
  52  
  53      /** @var int Last row index processed (one-based) */
  54      private int $lastRowIndexProcessed = 0;
  55  
  56      /** @var int Row index to be processed next (one-based) */
  57      private int $nextRowIndexToBeProcessed = 1;
  58  
  59      /** @var null|Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
  60      private ?Cell $lastProcessedCell;
  61  
  62      /** @var int Number of times the last processed row should be repeated */
  63      private int $numRowsRepeated = 1;
  64  
  65      /** @var int Number of times the last cell value should be copied to the cells on its right */
  66      private int $numColumnsRepeated = 1;
  67  
  68      /** @var bool Whether at least one cell has been read for the row currently being processed */
  69      private bool $hasAlreadyReadOneCellInCurrentRow = false;
  70  
  71      public function __construct(
  72          Options $options,
  73          CellValueFormatter $cellValueFormatter,
  74          XMLProcessor $xmlProcessor
  75      ) {
  76          $this->cellValueFormatter = $cellValueFormatter;
  77  
  78          // Register all callbacks to process different nodes when reading the XML file
  79          $this->xmlProcessor = $xmlProcessor;
  80          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
  81          $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
  82          $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
  83          $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
  84          $this->options = $options;
  85      }
  86  
  87      /**
  88       * Rewind the Iterator to the first element.
  89       * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
  90       *
  91       * @see http://php.net/manual/en/iterator.rewind.php
  92       *
  93       * @throws \OpenSpout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
  94       */
  95      public function rewind(): void
  96      {
  97          // Because sheet and row data is located in the file, we can't rewind both the
  98          // sheet iterator and the row iterator, as XML file cannot be read backwards.
  99          // Therefore, rewinding the row iterator has been disabled.
 100          if ($this->hasAlreadyBeenRewound) {
 101              throw new IteratorNotRewindableException();
 102          }
 103  
 104          $this->hasAlreadyBeenRewound = true;
 105          $this->lastRowIndexProcessed = 0;
 106          $this->nextRowIndexToBeProcessed = 1;
 107          $this->rowBuffer = null;
 108          $this->hasReachedEndOfFile = false;
 109  
 110          $this->next();
 111      }
 112  
 113      /**
 114       * Checks if current position is valid.
 115       *
 116       * @see http://php.net/manual/en/iterator.valid.php
 117       */
 118      public function valid(): bool
 119      {
 120          return !$this->hasReachedEndOfFile;
 121      }
 122  
 123      /**
 124       * Move forward to next element. Empty rows will be skipped.
 125       *
 126       * @see http://php.net/manual/en/iterator.next.php
 127       *
 128       * @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 129       * @throws \OpenSpout\Common\Exception\IOException                   If unable to read the sheet data XML
 130       */
 131      public function next(): void
 132      {
 133          if ($this->doesNeedDataForNextRowToBeProcessed()) {
 134              $this->readDataForNextRow();
 135          }
 136  
 137          ++$this->lastRowIndexProcessed;
 138      }
 139  
 140      /**
 141       * Return the current element, from the buffer.
 142       *
 143       * @see http://php.net/manual/en/iterator.current.php
 144       */
 145      public function current(): Row
 146      {
 147          return $this->rowBuffer;
 148      }
 149  
 150      /**
 151       * Return the key of the current element.
 152       *
 153       * @see http://php.net/manual/en/iterator.key.php
 154       */
 155      public function key(): int
 156      {
 157          return $this->lastRowIndexProcessed;
 158      }
 159  
 160      /**
 161       * Returns whether we need data for the next row to be processed.
 162       * We DO need to read data if:
 163       *   - we have not read any rows yet
 164       *      OR
 165       *   - the next row to be processed immediately follows the last read row.
 166       *
 167       * @return bool whether we need data for the next row to be processed
 168       */
 169      private function doesNeedDataForNextRowToBeProcessed(): bool
 170      {
 171          $hasReadAtLeastOneRow = (0 !== $this->lastRowIndexProcessed);
 172  
 173          return
 174              !$hasReadAtLeastOneRow
 175              || $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1;
 176      }
 177  
 178      /**
 179       * @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 180       * @throws \OpenSpout\Common\Exception\IOException                   If unable to read the sheet data XML
 181       */
 182      private function readDataForNextRow(): void
 183      {
 184          $this->currentlyProcessedRow = new Row([], null);
 185  
 186          $this->xmlProcessor->readUntilStopped();
 187  
 188          $this->rowBuffer = $this->currentlyProcessedRow;
 189      }
 190  
 191      /**
 192       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 193       *
 194       * @return int A return code that indicates what action should the processor take next
 195       */
 196      private function processRowStartingNode(XMLReader $xmlReader): int
 197      {
 198          // Reset data from current row
 199          $this->hasAlreadyReadOneCellInCurrentRow = false;
 200          $this->lastProcessedCell = null;
 201          $this->numColumnsRepeated = 1;
 202          $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
 203  
 204          return XMLProcessor::PROCESSING_CONTINUE;
 205      }
 206  
 207      /**
 208       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 209       *
 210       * @return int A return code that indicates what action should the processor take next
 211       */
 212      private function processCellStartingNode(XMLReader $xmlReader): int
 213      {
 214          $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
 215  
 216          // NOTE: expand() will automatically decode all XML entities of the child nodes
 217          /** @var DOMElement $node */
 218          $node = $xmlReader->expand();
 219          $currentCell = $this->getCell($node);
 220  
 221          // process cell N only after having read cell N+1 (see below why)
 222          if ($this->hasAlreadyReadOneCellInCurrentRow) {
 223              for ($i = 0; $i < $this->numColumnsRepeated; ++$i) {
 224                  $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
 225              }
 226          }
 227  
 228          $this->hasAlreadyReadOneCellInCurrentRow = true;
 229          $this->lastProcessedCell = $currentCell;
 230          $this->numColumnsRepeated = $currentNumColumnsRepeated;
 231  
 232          return XMLProcessor::PROCESSING_CONTINUE;
 233      }
 234  
 235      /**
 236       * @return int A return code that indicates what action should the processor take next
 237       */
 238      private function processRowEndingNode(): int
 239      {
 240          $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);
 241  
 242          // if the fetched row is empty and we don't want to preserve it...
 243          if (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyRow) {
 244              // ... skip it
 245              return XMLProcessor::PROCESSING_CONTINUE;
 246          }
 247  
 248          // if the row is empty, we don't want to return more than one cell
 249          $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
 250          $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();
 251  
 252          // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
 253          // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData".
 254          // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
 255          // with a number-columns-repeated value equals to the number of (supported columns - used columns).
 256          // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
 257          // always 16384 cells.
 258          if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
 259              for ($i = 0; $i < $actualNumColumnsRepeated; ++$i) {
 260                  $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
 261              }
 262          }
 263  
 264          // If we are processing row N and the row is repeated M times,
 265          // then the next row to be processed will be row (N+M).
 266          $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;
 267  
 268          // at this point, we have all the data we need for the row
 269          // so that we can populate the buffer
 270          return XMLProcessor::PROCESSING_STOP;
 271      }
 272  
 273      /**
 274       * @return int A return code that indicates what action should the processor take next
 275       */
 276      private function processTableEndingNode(): int
 277      {
 278          // The closing "</table:table>" marks the end of the file
 279          $this->hasReachedEndOfFile = true;
 280  
 281          return XMLProcessor::PROCESSING_STOP;
 282      }
 283  
 284      /**
 285       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 286       *
 287       * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
 288       */
 289      private function getNumRowsRepeatedForCurrentNode(XMLReader $xmlReader): int
 290      {
 291          $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
 292  
 293          return (null !== $numRowsRepeated) ? (int) $numRowsRepeated : 1;
 294      }
 295  
 296      /**
 297       * @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 298       *
 299       * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
 300       */
 301      private function getNumColumnsRepeatedForCurrentNode(XMLReader $xmlReader): int
 302      {
 303          $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
 304  
 305          return (null !== $numColumnsRepeated) ? (int) $numColumnsRepeated : 1;
 306      }
 307  
 308      /**
 309       * Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node.
 310       *
 311       * @return Cell The cell set with the associated with the cell
 312       */
 313      private function getCell(DOMElement $node): Cell
 314      {
 315          try {
 316              $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
 317              $cell = Cell::fromValue($cellValue);
 318          } catch (InvalidValueException $exception) {
 319              $cell = new Cell\ErrorCell($exception->getInvalidValue(), null);
 320          }
 321  
 322          return $cell;
 323      }
 324  
 325      /**
 326       * After finishing processing each cell, a row is considered empty if it contains
 327       * no cells or if the last read cell is empty.
 328       * After finishing processing each cell, the last read cell is not part of the
 329       * row data yet (as we still need to apply the "num-columns-repeated" attribute).
 330       *
 331       * @param null|Cell $lastReadCell The last read cell
 332       *
 333       * @return bool Whether the row is empty
 334       */
 335      private function isEmptyRow(Row $currentRow, ?Cell $lastReadCell): bool
 336      {
 337          return
 338              $currentRow->isEmpty()
 339              && (null === $lastReadCell || $lastReadCell instanceof Cell\EmptyCell);
 340      }
 341  }