Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401]
1 <?php 2 3 namespace Box\Spout\Reader\XLSX; 4 5 use Box\Spout\Common\Entity\Cell; 6 use Box\Spout\Common\Entity\Row; 7 use Box\Spout\Common\Exception\IOException; 8 use Box\Spout\Reader\Common\Manager\RowManager; 9 use Box\Spout\Reader\Common\XMLProcessor; 10 use Box\Spout\Reader\Exception\InvalidValueException; 11 use Box\Spout\Reader\Exception\XMLProcessingException; 12 use Box\Spout\Reader\IteratorInterface; 13 use Box\Spout\Reader\Wrapper\XMLReader; 14 use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; 15 use Box\Spout\Reader\XLSX\Helper\CellHelper; 16 use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; 17 18 /** 19 * Class RowIterator 20 */ 21 class RowIterator implements IteratorInterface 22 { 23 /** Definition of XML nodes names used to parse data */ 24 const XML_NODE_DIMENSION = 'dimension'; 25 const XML_NODE_WORKSHEET = 'worksheet'; 26 const XML_NODE_ROW = 'row'; 27 const XML_NODE_CELL = 'c'; 28 29 /** Definition of XML attributes used to parse data */ 30 const XML_ATTRIBUTE_REF = 'ref'; 31 const XML_ATTRIBUTE_SPANS = 'spans'; 32 const XML_ATTRIBUTE_ROW_INDEX = 'r'; 33 const XML_ATTRIBUTE_CELL_INDEX = 'r'; 34 35 /** @var string Path of the XLSX file being read */ 36 protected $filePath; 37 38 /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */ 39 protected $sheetDataXMLFilePath; 40 41 /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ 42 protected $xmlReader; 43 44 /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ 45 protected $xmlProcessor; 46 47 /** @var Helper\CellValueFormatter Helper to format cell values */ 48 protected $cellValueFormatter; 49 50 /** @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows */ 51 protected $rowManager; 52 53 /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */ 54 protected $entityFactory; 55 56 /** 57 * TODO: This variable can be deleted when row indices get preserved 58 * @var int Number of read rows 59 */ 60 protected $numReadRows = 0; 61 62 /** @var Row Contains the row currently processed */ 63 protected $currentlyProcessedRow; 64 65 /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */ 66 protected $rowBuffer; 67 68 /** @var bool Indicates whether all rows have been read */ 69 protected $hasReachedEndOfFile = false; 70 71 /** @var int The number of columns the sheet has (0 meaning undefined) */ 72 protected $numColumns = 0; 73 74 /** @var bool Whether empty rows should be returned or skipped */ 75 protected $shouldPreserveEmptyRows; 76 77 /** @var int Last row index processed (one-based) */ 78 protected $lastRowIndexProcessed = 0; 79 80 /** @var int Row index to be processed next (one-based) */ 81 protected $nextRowIndexToBeProcessed = 0; 82 83 /** @var int Last column index processed (zero-based) */ 84 protected $lastColumnIndexProcessed = -1; 85 86 /** 87 * @param string $filePath Path of the XLSX file being read 88 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml 89 * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved 90 * @param XMLReader $xmlReader XML Reader 91 * @param XMLProcessor $xmlProcessor Helper to process XML files 92 * @param CellValueFormatter $cellValueFormatter Helper to format cell values 93 * @param RowManager $rowManager Manages rows 94 * @param InternalEntityFactory $entityFactory Factory to create entities 95 */ 96 public function __construct( 97 $filePath, 98 $sheetDataXMLFilePath, 99 $shouldPreserveEmptyRows, 100 $xmlReader, 101 XMLProcessor $xmlProcessor, 102 CellValueFormatter $cellValueFormatter, 103 RowManager $rowManager, 104 InternalEntityFactory $entityFactory 105 ) { 106 $this->filePath = $filePath; 107 $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); 108 $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; 109 $this->xmlReader = $xmlReader; 110 $this->cellValueFormatter = $cellValueFormatter; 111 $this->rowManager = $rowManager; 112 $this->entityFactory = $entityFactory; 113 114 // Register all callbacks to process different nodes when reading the XML file 115 $this->xmlProcessor = $xmlProcessor; 116 $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']); 117 $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); 118 $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); 119 $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); 120 $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']); 121 } 122 123 /** 124 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml 125 * @return string Path of the XML file containing the sheet data, 126 * without the leading slash. 127 */ 128 protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath) 129 { 130 return ltrim($sheetDataXMLFilePath, '/'); 131 } 132 133 /** 134 * Rewind the Iterator to the first element. 135 * Initializes the XMLReader object that reads the associated sheet data. 136 * The XMLReader is configured to be safe from billion laughs attack. 137 * @see http://php.net/manual/en/iterator.rewind.php 138 * 139 * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read 140 * @return void 141 */ 142 public function rewind() 143 { 144 $this->xmlReader->close(); 145 146 if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) { 147 throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\"."); 148 } 149 150 $this->numReadRows = 0; 151 $this->lastRowIndexProcessed = 0; 152 $this->nextRowIndexToBeProcessed = 0; 153 $this->rowBuffer = null; 154 $this->hasReachedEndOfFile = false; 155 $this->numColumns = 0; 156 157 $this->next(); 158 } 159 160 /** 161 * Checks if current position is valid 162 * @see http://php.net/manual/en/iterator.valid.php 163 * 164 * @return bool 165 */ 166 public function valid() 167 { 168 return (!$this->hasReachedEndOfFile); 169 } 170 171 /** 172 * Move forward to next element. Reads data describing the next unprocessed row. 173 * @see http://php.net/manual/en/iterator.next.php 174 * 175 * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found 176 * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML 177 * @return void 178 */ 179 public function next() 180 { 181 $this->nextRowIndexToBeProcessed++; 182 183 if ($this->doesNeedDataForNextRowToBeProcessed()) { 184 $this->readDataForNextRow(); 185 } 186 } 187 188 /** 189 * Returns whether we need data for the next row to be processed. 190 * We don't need to read data if: 191 * we have already read at least one row 192 * AND 193 * we need to preserve empty rows 194 * AND 195 * the last row that was read is not the row that need to be processed 196 * (i.e. if we need to return empty rows) 197 * 198 * @return bool Whether we need data for the next row to be processed. 199 */ 200 protected function doesNeedDataForNextRowToBeProcessed() 201 { 202 $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); 203 204 return ( 205 !$hasReadAtLeastOneRow || 206 !$this->shouldPreserveEmptyRows || 207 $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed 208 ); 209 } 210 211 /** 212 * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found 213 * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML 214 * @return void 215 */ 216 protected function readDataForNextRow() 217 { 218 $this->currentlyProcessedRow = $this->entityFactory->createRow(); 219 220 try { 221 $this->xmlProcessor->readUntilStopped(); 222 } catch (XMLProcessingException $exception) { 223 throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); 224 } 225 226 $this->rowBuffer = $this->currentlyProcessedRow; 227 } 228 229 /** 230 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node 231 * @return int A return code that indicates what action should the processor take next 232 */ 233 protected function processDimensionStartingNode($xmlReader) 234 { 235 // Read dimensions of the sheet 236 $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) 237 if (preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) { 238 $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1; 239 } 240 241 return XMLProcessor::PROCESSING_CONTINUE; 242 } 243 244 /** 245 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node 246 * @return int A return code that indicates what action should the processor take next 247 */ 248 protected function processRowStartingNode($xmlReader) 249 { 250 // Reset index of the last processed column 251 $this->lastColumnIndexProcessed = -1; 252 253 // Mark the last processed row as the one currently being read 254 $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader); 255 256 // Read spans info if present 257 $numberOfColumnsForRow = $this->numColumns; 258 $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance 259 if ($spans) { 260 list(, $numberOfColumnsForRow) = explode(':', $spans); 261 $numberOfColumnsForRow = (int) $numberOfColumnsForRow; 262 } 263 264 $cells = array_fill(0, $numberOfColumnsForRow, $this->entityFactory->createCell('')); 265 $this->currentlyProcessedRow->setCells($cells); 266 267 return XMLProcessor::PROCESSING_CONTINUE; 268 } 269 270 /** 271 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node 272 * @return int A return code that indicates what action should the processor take next 273 */ 274 protected function processCellStartingNode($xmlReader) 275 { 276 $currentColumnIndex = $this->getColumnIndex($xmlReader); 277 278 // NOTE: expand() will automatically decode all XML entities of the child nodes 279 $node = $xmlReader->expand(); 280 $cell = $this->getCell($node); 281 282 $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex); 283 $this->lastColumnIndexProcessed = $currentColumnIndex; 284 285 return XMLProcessor::PROCESSING_CONTINUE; 286 } 287 288 /** 289 * @return int A return code that indicates what action should the processor take next 290 */ 291 protected function processRowEndingNode() 292 { 293 // if the fetched row is empty and we don't want to preserve it.., 294 if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) { 295 // ... skip it 296 return XMLProcessor::PROCESSING_CONTINUE; 297 } 298 299 $this->numReadRows++; 300 301 // If needed, we fill the empty cells 302 if ($this->numColumns === 0) { 303 $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow); 304 } 305 306 // at this point, we have all the data we need for the row 307 // so that we can populate the buffer 308 return XMLProcessor::PROCESSING_STOP; 309 } 310 311 /** 312 * @return int A return code that indicates what action should the processor take next 313 */ 314 protected function processWorksheetEndingNode() 315 { 316 // The closing "</worksheet>" marks the end of the file 317 $this->hasReachedEndOfFile = true; 318 319 return XMLProcessor::PROCESSING_STOP; 320 } 321 322 /** 323 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node 324 * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid 325 * @return int Row index 326 */ 327 protected function getRowIndex($xmlReader) 328 { 329 // Get "r" attribute if present (from something like <row r="3"...> 330 $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX); 331 332 return ($currentRowIndex !== null) ? 333 (int) $currentRowIndex : 334 $this->lastRowIndexProcessed + 1; 335 } 336 337 /** 338 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node 339 * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid 340 * @return int Column index 341 */ 342 protected function getColumnIndex($xmlReader) 343 { 344 // Get "r" attribute if present (from something like <c r="A1"...> 345 $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); 346 347 return ($currentCellIndex !== null) ? 348 CellHelper::getColumnIndexFromCellIndex($currentCellIndex) : 349 $this->lastColumnIndexProcessed + 1; 350 } 351 352 /** 353 * Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node. 354 * 355 * @param \DOMNode $node 356 * @return Cell The cell set with the associated with the cell 357 */ 358 protected function getCell($node) 359 { 360 try { 361 $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node); 362 $cell = $this->entityFactory->createCell($cellValue); 363 } catch (InvalidValueException $exception) { 364 $cell = $this->entityFactory->createCell($exception->getInvalidValue()); 365 $cell->setType(Cell::TYPE_ERROR); 366 } 367 368 return $cell; 369 } 370 371 /** 372 * Return the current element, either an empty row or from the buffer. 373 * @see http://php.net/manual/en/iterator.current.php 374 * 375 * @return Row|null 376 */ 377 public function current() 378 { 379 $rowToBeProcessed = $this->rowBuffer; 380 381 if ($this->shouldPreserveEmptyRows) { 382 // when we need to preserve empty rows, we will either return 383 // an empty row or the last row read. This depends whether the 384 // index of last row that was read matches the index of the last 385 // row whose value should be returned. 386 if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { 387 // return empty row if mismatch between last processed row 388 // and the row that needs to be returned 389 $rowToBeProcessed = $this->entityFactory->createRow(); 390 } 391 } 392 393 return $rowToBeProcessed; 394 } 395 396 /** 397 * Return the key of the current element. Here, the row index. 398 * @see http://php.net/manual/en/iterator.key.php 399 * 400 * @return int 401 */ 402 public function key() 403 { 404 // TODO: This should return $this->nextRowIndexToBeProcessed 405 // but to avoid a breaking change, the return value for 406 // this function has been kept as the number of rows read. 407 return $this->shouldPreserveEmptyRows ? 408 $this->nextRowIndexToBeProcessed : 409 $this->numReadRows; 410 } 411 412 /** 413 * Cleans up what was created to iterate over the object. 414 * 415 * @return void 416 */ 417 public function end() 418 { 419 $this->xmlReader->close(); 420 } 421 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body