Differences Between: [Versions 402 and 403]
<?php

declare(strict_types=1);

namespace OpenSpout\Reader\CSV;

use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Reader\RowIteratorInterface;

/**
 * Iterate over CSV rows.
 *
 * Rows are read one at a time with fgetcsv(); every cell is converted to UTF-8
 * through the injected EncodingHelper before being wrapped in a Row of Cells.
 */
final class RowIterator implements RowIteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates for very long lines).
     */
    public const MAX_READ_BYTES_PER_LINE = 0;

    /** @var null|resource Pointer to the CSV file to read */
    private $filePointer;

    /** @var int Number of read rows */
    private int $numReadRows = 0;

    /**
     * @var null|Row Buffer used to store the current row, while checking if there are more rows to read
     *
     * Defaults to null so that current() can be called before rewind() without
     * hitting an "accessed before initialization" error on the typed property.
     */
    private ?Row $rowBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    private bool $hasReachedEndOfFile = false;

    private Options $options;

    /** @var EncodingHelper Helper to work with different encodings */
    private EncodingHelper $encodingHelper;

    /**
     * @param resource $filePointer Pointer to the CSV file to read
     */
    public function __construct(
        $filePointer,
        Options $options,
        EncodingHelper $encodingHelper
    ) {
        $this->filePointer = $filePointer;
        $this->options = $options;
        $this->encodingHelper = $encodingHelper;
    }

    /**
     * Rewind the Iterator to the first element.
     *
     * Moves the file cursor just after the BOM (if any), resets the row counter
     * and the buffer, then loads the first row.
     *
     * @see http://php.net/manual/en/iterator.rewind.php
     */
    public function rewind(): void
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowBuffer = null;

        $this->next();
    }

    /**
     * Checks if current position is valid.
     *
     * @see http://php.net/manual/en/iterator.valid.php
     */
    public function valid(): bool
    {
        return null !== $this->filePointer && !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     *
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next(): void
    {
        $this->hasReachedEndOfFile = feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Return the current element from the buffer.
     *
     * @see http://php.net/manual/en/iterator.current.php
     */
    public function current(): ?Row
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element, i.e. the number of rows read so far.
     *
     * @see http://php.net/manual/en/iterator.key.php
     */
    public function key(): int
    {
        return $this->numReadRows;
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     */
    private function rewindAndSkipBom(): void
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM(
            $this->filePointer,
            $this->options->ENCODING
        );

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Reads rows until one should be returned, then stores it in the buffer.
     * Flags the end of file when nothing more could be read.
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    private function readDataForNextRow(): void
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if (false === $rowData) {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;

            return;
        }

        // The string cast replaces NULL values by empty strings
        $cells = array_map(
            static fn ($cellValue) => Cell::fromValue((string) $cellValue),
            $rowData
        );

        $this->rowBuffer = new Row($cells, null);
        ++$this->numReadRows;
    }

    /**
     * @param array<int, null|string>|bool $currentRowData
     *
     * @return bool Whether the data for the current row can be returned or if we need to keep reading
     */
    private function shouldReadNextRow($currentRowData): bool
    {
        $hasSuccessfullyFetchedRowData = (false !== $currentRowData);
        $hasNowReachedEndOfFile = feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        // Keep reading when the read failed without reaching the end of the file,
        // or when the line is empty and empty rows must not be preserved.
        return
            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile)
            || (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyLine);
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
     *
     * @return array<int, null|string>|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    private function getNextUTF8EncodedRow(): array|false
    {
        $encodedRowData = fgetcsv(
            $this->filePointer,
            self::MAX_READ_BYTES_PER_LINE,
            $this->options->FIELD_DELIMITER,
            $this->options->FIELD_ENCLOSURE,
            ''
        );
        if (false === $encodedRowData) {
            return false;
        }

        $encoding = $this->options->ENCODING;

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            // fgetcsv() reports a blank line as a single NULL cell; ltrim()/rtrim() reject NULL
            // under strict_types, so NULL cells are handed to the converter untouched.
            if (null !== $cellValue) {
                switch ($encoding) {
                    case EncodingHelper::ENCODING_UTF16_LE:
                    case EncodingHelper::ENCODING_UTF32_LE:
                        // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                        $cellValue = ltrim($cellValue);

                        break;

                    case EncodingHelper::ENCODING_UTF16_BE:
                    case EncodingHelper::ENCODING_UTF32_BE:
                        // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                        $cellValue = rtrim($cellValue);

                        break;
                }
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $encoding);
        }

        return $encodedRowData;
    }

    /**
     * @param array<int, null|string>|bool $lineData Array containing the cells value for the line
     *
     * @return bool Whether the given line is empty (a single NULL cell)
     */
    private function isEmptyLine($lineData): bool
    {
        return \is_array($lineData) && 1 === \count($lineData) && null === $lineData[0];
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body