Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\CSV;
   6  
   7  use OpenSpout\Common\Entity\Cell;
   8  use OpenSpout\Common\Entity\Row;
   9  use OpenSpout\Common\Helper\EncodingHelper;
  10  use OpenSpout\Reader\RowIteratorInterface;
  11  
  /**
   * Iterate over CSV rows.
   *
   * Rows are pulled one at a time from the file pointer with fgetcsv(), passed through
   * the EncodingHelper for conversion to UTF-8 and exposed as Row objects.
   */
  final class RowIterator implements RowIteratorInterface
  {
      /**
       * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates for very long lines).
       */
      public const MAX_READ_BYTES_PER_LINE = 0;

      /** @var null|resource Pointer to the CSV file to read */
      private $filePointer;

      /** @var int Number of read rows */
      private int $numReadRows = 0;

      /**
       * Buffer used to store the current row, while checking if there are more rows to read.
       *
       * Defaults to null so that current() can be called before rewind() without hitting
       * an "accessed before initialization" error on the typed property.
       */
      private ?Row $rowBuffer = null;

      /** @var bool Indicates whether all rows have been read */
      private bool $hasReachedEndOfFile = false;

      /** @var Options Reader options (delimiter, enclosure, encoding, empty rows handling) */
      private Options $options;

      /** @var EncodingHelper Helper to work with different encodings */
      private EncodingHelper $encodingHelper;

      /**
       * @param resource       $filePointer    Pointer to the CSV file to read
       * @param Options        $options        Reader options used while parsing
       * @param EncodingHelper $encodingHelper Helper used to skip the BOM and convert cells to UTF-8
       */
      public function __construct(
          $filePointer,
          Options $options,
          EncodingHelper $encodingHelper
      ) {
          $this->filePointer = $filePointer;
          $this->options = $options;
          $this->encodingHelper = $encodingHelper;
      }

      /**
       * Rewind the Iterator to the first element.
       *
       * Moves the file cursor back to the start of the data (after any BOM), resets the
       * row counter and the row buffer, then loads the first row.
       *
       * @see http://php.net/manual/en/iterator.rewind.php
       */
      public function rewind(): void
      {
          $this->rewindAndSkipBom();

          $this->numReadRows = 0;
          $this->rowBuffer = null;

          $this->next();
      }

      /**
       * Checks if current position is valid.
       *
       * @see http://php.net/manual/en/iterator.valid.php
       */
      public function valid(): bool
      {
          return null !== $this->filePointer && !$this->hasReachedEndOfFile;
      }

      /**
       * Move forward to next element. Reads data for the next unprocessed row.
       *
       * @see http://php.net/manual/en/iterator.next.php
       *
       * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
       */
      public function next(): void
      {
          $this->hasReachedEndOfFile = feof($this->filePointer);

          if (!$this->hasReachedEndOfFile) {
              $this->readDataForNextRow();
          }
      }

      /**
       * Return the current element from the buffer.
       *
       * @see http://php.net/manual/en/iterator.current.php
       *
       * @return null|Row The buffered row, or null when no row has been read yet
       */
      public function current(): ?Row
      {
          return $this->rowBuffer;
      }

      /**
       * Return the key of the current element, i.e. the number of rows read so far.
       *
       * @see http://php.net/manual/en/iterator.key.php
       */
      public function key(): int
      {
          return $this->numReadRows;
      }

      /**
       * This rewinds and skips the BOM if inserted at the beginning of the file
       * by moving the file pointer after it, so that it is not read.
       */
      private function rewindAndSkipBom(): void
      {
          $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM(
              $this->filePointer,
              $this->options->ENCODING
          );

          // sets the cursor after the BOM (0 means no BOM, so rewind it)
          fseek($this->filePointer, $byteOffsetToSkipBom);
      }

      /**
       * Reads rows until one can be returned, then stores it in the row buffer.
       *
       * When nothing more can be read, the end-of-file flag is raised and the
       * buffer is left untouched.
       *
       * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
       */
      private function readDataForNextRow(): void
      {
          do {
              $rowData = $this->getNextUTF8EncodedRow();
          } while ($this->shouldReadNextRow($rowData));

          if (false === $rowData) {
              // If we reach this point, it means end of file was reached.
              // This happens when the last lines are empty lines.
              $this->hasReachedEndOfFile = true;

              return;
          }

          // The string cast replaces NULL values by empty strings before building the cells.
          $cells = array_map(
              static fn ($cellValue): Cell => Cell::fromValue((string) $cellValue),
              $rowData
          );

          $this->rowBuffer = new Row($cells, null);
          ++$this->numReadRows;
      }

      /**
       * @param array<int, null|string>|bool $currentRowData
       *
       * @return bool Whether the data for the current row can be returned or if we need to keep reading
       */
      private function shouldReadNextRow($currentRowData): bool
      {
          $hasSuccessfullyFetchedRowData = (false !== $currentRowData);
          $hasNowReachedEndOfFile = feof($this->filePointer);
          $isEmptyLine = $this->isEmptyLine($currentRowData);

          // Keep reading when the read failed although the file is not exhausted,
          // or when the line is empty and empty rows must not be preserved.
          return
              (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile)
              || (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyLine)
          ;
      }

      /**
       * Returns the next row, converted if necessary to UTF-8.
       * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
       * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
       *
       * @return array<int, null|string>|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
       *
       * @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
       */
      private function getNextUTF8EncodedRow(): array|false
      {
          $encodedRowData = fgetcsv(
              $this->filePointer,
              self::MAX_READ_BYTES_PER_LINE,
              $this->options->FIELD_DELIMITER,
              $this->options->FIELD_ENCLOSURE,
              ''
          );
          if (false === $encodedRowData) {
              return false;
          }

          $encoding = $this->options->ENCODING;

          foreach ($encodedRowData as $cellIndex => $cellValue) {
              // A cell may be NULL (see isEmptyLine()); trimming NULL would raise a TypeError
              // under strict_types, so only string cells are trimmed.
              if (null !== $cellValue) {
                  $cellValue = match ($encoding) {
                      // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                      EncodingHelper::ENCODING_UTF16_LE,
                      EncodingHelper::ENCODING_UTF32_LE => ltrim($cellValue),
                      // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                      EncodingHelper::ENCODING_UTF16_BE,
                      EncodingHelper::ENCODING_UTF32_BE => rtrim($cellValue),
                      default => $cellValue,
                  };
              }

              $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $encoding);
          }

          return $encodedRowData;
      }

      /**
       * @param array<int, null|string>|bool $lineData Array containing the cells value for the line
       *
       * @return bool Whether the given line is empty (a single cell whose value is NULL)
       */
      private function isEmptyLine($lineData): bool
      {
          return \is_array($lineData) && 1 === \count($lineData) && null === $lineData[0];
      }
  }