Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\CSV;
   4  
   5  use Box\Spout\Common\Entity\Row;
   6  use Box\Spout\Common\Helper\EncodingHelper;
   7  use Box\Spout\Common\Helper\GlobalFunctionsHelper;
   8  use Box\Spout\Common\Manager\OptionsManagerInterface;
   9  use Box\Spout\Reader\Common\Entity\Options;
  10  use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
  11  use Box\Spout\Reader\IteratorInterface;
  12  
  13  /**
  14   * Class RowIterator
  15   * Iterate over CSV rows.
  16   */
  17  class RowIterator implements IteratorInterface
  18  {
  19      /**
  20       * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accomodates for very long lines).
  21       */
  22      const MAX_READ_BYTES_PER_LINE = 0;
  23  
  24      /** @var resource Pointer to the CSV file to read */
  25      protected $filePointer;
  26  
  27      /** @var int Number of read rows */
  28      protected $numReadRows = 0;
  29  
  30      /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
  31      protected $rowBuffer;
  32  
  33      /** @var bool Indicates whether all rows have been read */
  34      protected $hasReachedEndOfFile = false;
  35  
  36      /** @var string Defines the character used to delimit fields (one character only) */
  37      protected $fieldDelimiter;
  38  
  39      /** @var string Defines the character used to enclose fields (one character only) */
  40      protected $fieldEnclosure;
  41  
  42      /** @var string Encoding of the CSV file to be read */
  43      protected $encoding;
  44  
  45      /** @var bool Whether empty rows should be returned or skipped */
  46      protected $shouldPreserveEmptyRows;
  47  
  48      /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
  49      protected $encodingHelper;
  50  
  51      /** @var \Box\Spout\Reader\CSV\Creator\InternalEntityFactory Factory to create entities */
  52      protected $entityFactory;
  53  
  54      /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
  55      protected $globalFunctionsHelper;
  56  
  57      /**
  58       * @param resource $filePointer Pointer to the CSV file to read
  59       * @param OptionsManagerInterface $optionsManager
  60       * @param EncodingHelper $encodingHelper
  61       * @param InternalEntityFactory $entityFactory
  62       * @param GlobalFunctionsHelper $globalFunctionsHelper
  63       */
  64      public function __construct(
  65          $filePointer,
  66          OptionsManagerInterface $optionsManager,
  67          EncodingHelper $encodingHelper,
  68          InternalEntityFactory $entityFactory,
  69          GlobalFunctionsHelper $globalFunctionsHelper
  70      ) {
  71          $this->filePointer = $filePointer;
  72          $this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER);
  73          $this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE);
  74          $this->encoding = $optionsManager->getOption(Options::ENCODING);
  75          $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
  76          $this->encodingHelper = $encodingHelper;
  77          $this->entityFactory = $entityFactory;
  78          $this->globalFunctionsHelper = $globalFunctionsHelper;
  79      }
  80  
  81      /**
  82       * Rewind the Iterator to the first element
  83       * @see http://php.net/manual/en/iterator.rewind.php
  84       *
  85       * @return void
  86       */
  87      public function rewind() : void
  88      {
  89          $this->rewindAndSkipBom();
  90  
  91          $this->numReadRows = 0;
  92          $this->rowBuffer = null;
  93  
  94          $this->next();
  95      }
  96  
  97      /**
  98       * This rewinds and skips the BOM if inserted at the beginning of the file
  99       * by moving the file pointer after it, so that it is not read.
 100       *
 101       * @return void
 102       */
 103      protected function rewindAndSkipBom()
 104      {
 105          $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);
 106  
 107          // sets the cursor after the BOM (0 means no BOM, so rewind it)
 108          $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
 109      }
 110  
 111      /**
 112       * Checks if current position is valid
 113       * @see http://php.net/manual/en/iterator.valid.php
 114       *
 115       * @return bool
 116       */
 117      public function valid() : bool
 118      {
 119          return ($this->filePointer && !$this->hasReachedEndOfFile);
 120      }
 121  
 122      /**
 123       * Move forward to next element. Reads data for the next unprocessed row.
 124       * @see http://php.net/manual/en/iterator.next.php
 125       *
 126       * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 127       * @return void
 128       */
 129      public function next() : void
 130      {
 131          $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
 132  
 133          if (!$this->hasReachedEndOfFile) {
 134              $this->readDataForNextRow();
 135          }
 136      }
 137  
 138      /**
 139       * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 140       * @return void
 141       */
 142      protected function readDataForNextRow()
 143      {
 144          do {
 145              $rowData = $this->getNextUTF8EncodedRow();
 146          } while ($this->shouldReadNextRow($rowData));
 147  
 148          if ($rowData !== false) {
 149              // array_map will replace NULL values by empty strings
 150              $rowDataBufferAsArray = array_map(function ($value) { return (string) $value; }, $rowData);
 151              $this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray);
 152              $this->numReadRows++;
 153          } else {
 154              // If we reach this point, it means end of file was reached.
 155              // This happens when the last lines are empty lines.
 156              $this->hasReachedEndOfFile = true;
 157          }
 158      }
 159  
 160      /**
 161       * @param array|bool $currentRowData
 162       * @return bool Whether the data for the current row can be returned or if we need to keep reading
 163       */
 164      protected function shouldReadNextRow($currentRowData)
 165      {
 166          $hasSuccessfullyFetchedRowData = ($currentRowData !== false);
 167          $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
 168          $isEmptyLine = $this->isEmptyLine($currentRowData);
 169  
 170          return (
 171              (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
 172              (!$this->shouldPreserveEmptyRows && $isEmptyLine)
 173          );
 174      }
 175  
 176      /**
 177       * Returns the next row, converted if necessary to UTF-8.
 178       * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
 179       * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
 180       *
 181       * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 182       * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
 183       */
 184      protected function getNextUTF8EncodedRow()
 185      {
 186          $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
 187          if ($encodedRowData === false) {
 188              return false;
 189          }
 190  
 191          foreach ($encodedRowData as $cellIndex => $cellValue) {
 192              switch ($this->encoding) {
 193                  case EncodingHelper::ENCODING_UTF16_LE:
 194                  case EncodingHelper::ENCODING_UTF32_LE:
 195                      // remove whitespace from the beginning of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data
 196                      $cellValue = \ltrim($cellValue);
 197                      break;
 198  
 199                  case EncodingHelper::ENCODING_UTF16_BE:
 200                  case EncodingHelper::ENCODING_UTF32_BE:
 201                      // remove whitespace from the end of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data
 202                      $cellValue = \rtrim($cellValue);
 203                      break;
 204              }
 205  
 206              $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
 207          }
 208  
 209          return $encodedRowData;
 210      }
 211  
 212      /**
 213       * @param array|bool $lineData Array containing the cells value for the line
 214       * @return bool Whether the given line is empty
 215       */
 216      protected function isEmptyLine($lineData)
 217      {
 218          return (\is_array($lineData) && \count($lineData) === 1 && $lineData[0] === null);
 219      }
 220  
 221      /**
 222       * Return the current element from the buffer
 223       * @see http://php.net/manual/en/iterator.current.php
 224       *
 225       * @return Row|null
 226       */
 227      public function current() : ?Row
 228      {
 229          return $this->rowBuffer;
 230      }
 231  
 232      /**
 233       * Return the key of the current element
 234       * @see http://php.net/manual/en/iterator.key.php
 235       *
 236       * @return int
 237       */
 238      public function key() : int
 239      {
 240          return $this->numReadRows;
 241      }
 242  
 243      /**
 244       * Cleans up what was created to iterate over the object.
 245       *
 246       * @return void
 247       */
 248      public function end() : void
 249      {
 250          // do nothing
 251      }
 252  }