Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
   6  use PhpOffice\PhpSpreadsheet\Cell\Cell;
   7  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   8  use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
   9  use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  10  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  11  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  12  use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
  13  
  14  class Csv extends BaseReader
  15  {
    // Encoding used by guessEncoding() when neither a BOM nor the content identifies one.
    const DEFAULT_FALLBACK_ENCODING = 'CP1252';
    // Sentinel input encoding: openFileOrMemory() replaces it with the result of guessEncoding().
    const GUESS_ENCODING = 'guess';
    // Byte-order marks, their byte lengths, and the encoded line-feed sequence per Unicode encoding.
    const UTF8_BOM = "\xEF\xBB\xBF";
    const UTF8_BOM_LEN = 3;
    const UTF16BE_BOM = "\xfe\xff";
    const UTF16BE_BOM_LEN = 2;
    const UTF16BE_LF = "\x00\x0a";
    const UTF16LE_BOM = "\xff\xfe";
    const UTF16LE_BOM_LEN = 2;
    const UTF16LE_LF = "\x0a\x00";
    const UTF32BE_BOM = "\x00\x00\xfe\xff";
    const UTF32BE_BOM_LEN = 4;
    const UTF32BE_LF = "\x00\x00\x00\x0a";
    const UTF32LE_BOM = "\xff\xfe\x00\x00";
    const UTF32LE_BOM_LEN = 4;
    const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Encoding of the input file; anything other than 'UTF-8' is converted to UTF-8 on open.
     * May be set to self::GUESS_ENCODING to have it detected.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Encoding to use when guessing is requested but finds nothing.
     *
     * @var string
     */
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Field delimiter; null until set by the caller, by a "sep=" line, or by inference.
     *
     * @var ?string
     */
    private $delimiter;

    /**
     * Field enclosure character.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Index of the worksheet in the target Spreadsheet that receives the loaded data.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * When true, output rows are numbered consecutively: only input rows that
     * produce at least one cell advance the output row.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Callback invoked with each new reader instance at the end of construction,
     * or null when none is registered.
     *
     * @var ?callable
     */
    private static $constructorCallback;

    /**
     * Whether loading may toggle the auto_detect_line_endings ini setting
     * (deprecated after PHP8.1).
     *
     * @var bool
     */
    private $testAutodetect = true;

    /**
     * Whether fields that look like locale-formatted numbers are cast to int/float.
     *
     * @var bool
     */
    protected $castFormattedNumberToNumeric = false;

    /**
     * Whether a number format mask is derived from the original text of a cast number.
     *
     * @var bool
     */
    protected $preserveNumericFormatting = false;

    /**
     * Whether empty-string fields are still written to the worksheet.
     *
     * @var bool
     */
    private $preserveNullString = false;
 108  
 109      /**
 110       * Create a new CSV Reader instance.
 111       */
 112      public function __construct()
 113      {
 114          parent::__construct();
 115          $callback = self::$constructorCallback;
 116          if ($callback !== null) {
 117              $callback($this);
 118          }
 119      }
 120  
 121      /**
 122       * Set a callback to change the defaults.
 123       *
 124       * The callback must accept the Csv Reader object as the first parameter,
 125       * and it should return void.
 126       */
 127      public static function setConstructorCallback(?callable $callback): void
 128      {
 129          self::$constructorCallback = $callback;
 130      }
 131  
 132      public static function getConstructorCallback(): ?callable
 133      {
 134          return self::$constructorCallback;
 135      }
 136  
 137      public function setInputEncoding(string $encoding): self
 138      {
 139          $this->inputEncoding = $encoding;
 140  
 141          return $this;
 142      }
 143  
 144      public function getInputEncoding(): string
 145      {
 146          return $this->inputEncoding;
 147      }
 148  
 149      public function setFallbackEncoding(string $fallbackEncoding): self
 150      {
 151          $this->fallbackEncoding = $fallbackEncoding;
 152  
 153          return $this;
 154      }
 155  
 156      public function getFallbackEncoding(): string
 157      {
 158          return $this->fallbackEncoding;
 159      }
 160  
 161      /**
 162       * Move filepointer past any BOM marker.
 163       */
 164      protected function skipBOM(): void
 165      {
 166          rewind($this->fileHandle);
 167  
 168          if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
 169              rewind($this->fileHandle);
 170          }
 171      }
 172  
 173      /**
 174       * Identify any separator that is explicitly set in the file.
 175       */
 176      protected function checkSeparator(): void
 177      {
 178          $line = fgets($this->fileHandle);
 179          if ($line === false) {
 180              return;
 181          }
 182  
 183          if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
 184              $this->delimiter = substr($line, 4, 1);
 185  
 186              return;
 187          }
 188  
 189          $this->skipBOM();
 190      }
 191  
 192      /**
 193       * Infer the separator if it isn't explicitly set in the file or specified by the user.
 194       */
 195      protected function inferSeparator(): void
 196      {
 197          if ($this->delimiter !== null) {
 198              return;
 199          }
 200  
 201          $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);
 202  
 203          // If number of lines is 0, nothing to infer : fall back to the default
 204          if ($inferenceEngine->linesCounted() === 0) {
 205              $this->delimiter = $inferenceEngine->getDefaultDelimiter();
 206              $this->skipBOM();
 207  
 208              return;
 209          }
 210  
 211          $this->delimiter = $inferenceEngine->infer();
 212  
 213          // If no delimiter could be detected, fall back to the default
 214          if ($this->delimiter === null) {
 215              $this->delimiter = $inferenceEngine->getDefaultDelimiter();
 216          }
 217  
 218          $this->skipBOM();
 219      }
 220  
 221      /**
 222       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 223       */
 224      public function listWorksheetInfo(string $filename): array
 225      {
 226          // Open file
 227          $this->openFileOrMemory($filename);
 228          $fileHandle = $this->fileHandle;
 229  
 230          // Skip BOM, if any
 231          $this->skipBOM();
 232          $this->checkSeparator();
 233          $this->inferSeparator();
 234  
 235          $worksheetInfo = [];
 236          $worksheetInfo[0]['worksheetName'] = 'Worksheet';
 237          $worksheetInfo[0]['lastColumnLetter'] = 'A';
 238          $worksheetInfo[0]['lastColumnIndex'] = 0;
 239          $worksheetInfo[0]['totalRows'] = 0;
 240          $worksheetInfo[0]['totalColumns'] = 0;
 241  
 242          // Loop through each line of the file in turn
 243          $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 244          while (is_array($rowData)) {
 245              ++$worksheetInfo[0]['totalRows'];
 246              $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
 247              $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 248          }
 249  
 250          $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
 251          $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
 252  
 253          // Close file
 254          fclose($fileHandle);
 255  
 256          return $worksheetInfo;
 257      }
 258  
 259      /**
 260       * Loads Spreadsheet from file.
 261       */
 262      protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
 263      {
 264          // Create new Spreadsheet
 265          $spreadsheet = new Spreadsheet();
 266  
 267          // Load into this instance
 268          return $this->loadIntoExisting($filename, $spreadsheet);
 269      }
 270  
 271      /**
 272       * Loads Spreadsheet from string.
 273       */
 274      public function loadSpreadsheetFromString(string $contents): Spreadsheet
 275      {
 276          // Create new Spreadsheet
 277          $spreadsheet = new Spreadsheet();
 278  
 279          // Load into this instance
 280          return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
 281      }
 282  
 283      private function openFileOrMemory(string $filename): void
 284      {
 285          // Open file
 286          $fhandle = $this->canRead($filename);
 287          if (!$fhandle) {
 288              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 289          }
 290          if ($this->inputEncoding === self::GUESS_ENCODING) {
 291              $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
 292          }
 293          $this->openFile($filename);
 294          if ($this->inputEncoding !== 'UTF-8') {
 295              fclose($this->fileHandle);
 296              $entireFile = file_get_contents($filename);
 297              $this->fileHandle = fopen('php://memory', 'r+b');
 298              if ($this->fileHandle !== false && $entireFile !== false) {
 299                  $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
 300                  fwrite($this->fileHandle, $data);
 301                  $this->skipBOM();
 302              }
 303          }
 304      }
 305  
 306      public function setTestAutoDetect(bool $value): self
 307      {
 308          $this->testAutodetect = $value;
 309  
 310          return $this;
 311      }
 312  
 313      private function setAutoDetect(?string $value): ?string
 314      {
 315          $retVal = null;
 316          if ($value !== null && $this->testAutodetect) {
 317              $retVal2 = @ini_set('auto_detect_line_endings', $value);
 318              if (is_string($retVal2)) {
 319                  $retVal = $retVal2;
 320              }
 321          }
 322  
 323          return $retVal;
 324      }
 325  
 326      public function castFormattedNumberToNumeric(
 327          bool $castFormattedNumberToNumeric,
 328          bool $preserveNumericFormatting = false
 329      ): void {
 330          $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
 331          $this->preserveNumericFormatting = $preserveNumericFormatting;
 332      }
 333  
 334      /**
 335       * Open data uri for reading.
 336       */
 337      private function openDataUri(string $filename): void
 338      {
 339          $fileHandle = fopen($filename, 'rb');
 340          if ($fileHandle === false) {
 341              // @codeCoverageIgnoreStart
 342              throw new ReaderException('Could not open file ' . $filename . ' for reading.');
 343              // @codeCoverageIgnoreEnd
 344          }
 345  
 346          $this->fileHandle = $fileHandle;
 347      }
 348  
 349      /**
 350       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 351       */
 352      public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
 353      {
 354          return $this->loadStringOrFile($filename, $spreadsheet, false);
 355      }
 356  
 357      /**
 358       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 359       */
 360      private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
 361      {
 362          // Deprecated in Php8.1
 363          $iniset = $this->setAutoDetect('1');
 364  
 365          // Open file
 366          if ($dataUri) {
 367              $this->openDataUri($filename);
 368          } else {
 369              $this->openFileOrMemory($filename);
 370          }
 371          $fileHandle = $this->fileHandle;
 372  
 373          // Skip BOM, if any
 374          $this->skipBOM();
 375          $this->checkSeparator();
 376          $this->inferSeparator();
 377  
 378          // Create new PhpSpreadsheet object
 379          while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
 380              $spreadsheet->createSheet();
 381          }
 382          $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
 383  
 384          // Set our starting row based on whether we're in contiguous mode or not
 385          $currentRow = 1;
 386          $outRow = 0;
 387  
 388          // Loop through each line of the file in turn
 389          $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 390          $valueBinder = Cell::getValueBinder();
 391          $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
 392          while (is_array($rowData)) {
 393              $noOutputYet = true;
 394              $columnLetter = 'A';
 395              foreach ($rowData as $rowDatum) {
 396                  $this->convertBoolean($rowDatum, $preserveBooleanString);
 397                  $numberFormatMask = $this->convertFormattedNumber($rowDatum);
 398                  if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
 399                      if ($this->contiguous) {
 400                          if ($noOutputYet) {
 401                              $noOutputYet = false;
 402                              ++$outRow;
 403                          }
 404                      } else {
 405                          $outRow = $currentRow;
 406                      }
 407                      // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
 408                      $sheet->getCell($columnLetter . $outRow)->getStyle()
 409                          ->getNumberFormat()
 410                          ->setFormatCode($numberFormatMask);
 411                      // Set cell value
 412                      $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
 413                  }
 414                  ++$columnLetter;
 415              }
 416              $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 417              ++$currentRow;
 418          }
 419  
 420          // Close file
 421          fclose($fileHandle);
 422  
 423          $this->setAutoDetect($iniset);
 424  
 425          // Return
 426          return $spreadsheet;
 427      }
 428  
 429      /**
 430       * Convert string true/false to boolean, and null to null-string.
 431       *
 432       * @param mixed $rowDatum
 433       */
 434      private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
 435      {
 436          if (is_string($rowDatum) && !$preserveBooleanString) {
 437              if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
 438                  $rowDatum = true;
 439              } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
 440                  $rowDatum = false;
 441              }
 442          } else {
 443              $rowDatum = $rowDatum ?? '';
 444          }
 445      }
 446  
 447      /**
 448       * Convert numeric strings to int or float values.
 449       *
 450       * @param mixed $rowDatum
 451       */
 452      private function convertFormattedNumber(&$rowDatum): string
 453      {
 454          $numberFormatMask = NumberFormat::FORMAT_GENERAL;
 455          if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
 456              $numeric = str_replace(
 457                  [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
 458                  ['', '.'],
 459                  $rowDatum
 460              );
 461  
 462              if (is_numeric($numeric)) {
 463                  $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
 464                  if ($this->preserveNumericFormatting === true) {
 465                      $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false)
 466                          ? '#,##0' : '0';
 467                      if ($decimalPos !== false) {
 468                          $decimals = strlen($rowDatum) - $decimalPos - 1;
 469                          $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
 470                      }
 471                  }
 472  
 473                  $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric;
 474              }
 475          }
 476  
 477          return $numberFormatMask;
 478      }
 479  
 480      public function getDelimiter(): ?string
 481      {
 482          return $this->delimiter;
 483      }
 484  
 485      public function setDelimiter(?string $delimiter): self
 486      {
 487          $this->delimiter = $delimiter;
 488  
 489          return $this;
 490      }
 491  
 492      public function getEnclosure(): string
 493      {
 494          return $this->enclosure;
 495      }
 496  
 497      public function setEnclosure(string $enclosure): self
 498      {
 499          if ($enclosure == '') {
 500              $enclosure = '"';
 501          }
 502          $this->enclosure = $enclosure;
 503  
 504          return $this;
 505      }
 506  
 507      public function getSheetIndex(): int
 508      {
 509          return $this->sheetIndex;
 510      }
 511  
 512      public function setSheetIndex(int $indexValue): self
 513      {
 514          $this->sheetIndex = $indexValue;
 515  
 516          return $this;
 517      }
 518  
 519      public function setContiguous(bool $contiguous): self
 520      {
 521          $this->contiguous = $contiguous;
 522  
 523          return $this;
 524      }
 525  
 526      public function getContiguous(): bool
 527      {
 528          return $this->contiguous;
 529      }
 530  
 531      public function setEscapeCharacter(string $escapeCharacter): self
 532      {
 533          $this->escapeCharacter = $escapeCharacter;
 534  
 535          return $this;
 536      }
 537  
 538      public function getEscapeCharacter(): string
 539      {
 540          return $this->escapeCharacter;
 541      }
 542  
 543      /**
 544       * Can the current IReader read the file?
 545       */
 546      public function canRead(string $filename): bool
 547      {
 548          // Check if file exists
 549          try {
 550              $this->openFile($filename);
 551          } catch (ReaderException $e) {
 552              return false;
 553          }
 554  
 555          fclose($this->fileHandle);
 556  
 557          // Trust file extension if any
 558          $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
 559          if (in_array($extension, ['csv', 'tsv'])) {
 560              return true;
 561          }
 562  
 563          // Attempt to guess mimetype
 564          $type = mime_content_type($filename);
 565          $supportedTypes = [
 566              'application/csv',
 567              'text/csv',
 568              'text/plain',
 569              'inode/x-empty',
 570          ];
 571  
 572          return in_array($type, $supportedTypes, true);
 573      }
 574  
 575      private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
 576      {
 577          if ($encoding === '') {
 578              $pos = strpos($contents, $compare);
 579              if ($pos !== false && $pos % strlen($compare) === 0) {
 580                  $encoding = $setEncoding;
 581              }
 582          }
 583      }
 584  
 585      private static function guessEncodingNoBom(string $filename): string
 586      {
 587          $encoding = '';
 588          $contents = file_get_contents($filename);
 589          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
 590          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
 591          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
 592          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
 593          if ($encoding === '' && preg_match('//u', $contents) === 1) {
 594              $encoding = 'UTF-8';
 595          }
 596  
 597          return $encoding;
 598      }
 599  
 600      private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
 601      {
 602          if ($encoding === '') {
 603              if ($compare === substr($first4, 0, strlen($compare))) {
 604                  $encoding = $setEncoding;
 605              }
 606          }
 607      }
 608  
 609      private static function guessEncodingBom(string $filename): string
 610      {
 611          $encoding = '';
 612          $first4 = file_get_contents($filename, false, null, 0, 4);
 613          if ($first4 !== false) {
 614              self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
 615              self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
 616              self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
 617              self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
 618              self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
 619          }
 620  
 621          return $encoding;
 622      }
 623  
 624      public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
 625      {
 626          $encoding = self::guessEncodingBom($filename);
 627          if ($encoding === '') {
 628              $encoding = self::guessEncodingNoBom($filename);
 629          }
 630  
 631          return ($encoding === '') ? $dflt : $encoding;
 632      }
 633  
 634      public function setPreserveNullString(bool $value): self
 635      {
 636          $this->preserveNullString = $value;
 637  
 638          return $this;
 639      }
 640  
 641      public function getPreserveNullString(): bool
 642      {
 643          return $this->preserveNullString;
 644      }
 645  }