Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is also supported.

Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
   6  use PhpOffice\PhpSpreadsheet\Cell\Cell;
   7  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   8  use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
   9  use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  10  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  11  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  12  use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
  13  
  14  class Csv extends BaseReader
  15  {
  16      const DEFAULT_FALLBACK_ENCODING = 'CP1252';
  17      const GUESS_ENCODING = 'guess';
  18      const UTF8_BOM = "\xEF\xBB\xBF";
  19      const UTF8_BOM_LEN = 3;
  20      const UTF16BE_BOM = "\xfe\xff";
  21      const UTF16BE_BOM_LEN = 2;
  22      const UTF16BE_LF = "\x00\x0a";
  23      const UTF16LE_BOM = "\xff\xfe";
  24      const UTF16LE_BOM_LEN = 2;
  25      const UTF16LE_LF = "\x0a\x00";
  26      const UTF32BE_BOM = "\x00\x00\xfe\xff";
  27      const UTF32BE_BOM_LEN = 4;
  28      const UTF32BE_LF = "\x00\x00\x00\x0a";
  29      const UTF32LE_BOM = "\xff\xfe\x00\x00";
  30      const UTF32LE_BOM_LEN = 4;
  31      const UTF32LE_LF = "\x0a\x00\x00\x00";
  32  
  33      /**
  34       * Input encoding.
  35       *
  36       * @var string
  37       */
  38      private $inputEncoding = 'UTF-8';
  39  
  40      /**
  41       * Fallback encoding if guess strikes out.
  42       *
  43       * @var string
  44       */
  45      private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;
  46  
  47      /**
  48       * Delimiter.
  49       *
  50       * @var ?string
  51       */
  52      private $delimiter;
  53  
  54      /**
  55       * Enclosure.
  56       *
  57       * @var string
  58       */
  59      private $enclosure = '"';
  60  
  61      /**
  62       * Sheet index to read.
  63       *
  64       * @var int
  65       */
  66      private $sheetIndex = 0;
  67  
  68      /**
  69       * Load rows contiguously.
  70       *
  71       * @var bool
  72       */
  73      private $contiguous = false;
  74  
  75      /**
  76       * The character that can escape the enclosure.
  77       *
  78       * @var string
  79       */
  80      private $escapeCharacter = '\\';
  81  
  82      /**
  83       * Callback for setting defaults in construction.
  84       *
  85       * @var ?callable
  86       */
  87      private static $constructorCallback;
  88  
  89      /**
  90       * Attempt autodetect line endings (deprecated after PHP8.1)?
  91       *
  92       * @var bool
  93       */
  94      private $testAutodetect = true;
  95  
  96      /**
  97       * @var bool
  98       */
  99      protected $castFormattedNumberToNumeric = false;
 100  
 101      /**
 102       * @var bool
 103       */
 104      protected $preserveNumericFormatting = false;
 105  
 106      /** @var bool */
 107      private $preserveNullString = false;
 108  
 109      /**
 110       * Create a new CSV Reader instance.
 111       */
 112      public function __construct()
 113      {
 114          parent::__construct();
 115          $callback = self::$constructorCallback;
 116          if ($callback !== null) {
 117              $callback($this);
 118          }
 119      }
 120  
 121      /**
 122       * Set a callback to change the defaults.
 123       *
 124       * The callback must accept the Csv Reader object as the first parameter,
 125       * and it should return void.
 126       */
 127      public static function setConstructorCallback(?callable $callback): void
 128      {
 129          self::$constructorCallback = $callback;
 130      }
 131  
 132      public static function getConstructorCallback(): ?callable
 133      {
 134          return self::$constructorCallback;
 135      }
 136  
 137      public function setInputEncoding(string $encoding): self
 138      {
 139          $this->inputEncoding = $encoding;
 140  
 141          return $this;
 142      }
 143  
 144      public function getInputEncoding(): string
 145      {
 146          return $this->inputEncoding;
 147      }
 148  
 149      public function setFallbackEncoding(string $fallbackEncoding): self
 150      {
 151          $this->fallbackEncoding = $fallbackEncoding;
 152  
 153          return $this;
 154      }
 155  
 156      public function getFallbackEncoding(): string
 157      {
 158          return $this->fallbackEncoding;
 159      }
 160  
 161      /**
 162       * Move filepointer past any BOM marker.
 163       */
 164      protected function skipBOM(): void
 165      {
 166          rewind($this->fileHandle);
 167  
 168          if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
 169              rewind($this->fileHandle);
 170          }
 171      }
 172  
 173      /**
 174       * Identify any separator that is explicitly set in the file.
 175       */
 176      protected function checkSeparator(): void
 177      {
 178          $line = fgets($this->fileHandle);
 179          if ($line === false) {
 180              return;
 181          }
 182  
 183          if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
 184              $this->delimiter = substr($line, 4, 1);
 185  
 186              return;
 187          }
 188  
 189          $this->skipBOM();
 190      }
 191  
 192      /**
 193       * Infer the separator if it isn't explicitly set in the file or specified by the user.
 194       */
 195      protected function inferSeparator(): void
 196      {
 197          if ($this->delimiter !== null) {
 198              return;
 199          }
 200  
 201          $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);
 202  
 203          // If number of lines is 0, nothing to infer : fall back to the default
 204          if ($inferenceEngine->linesCounted() === 0) {
 205              $this->delimiter = $inferenceEngine->getDefaultDelimiter();
 206              $this->skipBOM();
 207  
 208              return;
 209          }
 210  
 211          $this->delimiter = $inferenceEngine->infer();
 212  
 213          // If no delimiter could be detected, fall back to the default
 214          if ($this->delimiter === null) {
 215              $this->delimiter = $inferenceEngine->getDefaultDelimiter();
 216          }
 217  
 218          $this->skipBOM();
 219      }
 220  
 221      /**
 222       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 223       */
 224      public function listWorksheetInfo(string $filename): array
 225      {
 226          // Open file
 227          $this->openFileOrMemory($filename);
 228          $fileHandle = $this->fileHandle;
 229  
 230          // Skip BOM, if any
 231          $this->skipBOM();
 232          $this->checkSeparator();
 233          $this->inferSeparator();
 234  
 235          $worksheetInfo = [];
 236          $worksheetInfo[0]['worksheetName'] = 'Worksheet';
 237          $worksheetInfo[0]['lastColumnLetter'] = 'A';
 238          $worksheetInfo[0]['lastColumnIndex'] = 0;
 239          $worksheetInfo[0]['totalRows'] = 0;
 240          $worksheetInfo[0]['totalColumns'] = 0;
 241  
 242          // Loop through each line of the file in turn
 243          $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 244          while (is_array($rowData)) {
 245              ++$worksheetInfo[0]['totalRows'];
 246              $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
 247              $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 248          }
 249  
 250          $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
 251          $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
 252  
 253          // Close file
 254          fclose($fileHandle);
 255  
 256          return $worksheetInfo;
 257      }
 258  
 259      /**
 260       * Loads Spreadsheet from file.
 261       */
 262      protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
 263      {
 264          // Create new Spreadsheet
 265          $spreadsheet = new Spreadsheet();
 266  
 267          // Load into this instance
 268          return $this->loadIntoExisting($filename, $spreadsheet);
 269      }
 270  
 271      /**
 272       * Loads Spreadsheet from string.
 273       */
 274      public function loadSpreadsheetFromString(string $contents): Spreadsheet
 275      {
 276          // Create new Spreadsheet
 277          $spreadsheet = new Spreadsheet();
 278  
 279          // Load into this instance
 280          return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
 281      }
 282  
 283      private function openFileOrMemory(string $filename): void
 284      {
 285          // Open file
 286          $fhandle = $this->canRead($filename);
 287          if (!$fhandle) {
 288              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 289          }
 290          if ($this->inputEncoding === self::GUESS_ENCODING) {
 291              $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
 292          }
 293          $this->openFile($filename);
 294          if ($this->inputEncoding !== 'UTF-8') {
 295              fclose($this->fileHandle);
 296              $entireFile = file_get_contents($filename);
 297              $fileHandle = fopen('php://memory', 'r+b');
 298              if ($fileHandle !== false && $entireFile !== false) {
 299                  $this->fileHandle = $fileHandle;
 300                  $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
 301                  fwrite($this->fileHandle, $data);
 302                  $this->skipBOM();
 303              }
 304          }
 305      }
 306  
 307      public function setTestAutoDetect(bool $value): self
 308      {
 309          $this->testAutodetect = $value;
 310  
 311          return $this;
 312      }
 313  
 314      private function setAutoDetect(?string $value): ?string
 315      {
 316          $retVal = null;
 317          if ($value !== null && $this->testAutodetect) {
 318              $retVal2 = @ini_set('auto_detect_line_endings', $value);
 319              if (is_string($retVal2)) {
 320                  $retVal = $retVal2;
 321              }
 322          }
 323  
 324          return $retVal;
 325      }
 326  
 327      public function castFormattedNumberToNumeric(
 328          bool $castFormattedNumberToNumeric,
 329          bool $preserveNumericFormatting = false
 330      ): void {
 331          $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
 332          $this->preserveNumericFormatting = $preserveNumericFormatting;
 333      }
 334  
 335      /**
 336       * Open data uri for reading.
 337       */
 338      private function openDataUri(string $filename): void
 339      {
 340          $fileHandle = fopen($filename, 'rb');
 341          if ($fileHandle === false) {
 342              // @codeCoverageIgnoreStart
 343              throw new ReaderException('Could not open file ' . $filename . ' for reading.');
 344              // @codeCoverageIgnoreEnd
 345          }
 346  
 347          $this->fileHandle = $fileHandle;
 348      }
 349  
 350      /**
 351       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 352       */
 353      public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
 354      {
 355          return $this->loadStringOrFile($filename, $spreadsheet, false);
 356      }
 357  
 358      /**
 359       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 360       */
 361      private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
 362      {
 363          // Deprecated in Php8.1
 364          $iniset = $this->setAutoDetect('1');
 365  
 366          // Open file
 367          if ($dataUri) {
 368              $this->openDataUri($filename);
 369          } else {
 370              $this->openFileOrMemory($filename);
 371          }
 372          $fileHandle = $this->fileHandle;
 373  
 374          // Skip BOM, if any
 375          $this->skipBOM();
 376          $this->checkSeparator();
 377          $this->inferSeparator();
 378  
 379          // Create new PhpSpreadsheet object
 380          while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
 381              $spreadsheet->createSheet();
 382          }
 383          $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
 384  
 385          // Set our starting row based on whether we're in contiguous mode or not
 386          $currentRow = 1;
 387          $outRow = 0;
 388  
 389          // Loop through each line of the file in turn
 390          $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 391          $valueBinder = Cell::getValueBinder();
 392          $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
 393          while (is_array($rowData)) {
 394              $noOutputYet = true;
 395              $columnLetter = 'A';
 396              foreach ($rowData as $rowDatum) {
 397                  $this->convertBoolean($rowDatum, $preserveBooleanString);
 398                  $numberFormatMask = $this->convertFormattedNumber($rowDatum);
 399                  if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
 400                      if ($this->contiguous) {
 401                          if ($noOutputYet) {
 402                              $noOutputYet = false;
 403                              ++$outRow;
 404                          }
 405                      } else {
 406                          $outRow = $currentRow;
 407                      }
 408                      // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
 409                      $sheet->getCell($columnLetter . $outRow)->getStyle()
 410                          ->getNumberFormat()
 411                          ->setFormatCode($numberFormatMask);
 412                      // Set cell value
 413                      $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
 414                  }
 415                  ++$columnLetter;
 416              }
 417              $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
 418              ++$currentRow;
 419          }
 420  
 421          // Close file
 422          fclose($fileHandle);
 423  
 424          $this->setAutoDetect($iniset);
 425  
 426          // Return
 427          return $spreadsheet;
 428      }
 429  
 430      /**
 431       * Convert string true/false to boolean, and null to null-string.
 432       *
 433       * @param mixed $rowDatum
 434       */
 435      private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
 436      {
 437          if (is_string($rowDatum) && !$preserveBooleanString) {
 438              if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
 439                  $rowDatum = true;
 440              } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
 441                  $rowDatum = false;
 442              }
 443          } else {
 444              $rowDatum = $rowDatum ?? '';
 445          }
 446      }
 447  
 448      /**
 449       * Convert numeric strings to int or float values.
 450       *
 451       * @param mixed $rowDatum
 452       */
 453      private function convertFormattedNumber(&$rowDatum): string
 454      {
 455          $numberFormatMask = NumberFormat::FORMAT_GENERAL;
 456          if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
 457              $numeric = str_replace(
 458                  [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
 459                  ['', '.'],
 460                  $rowDatum
 461              );
 462  
 463              if (is_numeric($numeric)) {
 464                  $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
 465                  if ($this->preserveNumericFormatting === true) {
 466                      $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false)
 467                          ? '#,##0' : '0';
 468                      if ($decimalPos !== false) {
 469                          $decimals = strlen($rowDatum) - $decimalPos - 1;
 470                          $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
 471                      }
 472                  }
 473  
 474                  $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric;
 475              }
 476          }
 477  
 478          return $numberFormatMask;
 479      }
 480  
 481      public function getDelimiter(): ?string
 482      {
 483          return $this->delimiter;
 484      }
 485  
 486      public function setDelimiter(?string $delimiter): self
 487      {
 488          $this->delimiter = $delimiter;
 489  
 490          return $this;
 491      }
 492  
 493      public function getEnclosure(): string
 494      {
 495          return $this->enclosure;
 496      }
 497  
 498      public function setEnclosure(string $enclosure): self
 499      {
 500          if ($enclosure == '') {
 501              $enclosure = '"';
 502          }
 503          $this->enclosure = $enclosure;
 504  
 505          return $this;
 506      }
 507  
 508      public function getSheetIndex(): int
 509      {
 510          return $this->sheetIndex;
 511      }
 512  
 513      public function setSheetIndex(int $indexValue): self
 514      {
 515          $this->sheetIndex = $indexValue;
 516  
 517          return $this;
 518      }
 519  
 520      public function setContiguous(bool $contiguous): self
 521      {
 522          $this->contiguous = $contiguous;
 523  
 524          return $this;
 525      }
 526  
 527      public function getContiguous(): bool
 528      {
 529          return $this->contiguous;
 530      }
 531  
 532      public function setEscapeCharacter(string $escapeCharacter): self
 533      {
 534          $this->escapeCharacter = $escapeCharacter;
 535  
 536          return $this;
 537      }
 538  
 539      public function getEscapeCharacter(): string
 540      {
 541          return $this->escapeCharacter;
 542      }
 543  
 544      /**
 545       * Can the current IReader read the file?
 546       */
 547      public function canRead(string $filename): bool
 548      {
 549          // Check if file exists
 550          try {
 551              $this->openFile($filename);
 552          } catch (ReaderException $e) {
 553              return false;
 554          }
 555  
 556          fclose($this->fileHandle);
 557  
 558          // Trust file extension if any
 559          $extension = strtolower(/** @scrutinizer ignore-type */ pathinfo($filename, PATHINFO_EXTENSION));
 560          if (in_array($extension, ['csv', 'tsv'])) {
 561              return true;
 562          }
 563  
 564          // Attempt to guess mimetype
 565          $type = mime_content_type($filename);
 566          $supportedTypes = [
 567              'application/csv',
 568              'text/csv',
 569              'text/plain',
 570              'inode/x-empty',
 571          ];
 572  
 573          return in_array($type, $supportedTypes, true);
 574      }
 575  
 576      private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
 577      {
 578          if ($encoding === '') {
 579              $pos = strpos($contents, $compare);
 580              if ($pos !== false && $pos % strlen($compare) === 0) {
 581                  $encoding = $setEncoding;
 582              }
 583          }
 584      }
 585  
 586      private static function guessEncodingNoBom(string $filename): string
 587      {
 588          $encoding = '';
 589          $contents = file_get_contents($filename);
 590          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
 591          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
 592          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
 593          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
 594          if ($encoding === '' && preg_match('//u', $contents) === 1) {
 595              $encoding = 'UTF-8';
 596          }
 597  
 598          return $encoding;
 599      }
 600  
 601      private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
 602      {
 603          if ($encoding === '') {
 604              if ($compare === substr($first4, 0, strlen($compare))) {
 605                  $encoding = $setEncoding;
 606              }
 607          }
 608      }
 609  
 610      private static function guessEncodingBom(string $filename): string
 611      {
 612          $encoding = '';
 613          $first4 = file_get_contents($filename, false, null, 0, 4);
 614          if ($first4 !== false) {
 615              self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
 616              self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
 617              self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
 618              self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
 619              self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
 620          }
 621  
 622          return $encoding;
 623      }
 624  
 625      public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
 626      {
 627          $encoding = self::guessEncodingBom($filename);
 628          if ($encoding === '') {
 629              $encoding = self::guessEncodingNoBom($filename);
 630          }
 631  
 632          return ($encoding === '') ? $dflt : $encoding;
 633      }
 634  
 635      public function setPreserveNullString(bool $value): self
 636      {
 637          $this->preserveNullString = $value;
 638  
 639          return $this;
 640      }
 641  
 642      public function getPreserveNullString(): bool
 643      {
 644          return $this->preserveNullString;
 645      }
 646  }