Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 311] [Versions 311 and 400] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use InvalidArgumentException;
   6  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   7  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
   8  use PhpOffice\PhpSpreadsheet\Spreadsheet;
   9  
  /**
   * Reader that loads delimiter-separated text (CSV/TSV) into a Spreadsheet.
   *
   * The delimiter may be set explicitly, declared by a leading "sep=X" line in the file,
   * or inferred from the file contents. Input that is not UTF-8 is converted to UTF-8 in
   * memory before parsing.
   */
  class Csv extends BaseReader
  {
      // Byte-order marks, their byte lengths, and the encoded line feed for each supported Unicode encoding.
      const UTF8_BOM = "\xEF\xBB\xBF";
      const UTF8_BOM_LEN = 3;
      const UTF16BE_BOM = "\xfe\xff";
      const UTF16BE_BOM_LEN = 2;
      const UTF16BE_LF = "\x00\x0a";
      const UTF16LE_BOM = "\xff\xfe";
      const UTF16LE_BOM_LEN = 2;
      const UTF16LE_LF = "\x0a\x00";
      const UTF32BE_BOM = "\x00\x00\xfe\xff";
      const UTF32BE_BOM_LEN = 4;
      const UTF32BE_LF = "\x00\x00\x00\x0a";
      const UTF32LE_BOM = "\xff\xfe\x00\x00";
      const UTF32LE_BOM_LEN = 4;
      const UTF32LE_LF = "\x0a\x00\x00\x00";

      /**
       * Input encoding.
       *
       * @var string
       */
      private $inputEncoding = 'UTF-8';

      /**
       * Delimiter. Null until it is set by the caller, read from a "sep=" line, or inferred.
       *
       * @var null|string
       */
      private $delimiter;

      /**
       * Enclosure.
       *
       * @var string
       */
      private $enclosure = '"';

      /**
       * Sheet index to read.
       *
       * @var int
       */
      private $sheetIndex = 0;

      /**
       * Load rows contiguously.
       *
       * @var bool
       */
      private $contiguous = false;

      /**
       * The character that can escape the enclosure.
       *
       * @var string
       */
      private $escapeCharacter = '\\';

      /**
       * Create a new CSV Reader instance.
       */
      public function __construct()
      {
          parent::__construct();
      }

      /**
       * Set input encoding.
       *
       * @param string $pValue Input encoding, eg: 'UTF-8'
       *
       * @return $this
       */
      public function setInputEncoding($pValue)
      {
          $this->inputEncoding = $pValue;

          return $this;
      }

      /**
       * Get input encoding.
       *
       * @return string
       */
      public function getInputEncoding()
      {
          return $this->inputEncoding;
      }

      /**
       * Move filepointer past any BOM marker.
       *
       * Rewinds the current file handle, then leaves the pointer just after a UTF-8 BOM
       * when one is present, or at the very start of the stream otherwise.
       */
      protected function skipBOM(): void
      {
          rewind($this->fileHandle);

          // fgets() reads at most (length - 1) bytes, hence the + 1 to fetch exactly the BOM bytes.
          $leadingBytes = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
          if ($leadingBytes !== self::UTF8_BOM) {
              rewind($this->fileHandle);
          }
      }

      /**
       * Identify any separator that is explicitly set in the file.
       *
       * A line of exactly "sep=X" (ignoring the line ending) declares X as the delimiter;
       * the file pointer is then left after that line so it is not parsed as data.
       * Otherwise the pointer is reset to the start of the data (past any BOM).
       */
      protected function checkSeparator(): void
      {
          $line = fgets($this->fileHandle);
          if ($line === false) {
              return;
          }

          $isSeparatorLine = strlen(trim($line, "\r\n")) == 5 && stripos($line, 'sep=') === 0;
          if ($isSeparatorLine) {
              $this->delimiter = substr($line, 4, 1);

              return;
          }

          $this->skipBOM();
      }

      /**
       * Infer the separator if it isn't explicitly set in the file or specified by the user.
       *
       * For each candidate delimiter the number of occurrences per line is collected; the
       * candidate whose per-line count deviates least from its (non-zero) median wins.
       * When nothing can be inferred the first candidate (a comma) is used.
       */
      protected function inferSeparator(): void
      {
          if ($this->delimiter !== null) {
              return;
          }

          $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
          $counts = array_fill_keys($potentialDelimiters, []);

          // Count how many times each of the potential delimiters appears in each line.
          // The 1000-line cap is tested BEFORE reading, so every line that is read is also
          // counted and $numberLines always equals the number of samples per delimiter.
          $numberLines = 0;
          while ($numberLines < 1000 && ($line = $this->getNextLine()) !== false) {
              ++$numberLines;
              // Byte value => frequency, for every byte that occurs in the line.
              $byteCounts = count_chars($line, 1);
              foreach ($potentialDelimiters as $delimiter) {
                  $counts[$delimiter][] = $byteCounts[ord($delimiter)] ?? 0;
              }
          }

          // If number of lines is 0, nothing to infer : fall back to the default
          if ($numberLines === 0) {
              $this->delimiter = reset($potentialDelimiters);
              $this->skipBOM();

              return;
          }

          // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
          $meanSquareDeviations = [];
          $middleIdx = intdiv($numberLines - 1, 2);
          $hasOddLineCount = ($numberLines % 2) === 1;

          foreach ($potentialDelimiters as $delimiter) {
              $series = $counts[$delimiter];
              sort($series);

              $median = $hasOddLineCount
                  ? $series[$middleIdx]
                  : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

              // A zero median means the delimiter is absent from at least half of the lines.
              // (Integer operands that divide evenly yield an int, so the strict test holds.)
              if ($median === 0) {
                  continue;
              }

              $sumOfSquares = array_reduce(
                  $series,
                  static function ($sum, $value) use ($median) {
                      return $sum + ($value - $median) ** 2;
                  },
                  0
              );
              $meanSquareDeviations[$delimiter] = $sumOfSquares / count($series);
          }

          // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
          $min = INF;
          foreach ($potentialDelimiters as $delimiter) {
              if (!isset($meanSquareDeviations[$delimiter])) {
                  continue;
              }

              if ($meanSquareDeviations[$delimiter] < $min) {
                  $min = $meanSquareDeviations[$delimiter];
                  $this->delimiter = $delimiter;
              }
          }

          // If no delimiter could be detected, fall back to the default
          if ($this->delimiter === null) {
              $this->delimiter = reset($potentialDelimiters);
          }

          $this->skipBOM();
      }

      /**
       * Get the next full line from the file.
       *
       * Physical lines are joined while an enclosure is still open, and all enclosed text
       * is removed, so the result only contains characters outside enclosures.
       *
       * @return false|string False when the end of the file is reached
       */
      private function getNextLine()
      {
          $line = '';
          // An enclosure character that is not preceded by the escape character (if one is configured).
          $enclosure = ($this->escapeCharacter === '' ? ''
              : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
              . preg_quote($this->enclosure, '/');

          do {
              // Get the next line in the file
              $newLine = fgets($this->fileHandle);

              // Return false if there is no next line
              if ($newLine === false) {
                  return false;
              }

              // Add the new line to what has been accumulated so far, then
              // drop everything that is enclosed to avoid counting false positives in enclosures
              $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line . $newLine);

              // preg_replace() yields null on a PCRE failure; report an empty line instead
              // of handing null to preg_match() and to the caller.
              if ($line === null) {
                  return '';
              }

              // See if we have any enclosures left in the line
              // if we still have an enclosure then we need to read the next line as well
          } while (preg_match('/(' . $enclosure . ')/', $line) > 0);

          return $line;
      }

      /**
       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
       *
       * @param string $pFilename
       *
       * @return array
       */
      public function listWorksheetInfo($pFilename)
      {
          // Open file
          $this->openFileOrMemory($pFilename);
          $fileHandle = $this->fileHandle;

          // Skip BOM, if any
          $this->skipBOM();
          $this->checkSeparator();
          $this->inferSeparator();

          // Count the rows and track the widest row seen
          $totalRows = 0;
          $lastColumnIndex = 0;
          while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
              ++$totalRows;
              $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
          }

          // Close file
          fclose($fileHandle);

          return [
              [
                  'worksheetName' => 'Worksheet',
                  'lastColumnLetter' => Coordinate::stringFromColumnIndex($lastColumnIndex + 1),
                  'lastColumnIndex' => $lastColumnIndex,
                  'totalRows' => $totalRows,
                  'totalColumns' => $lastColumnIndex + 1,
              ],
          ];
      }

      /**
       * Loads Spreadsheet from file.
       *
       * @param string $pFilename
       *
       * @return Spreadsheet
       */
      public function load($pFilename)
      {
          // Create new Spreadsheet
          $spreadsheet = new Spreadsheet();

          // Load into this instance
          return $this->loadIntoExisting($pFilename, $spreadsheet);
      }

      /**
       * Open the file for reading, converting it to UTF-8 in a memory stream when needed.
       *
       * On return $this->fileHandle is positioned at the start of the data, past any UTF-8
       * BOM when the content was converted.
       *
       * @param string $pFilename
       */
      private function openFileOrMemory($pFilename): void
      {
          // Open file
          if (!$this->canRead($pFilename)) {
              throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
          }
          $this->openFile($pFilename);
          if ($this->inputEncoding === 'UTF-8') {
              return;
          }

          // Any other encoding: swap the file handle for an in-memory UTF-8 copy of the file.
          fclose($this->fileHandle);
          $entireFile = file_get_contents($pFilename);
          if ($entireFile === false) {
              // Fail loudly rather than silently producing an empty workbook.
              throw new Exception('Could not read file ' . $pFilename . '.');
          }
          $this->fileHandle = fopen('php://memory', 'r+b');
          $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
          fwrite($this->fileHandle, $data);
          $this->skipBOM();
      }

      /**
       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
       *
       * The 'auto_detect_line_endings' ini setting is enabled while reading and is always
       * restored afterwards, and the file handle is always closed, even when loading fails.
       *
       * @param string $pFilename
       *
       * @return Spreadsheet
       */
      public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
      {
          $lineEnding = ini_get('auto_detect_line_endings');
          ini_set('auto_detect_line_endings', '1');

          try {
              // Open file
              $this->openFileOrMemory($pFilename);
              $fileHandle = $this->fileHandle;

              try {
                  // Skip BOM, if any
                  $this->skipBOM();
                  $this->checkSeparator();
                  $this->inferSeparator();

                  // Make sure the target sheet exists, then select it
                  while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                      $spreadsheet->createSheet();
                  }
                  $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                  // $currentRow follows the rows of the file; $outRow is the row written to.
                  // In contiguous mode $outRow only advances for rows that produce output.
                  $currentRow = 1;
                  $outRow = 0;

                  // Loop through each line of the file in turn
                  while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                      $noOutputYet = true;
                      $columnLetter = 'A';
                      foreach ($rowData as $rowDatum) {
                          // Loose comparison on purpose: it skips both '' and null values.
                          if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                              if (!$this->contiguous) {
                                  $outRow = $currentRow;
                              } elseif ($noOutputYet) {
                                  $noOutputYet = false;
                                  ++$outRow;
                              }
                              // Set cell value
                              $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                          }
                          // String increment: 'A' -> 'B', ..., 'Z' -> 'AA'
                          ++$columnLetter;
                      }
                      ++$currentRow;
                  }
              } finally {
                  // Close file
                  fclose($fileHandle);
              }
          } finally {
              ini_set('auto_detect_line_endings', (string) $lineEnding);
          }

          // Return
          return $spreadsheet;
      }

      /**
       * Get delimiter.
       *
       * @return null|string Null when no delimiter has been set, declared or inferred yet
       */
      public function getDelimiter()
      {
          return $this->delimiter;
      }

      /**
       * Set delimiter.
       *
       * @param string $delimiter Delimiter, eg: ','
       *
       * @return $this
       */
      public function setDelimiter($delimiter)
      {
          $this->delimiter = $delimiter;

          return $this;
      }

      /**
       * Get enclosure.
       *
       * @return string
       */
      public function getEnclosure()
      {
          return $this->enclosure;
      }

      /**
       * Set enclosure.
       *
       * @param string $enclosure Enclosure, defaults to "
       *
       * @return $this
       */
      public function setEnclosure($enclosure)
      {
          // An empty enclosure falls back to the default double quote.
          $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

          return $this;
      }

      /**
       * Get sheet index.
       *
       * @return int
       */
      public function getSheetIndex()
      {
          return $this->sheetIndex;
      }

      /**
       * Set sheet index.
       *
       * @param int $pValue Sheet index
       *
       * @return $this
       */
      public function setSheetIndex($pValue)
      {
          $this->sheetIndex = $pValue;

          return $this;
      }

      /**
       * Set Contiguous.
       *
       * @param bool $contiguous
       *
       * @return $this
       */
      public function setContiguous($contiguous)
      {
          $this->contiguous = (bool) $contiguous;

          return $this;
      }

      /**
       * Get Contiguous.
       *
       * @return bool
       */
      public function getContiguous()
      {
          return $this->contiguous;
      }

      /**
       * Set the character that can escape the enclosure.
       *
       * @param string $escapeCharacter
       *
       * @return $this
       */
      public function setEscapeCharacter($escapeCharacter)
      {
          $this->escapeCharacter = $escapeCharacter;

          return $this;
      }

      /**
       * Get the character that can escape the enclosure.
       *
       * @return string
       */
      public function getEscapeCharacter()
      {
          return $this->escapeCharacter;
      }

      /**
       * Can the current IReader read the file?
       *
       * @param string $pFilename
       *
       * @return bool
       */
      public function canRead($pFilename)
      {
          // Check if file exists
          try {
              $this->openFile($pFilename);
          } catch (InvalidArgumentException $e) {
              return false;
          }

          fclose($this->fileHandle);

          // Trust file extension if any
          $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
          if (in_array($extension, ['csv', 'tsv'], true)) {
              return true;
          }

          // Attempt to guess mimetype
          $type = mime_content_type($pFilename);
          $supportedTypes = [
              'application/csv',
              'text/csv',
              'text/plain',
              'inode/x-empty',
          ];

          return in_array($type, $supportedTypes, true);
      }

      /**
       * Set $encoding to $setEncoding when it is still undecided and the first occurrence
       * of $compare in $contents starts at an offset that is a multiple of its length.
       */
      private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
      {
          if ($encoding !== '') {
              return;
          }

          $pos = strpos($contents, $compare);
          if ($pos !== false && $pos % strlen($compare) === 0) {
              $encoding = $setEncoding;
          }
      }

      /**
       * Guess the encoding of a file from its contents, without relying on a BOM.
       *
       * Looks for an aligned encoded line feed of each UTF-32/UTF-16 variant, then checks
       * whether the contents are valid UTF-8.
       *
       * @return string The guessed encoding, or '' when undecided or the file cannot be read
       */
      private static function guessEncodingNoBom(string $filename): string
      {
          $encoding = '';
          $contents = file_get_contents($filename);
          if ($contents === false) {
              // Unreadable file: stay undecided so the caller's default applies.
              return $encoding;
          }

          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
          self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
          self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
          // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
          if ($encoding === '' && preg_match('//u', $contents) === 1) {
              $encoding = 'UTF-8';
          }

          return $encoding;
      }

      /**
       * Set $encoding to $setEncoding when it is still undecided and $first4 starts with $compare.
       */
      private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
      {
          if ($encoding === '' && substr($first4, 0, strlen($compare)) === $compare) {
              $encoding = $setEncoding;
          }
      }

      /**
       * Guess the encoding of a file from a byte-order mark in its first four bytes.
       *
       * @return string The encoding matching the BOM, or '' when no known BOM is found
       */
      private static function guessEncodingBom(string $filename): string
      {
          $encoding = '';
          $first4 = file_get_contents($filename, false, null, 0, 4);
          if ($first4 !== false) {
              // Order matters: the UTF-32LE BOM begins with the UTF-16LE BOM, so it is tested first.
              self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
              self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
              self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
              self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
              self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
          }

          return $encoding;
      }

      /**
       * Guess the encoding of a file, first from its BOM and then from its contents.
       *
       * @param string $filename File to inspect
       * @param string $dflt Encoding returned when nothing could be guessed
       *
       * @return string
       */
      public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string
      {
          $encoding = self::guessEncodingBom($filename);
          if ($encoding === '') {
              $encoding = self::guessEncodingNoBom($filename);
          }

          return ($encoding === '') ? $dflt : $encoding;
      }
  }