Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   6  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
   7  use PhpOffice\PhpSpreadsheet\Spreadsheet;
   8  
   9  class Csv extends BaseReader
  10  {
  11      /**
  12       * Input encoding.
  13       *
  14       * @var string
  15       */
  16      private $inputEncoding = 'UTF-8';
  17  
  18      /**
  19       * Delimiter.
  20       *
  21       * @var string
  22       */
  23      private $delimiter;
  24  
  25      /**
  26       * Enclosure.
  27       *
  28       * @var string
  29       */
  30      private $enclosure = '"';
  31  
  32      /**
  33       * Sheet index to read.
  34       *
  35       * @var int
  36       */
  37      private $sheetIndex = 0;
  38  
  39      /**
  40       * Load rows contiguously.
  41       *
  42       * @var bool
  43       */
  44      private $contiguous = false;
  45  
  46      /**
  47       * Row counter for loading rows contiguously.
  48       *
  49       * @var int
  50       */
  51      private $contiguousRow = -1;
  52  
  53      /**
  54       * The character that can escape the enclosure.
  55       *
  56       * @var string
  57       */
  58      private $escapeCharacter = '\\';
  59  
  60      /**
  61       * Create a new CSV Reader instance.
  62       */
  63      public function __construct()
  64      {
  65          parent::__construct();
  66      }
  67  
  68      /**
  69       * Set input encoding.
  70       *
  71       * @param string $pValue Input encoding, eg: 'UTF-8'
  72       *
  73       * @return Csv
  74       */
  75      public function setInputEncoding($pValue)
  76      {
  77          $this->inputEncoding = $pValue;
  78  
  79          return $this;
  80      }
  81  
  82      /**
  83       * Get input encoding.
  84       *
  85       * @return string
  86       */
  87      public function getInputEncoding()
  88      {
  89          return $this->inputEncoding;
  90      }
  91  
  92      /**
  93       * Move filepointer past any BOM marker.
  94       */
  95      protected function skipBOM()
  96      {
  97          rewind($this->fileHandle);
  98  
  99          switch ($this->inputEncoding) {
 100              case 'UTF-8':
 101                  fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
 102                      fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
 103  
 104                  break;
 105              case 'UTF-16LE':
 106                  fgets($this->fileHandle, 3) == "\xFF\xFE" ?
 107                      fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
 108  
 109                  break;
 110              case 'UTF-16BE':
 111                  fgets($this->fileHandle, 3) == "\xFE\xFF" ?
 112                      fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
 113  
 114                  break;
 115              case 'UTF-32LE':
 116                  fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ?
 117                      fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
 118  
 119                  break;
 120              case 'UTF-32BE':
 121                  fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ?
 122                      fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
 123  
 124                  break;
 125              default:
 126                  break;
 127          }
 128      }
 129  
 130      /**
 131       * Identify any separator that is explicitly set in the file.
 132       */
 133      protected function checkSeparator()
 134      {
 135          $line = fgets($this->fileHandle);
 136          if ($line === false) {
 137              return;
 138          }
 139  
 140          if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
 141              $this->delimiter = substr($line, 4, 1);
 142  
 143              return;
 144          }
 145  
 146          $this->skipBOM();
 147      }
 148  
 149      /**
 150       * Infer the separator if it isn't explicitly set in the file or specified by the user.
 151       */
 152      protected function inferSeparator()
 153      {
 154          if ($this->delimiter !== null) {
 155              return;
 156          }
 157  
 158          $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
 159          $counts = [];
 160          foreach ($potentialDelimiters as $delimiter) {
 161              $counts[$delimiter] = [];
 162          }
 163  
 164          // Count how many times each of the potential delimiters appears in each line
 165          $numberLines = 0;
 166          while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) {
 167              $countLine = [];
 168              for ($i = strlen($line) - 1; $i >= 0; --$i) {
 169                  $char = $line[$i];
 170                  if (isset($counts[$char])) {
 171                      if (!isset($countLine[$char])) {
 172                          $countLine[$char] = 0;
 173                      }
 174                      ++$countLine[$char];
 175                  }
 176              }
 177              foreach ($potentialDelimiters as $delimiter) {
 178                  $counts[$delimiter][] = $countLine[$delimiter]
 179                      ?? 0;
 180              }
 181          }
 182  
 183          // If number of lines is 0, nothing to infer : fall back to the default
 184          if ($numberLines === 0) {
 185              $this->delimiter = reset($potentialDelimiters);
 186              $this->skipBOM();
 187  
 188              return;
 189          }
 190  
 191          // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
 192          $meanSquareDeviations = [];
 193          $middleIdx = floor(($numberLines - 1) / 2);
 194  
 195          foreach ($potentialDelimiters as $delimiter) {
 196              $series = $counts[$delimiter];
 197              sort($series);
 198  
 199              $median = ($numberLines % 2)
 200                  ? $series[$middleIdx]
 201                  : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;
 202  
 203              if ($median === 0) {
 204                  continue;
 205              }
 206  
 207              $meanSquareDeviations[$delimiter] = array_reduce(
 208                  $series,
 209                  function ($sum, $value) use ($median) {
 210                      return $sum + pow($value - $median, 2);
 211                  }
 212              ) / count($series);
 213          }
 214  
 215          // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
 216          $min = INF;
 217          foreach ($potentialDelimiters as $delimiter) {
 218              if (!isset($meanSquareDeviations[$delimiter])) {
 219                  continue;
 220              }
 221  
 222              if ($meanSquareDeviations[$delimiter] < $min) {
 223                  $min = $meanSquareDeviations[$delimiter];
 224                  $this->delimiter = $delimiter;
 225              }
 226          }
 227  
 228          // If no delimiter could be detected, fall back to the default
 229          if ($this->delimiter === null) {
 230              $this->delimiter = reset($potentialDelimiters);
 231          }
 232  
 233          $this->skipBOM();
 234      }
 235  
 236      /**
 237       * Get the next full line from the file.
 238       *
 239       * @param string $line
 240       *
 241       * @return bool|string
 242       */
 243      private function getNextLine($line = '')
 244      {
 245          // Get the next line in the file
 246          $newLine = fgets($this->fileHandle);
 247  
 248          // Return false if there is no next line
 249          if ($newLine === false) {
 250              return false;
 251          }
 252  
 253          // Add the new line to the line passed in
 254          $line = $line . $newLine;
 255  
 256          // Drop everything that is enclosed to avoid counting false positives in enclosures
 257          $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')'
 258              . preg_quote($this->enclosure, '/');
 259          $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);
 260  
 261          // See if we have any enclosures left in the line
 262          // if we still have an enclosure then we need to read the next line as well
 263          if (preg_match('/(' . $enclosure . ')/', $line) > 0) {
 264              $line = $this->getNextLine($line);
 265          }
 266  
 267          return $line;
 268      }
 269  
 270      /**
 271       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 272       *
 273       * @param string $pFilename
 274       *
 275       * @throws Exception
 276       *
 277       * @return array
 278       */
 279      public function listWorksheetInfo($pFilename)
 280      {
 281          // Open file
 282          if (!$this->canRead($pFilename)) {
 283              throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
 284          }
 285          $this->openFile($pFilename);
 286          $fileHandle = $this->fileHandle;
 287  
 288          // Skip BOM, if any
 289          $this->skipBOM();
 290          $this->checkSeparator();
 291          $this->inferSeparator();
 292  
 293          $worksheetInfo = [];
 294          $worksheetInfo[0]['worksheetName'] = 'Worksheet';
 295          $worksheetInfo[0]['lastColumnLetter'] = 'A';
 296          $worksheetInfo[0]['lastColumnIndex'] = 0;
 297          $worksheetInfo[0]['totalRows'] = 0;
 298          $worksheetInfo[0]['totalColumns'] = 0;
 299  
 300          // Loop through each line of the file in turn
 301          while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
 302              ++$worksheetInfo[0]['totalRows'];
 303              $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
 304          }
 305  
 306          $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
 307          $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
 308  
 309          // Close file
 310          fclose($fileHandle);
 311  
 312          return $worksheetInfo;
 313      }
 314  
 315      /**
 316       * Loads Spreadsheet from file.
 317       *
 318       * @param string $pFilename
 319       *
 320       * @throws Exception
 321       *
 322       * @return Spreadsheet
 323       */
 324      public function load($pFilename)
 325      {
 326          // Create new Spreadsheet
 327          $spreadsheet = new Spreadsheet();
 328  
 329          // Load into this instance
 330          return $this->loadIntoExisting($pFilename, $spreadsheet);
 331      }
 332  
 333      /**
 334       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 335       *
 336       * @param string $pFilename
 337       * @param Spreadsheet $spreadsheet
 338       *
 339       * @throws Exception
 340       *
 341       * @return Spreadsheet
 342       */
 343      public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
 344      {
 345          $lineEnding = ini_get('auto_detect_line_endings');
 346          ini_set('auto_detect_line_endings', true);
 347  
 348          // Open file
 349          if (!$this->canRead($pFilename)) {
 350              throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
 351          }
 352          $this->openFile($pFilename);
 353          $fileHandle = $this->fileHandle;
 354  
 355          // Skip BOM, if any
 356          $this->skipBOM();
 357          $this->checkSeparator();
 358          $this->inferSeparator();
 359  
 360          // Create new PhpSpreadsheet object
 361          while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
 362              $spreadsheet->createSheet();
 363          }
 364          $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
 365  
 366          // Set our starting row based on whether we're in contiguous mode or not
 367          $currentRow = 1;
 368          if ($this->contiguous) {
 369              $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
 370          }
 371  
 372          // Loop through each line of the file in turn
 373          while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
 374              $columnLetter = 'A';
 375              foreach ($rowData as $rowDatum) {
 376                  if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
 377                      // Convert encoding if necessary
 378                      if ($this->inputEncoding !== 'UTF-8') {
 379                          $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
 380                      }
 381  
 382                      // Set cell value
 383                      $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
 384                  }
 385                  ++$columnLetter;
 386              }
 387              ++$currentRow;
 388          }
 389  
 390          // Close file
 391          fclose($fileHandle);
 392  
 393          if ($this->contiguous) {
 394              $this->contiguousRow = $currentRow;
 395          }
 396  
 397          ini_set('auto_detect_line_endings', $lineEnding);
 398  
 399          // Return
 400          return $spreadsheet;
 401      }
 402  
 403      /**
 404       * Get delimiter.
 405       *
 406       * @return string
 407       */
 408      public function getDelimiter()
 409      {
 410          return $this->delimiter;
 411      }
 412  
 413      /**
 414       * Set delimiter.
 415       *
 416       * @param string $delimiter Delimiter, eg: ','
 417       *
 418       * @return CSV
 419       */
 420      public function setDelimiter($delimiter)
 421      {
 422          $this->delimiter = $delimiter;
 423  
 424          return $this;
 425      }
 426  
 427      /**
 428       * Get enclosure.
 429       *
 430       * @return string
 431       */
 432      public function getEnclosure()
 433      {
 434          return $this->enclosure;
 435      }
 436  
 437      /**
 438       * Set enclosure.
 439       *
 440       * @param string $enclosure Enclosure, defaults to "
 441       *
 442       * @return CSV
 443       */
 444      public function setEnclosure($enclosure)
 445      {
 446          if ($enclosure == '') {
 447              $enclosure = '"';
 448          }
 449          $this->enclosure = $enclosure;
 450  
 451          return $this;
 452      }
 453  
 454      /**
 455       * Get sheet index.
 456       *
 457       * @return int
 458       */
 459      public function getSheetIndex()
 460      {
 461          return $this->sheetIndex;
 462      }
 463  
 464      /**
 465       * Set sheet index.
 466       *
 467       * @param int $pValue Sheet index
 468       *
 469       * @return CSV
 470       */
 471      public function setSheetIndex($pValue)
 472      {
 473          $this->sheetIndex = $pValue;
 474  
 475          return $this;
 476      }
 477  
 478      /**
 479       * Set Contiguous.
 480       *
 481       * @param bool $contiguous
 482       *
 483       * @return Csv
 484       */
 485      public function setContiguous($contiguous)
 486      {
 487          $this->contiguous = (bool) $contiguous;
 488          if (!$contiguous) {
 489              $this->contiguousRow = -1;
 490          }
 491  
 492          return $this;
 493      }
 494  
 495      /**
 496       * Get Contiguous.
 497       *
 498       * @return bool
 499       */
 500      public function getContiguous()
 501      {
 502          return $this->contiguous;
 503      }
 504  
 505      /**
 506       * Set escape backslashes.
 507       *
 508       * @param string $escapeCharacter
 509       *
 510       * @return $this
 511       */
 512      public function setEscapeCharacter($escapeCharacter)
 513      {
 514          $this->escapeCharacter = $escapeCharacter;
 515  
 516          return $this;
 517      }
 518  
 519      /**
 520       * Get escape backslashes.
 521       *
 522       * @return string
 523       */
 524      public function getEscapeCharacter()
 525      {
 526          return $this->escapeCharacter;
 527      }
 528  
 529      /**
 530       * Can the current IReader read the file?
 531       *
 532       * @param string $pFilename
 533       *
 534       * @return bool
 535       */
 536      public function canRead($pFilename)
 537      {
 538          // Check if file exists
 539          try {
 540              $this->openFile($pFilename);
 541          } catch (Exception $e) {
 542              return false;
 543          }
 544  
 545          fclose($this->fileHandle);
 546  
 547          // Trust file extension if any
 548          $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
 549          if (in_array($extension, ['csv', 'tsv'])) {
 550              return true;
 551          }
 552  
 553          // Attempt to guess mimetype
 554          $type = mime_content_type($pFilename);
 555          $supportedTypes = [
 556              'text/csv',
 557              'text/plain',
 558              'inode/x-empty',
 559          ];
 560  
 561          return in_array($type, $supportedTypes, true);
 562      }
 563  }