Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

> use InvalidArgumentException;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Shared\StringHelper; use PhpOffice\PhpSpreadsheet\Spreadsheet; class Csv extends BaseReader {
> const UTF8_BOM = "\xEF\xBB\xBF"; /** > const UTF8_BOM_LEN = 3; * Input encoding. > const UTF16BE_BOM = "\xfe\xff"; * > const UTF16BE_BOM_LEN = 2; * @var string > const UTF16BE_LF = "\x00\x0a"; */ > const UTF16LE_BOM = "\xff\xfe"; private $inputEncoding = 'UTF-8'; > const UTF16LE_BOM_LEN = 2; > const UTF16LE_LF = "\x0a\x00"; /** > const UTF32BE_BOM = "\x00\x00\xfe\xff"; * Delimiter. > const UTF32BE_BOM_LEN = 4; * > const UTF32BE_LF = "\x00\x00\x00\x0a"; * @var string > const UTF32LE_BOM = "\xff\xfe\x00\x00"; */ > const UTF32LE_BOM_LEN = 4; private $delimiter; > const UTF32LE_LF = "\x0a\x00\x00\x00"; >
/** * Enclosure. * * @var string */ private $enclosure = '"'; /** * Sheet index to read. * * @var int */ private $sheetIndex = 0; /** * Load rows contiguously. * * @var bool */ private $contiguous = false; /**
< * Row counter for loading rows contiguously. < * < * @var int < */ < private $contiguousRow = -1; < < /**
* The character that can escape the enclosure. * * @var string */ private $escapeCharacter = '\\'; /** * Create a new CSV Reader instance. */ public function __construct() { parent::__construct(); } /** * Set input encoding. * * @param string $pValue Input encoding, eg: 'UTF-8' *
< * @return Csv
> * @return $this
*/ public function setInputEncoding($pValue) { $this->inputEncoding = $pValue; return $this; } /** * Get input encoding. * * @return string */ public function getInputEncoding() { return $this->inputEncoding; } /** * Move filepointer past any BOM marker. */
< protected function skipBOM()
> protected function skipBOM(): void
{ rewind($this->fileHandle);
< switch ($this->inputEncoding) { < case 'UTF-8': < fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? < fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); < < break; < case 'UTF-16LE': < fgets($this->fileHandle, 3) == "\xFF\xFE" ? < fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); < < break; < case 'UTF-16BE': < fgets($this->fileHandle, 3) == "\xFE\xFF" ? < fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); < < break; < case 'UTF-32LE': < fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? < fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); < < break; < case 'UTF-32BE': < fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? < fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); < < break; < default: < break;
> if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) { > rewind($this->fileHandle);
} } /** * Identify any separator that is explicitly set in the file. */
< protected function checkSeparator()
> protected function checkSeparator(): void
{ $line = fgets($this->fileHandle); if ($line === false) { return; } if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { $this->delimiter = substr($line, 4, 1); return; } $this->skipBOM(); } /** * Infer the separator if it isn't explicitly set in the file or specified by the user. */
< protected function inferSeparator()
> protected function inferSeparator(): void
{ if ($this->delimiter !== null) { return; } $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~']; $counts = []; foreach ($potentialDelimiters as $delimiter) { $counts[$delimiter] = []; } // Count how many times each of the potential delimiters appears in each line $numberLines = 0; while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) { $countLine = []; for ($i = strlen($line) - 1; $i >= 0; --$i) { $char = $line[$i]; if (isset($counts[$char])) { if (!isset($countLine[$char])) { $countLine[$char] = 0; } ++$countLine[$char]; } } foreach ($potentialDelimiters as $delimiter) { $counts[$delimiter][] = $countLine[$delimiter] ?? 0; } } // If number of lines is 0, nothing to infer : fall back to the default if ($numberLines === 0) { $this->delimiter = reset($potentialDelimiters); $this->skipBOM(); return; } // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) $meanSquareDeviations = []; $middleIdx = floor(($numberLines - 1) / 2); foreach ($potentialDelimiters as $delimiter) { $series = $counts[$delimiter]; sort($series); $median = ($numberLines % 2) ? $series[$middleIdx] : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; if ($median === 0) { continue; } $meanSquareDeviations[$delimiter] = array_reduce( $series, function ($sum, $value) use ($median) {
< return $sum + pow($value - $median, 2);
> return $sum + ($value - $median) ** 2;
} ) / count($series); } // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) $min = INF; foreach ($potentialDelimiters as $delimiter) { if (!isset($meanSquareDeviations[$delimiter])) { continue; } if ($meanSquareDeviations[$delimiter] < $min) { $min = $meanSquareDeviations[$delimiter]; $this->delimiter = $delimiter; } } // If no delimiter could be detected, fall back to the default if ($this->delimiter === null) { $this->delimiter = reset($potentialDelimiters); } $this->skipBOM(); } /** * Get the next full line from the file. *
< * @param string $line < * < * @return bool|string
> * @return false|string
*/
< private function getNextLine($line = '')
> private function getNextLine()
{
> $line = ''; // Get the next line in the file > $enclosure = ($this->escapeCharacter === '' ? '' $newLine = fgets($this->fileHandle); > : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')')) > . preg_quote($this->enclosure, '/'); // Return false if there is no next line > if ($newLine === false) { > do {
return false; } // Add the new line to the line passed in $line = $line . $newLine; // Drop everything that is enclosed to avoid counting false positives in enclosures
< $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')' < . preg_quote($this->enclosure, '/');
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line); // See if we have any enclosures left in the line // if we still have an enclosure then we need to read the next line as well
< if (preg_match('/(' . $enclosure . ')/', $line) > 0) { < $line = $this->getNextLine($line); < }
> } while (preg_match('/(' . $enclosure . ')/', $line) > 0);
return $line; } /** * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). * * @param string $pFilename *
< * @throws Exception < *
* @return array */ public function listWorksheetInfo($pFilename) { // Open file
< if (!$this->canRead($pFilename)) { < throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); < } < $this->openFile($pFilename);
> $this->openFileOrMemory($pFilename);
$fileHandle = $this->fileHandle; // Skip BOM, if any $this->skipBOM(); $this->checkSeparator(); $this->inferSeparator(); $worksheetInfo = []; $worksheetInfo[0]['worksheetName'] = 'Worksheet'; $worksheetInfo[0]['lastColumnLetter'] = 'A'; $worksheetInfo[0]['lastColumnIndex'] = 0; $worksheetInfo[0]['totalRows'] = 0; $worksheetInfo[0]['totalColumns'] = 0; // Loop through each line of the file in turn while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { ++$worksheetInfo[0]['totalRows']; $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); } $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; // Close file fclose($fileHandle); return $worksheetInfo; } /** * Loads Spreadsheet from file. * * @param string $pFilename *
< * @throws Exception < *
* @return Spreadsheet */ public function load($pFilename) { // Create new Spreadsheet $spreadsheet = new Spreadsheet(); // Load into this instance return $this->loadIntoExisting($pFilename, $spreadsheet); }
> private function openFileOrMemory($pFilename): void /** > { * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. > // Open file * > $fhandle = $this->canRead($pFilename); * @param string $pFilename > if (!$fhandle) { * @param Spreadsheet $spreadsheet > throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); * > } * @throws Exception > $this->openFile($pFilename); * > if ($this->inputEncoding !== 'UTF-8') { * @return Spreadsheet > fclose($this->fileHandle); */ > $entireFile = file_get_contents($pFilename); public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) > $this->fileHandle = fopen('php://memory', 'r+b'); { > $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding); $lineEnding = ini_get('auto_detect_line_endings'); > fwrite($this->fileHandle, $data); ini_set('auto_detect_line_endings', true); > $this->skipBOM(); > } // Open file > } if (!$this->canRead($pFilename)) { >
< * @param Spreadsheet $spreadsheet < * < * @throws Exception
< if (!$this->canRead($pFilename)) { < throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); < } < $this->openFile($pFilename);
> $this->openFileOrMemory($pFilename);
$this->checkSeparator(); $this->inferSeparator(); // Create new PhpSpreadsheet object while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { $spreadsheet->createSheet(); } $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); // Set our starting row based on whether we're in contiguous mode or not $currentRow = 1;
< if ($this->contiguous) { < $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; < }
> $outRow = 0;
// Loop through each line of the file in turn while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
> $noOutputYet = true;
$columnLetter = 'A'; foreach ($rowData as $rowDatum) { if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
< // Convert encoding if necessary < if ($this->inputEncoding !== 'UTF-8') { < $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
> if ($this->contiguous) { > if ($noOutputYet) { > $noOutputYet = false; > ++$outRow; > } > } else { > $outRow = $currentRow;
}
<
// Set cell value
< $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
> $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
} ++$columnLetter; } ++$currentRow; } // Close file fclose($fileHandle);
< if ($this->contiguous) { < $this->contiguousRow = $currentRow; < } <
ini_set('auto_detect_line_endings', $lineEnding); // Return return $spreadsheet; } /** * Get delimiter. * * @return string */ public function getDelimiter() { return $this->delimiter; } /** * Set delimiter. * * @param string $delimiter Delimiter, eg: ',' *
< * @return CSV
> * @return $this
*/ public function setDelimiter($delimiter) { $this->delimiter = $delimiter; return $this; } /** * Get enclosure. * * @return string */ public function getEnclosure() { return $this->enclosure; } /** * Set enclosure. * * @param string $enclosure Enclosure, defaults to " *
< * @return CSV
> * @return $this
*/ public function setEnclosure($enclosure) { if ($enclosure == '') { $enclosure = '"'; } $this->enclosure = $enclosure; return $this; } /** * Get sheet index. * * @return int */ public function getSheetIndex() { return $this->sheetIndex; } /** * Set sheet index. * * @param int $pValue Sheet index *
< * @return CSV
> * @return $this
*/ public function setSheetIndex($pValue) { $this->sheetIndex = $pValue; return $this; } /** * Set Contiguous. * * @param bool $contiguous *
< * @return Csv
> * @return $this
*/ public function setContiguous($contiguous) { $this->contiguous = (bool) $contiguous;
< if (!$contiguous) { < $this->contiguousRow = -1; < }
return $this; } /** * Get Contiguous. * * @return bool */ public function getContiguous() { return $this->contiguous; } /** * Set escape backslashes. * * @param string $escapeCharacter * * @return $this */ public function setEscapeCharacter($escapeCharacter) { $this->escapeCharacter = $escapeCharacter; return $this; } /** * Get escape backslashes. * * @return string */ public function getEscapeCharacter() { return $this->escapeCharacter; } /** * Can the current IReader read the file? * * @param string $pFilename * * @return bool */ public function canRead($pFilename) { // Check if file exists try { $this->openFile($pFilename);
< } catch (Exception $e) {
> } catch (InvalidArgumentException $e) {
return false; } fclose($this->fileHandle); // Trust file extension if any $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION)); if (in_array($extension, ['csv', 'tsv'])) { return true; } // Attempt to guess mimetype $type = mime_content_type($pFilename); $supportedTypes = [
> 'application/csv',
'text/csv', 'text/plain', 'inode/x-empty', ]; return in_array($type, $supportedTypes, true);
> } } > } > private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void > { > if ($encoding === '') { > $pos = strpos($contents, $compare); > if ($pos !== false && $pos % strlen($compare) === 0) { > $encoding = $setEncoding; > } > } > } > > private static function guessEncodingNoBom(string $filename): string > { > $encoding = ''; > $contents = file_get_contents($filename); > self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE'); > self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE'); > self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE'); > self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE'); > if ($encoding === '' && preg_match('//u', $contents) === 1) { > $encoding = 'UTF-8'; > } > > return $encoding; > } > > private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void > { > if ($encoding === '') { > if ($compare === substr($first4, 0, strlen($compare))) { > $encoding = $setEncoding; > } > } > } > > private static function guessEncodingBom(string $filename): string > { > $encoding = ''; > $first4 = file_get_contents($filename, false, null, 0, 4); > if ($first4 !== false) { > self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8'); > self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE'); > self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE'); > self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE'); > self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE'); > } > > return $encoding; > } > > public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string > { > $encoding = self::guessEncodingBom($filename); > if ($encoding === '') { > $encoding = self::guessEncodingNoBom($filename); > } > > return ($encoding === '') ? $dflt : $encoding;