Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

< use InvalidArgumentException;
> use PhpOffice\PhpSpreadsheet\Calculation\Calculation; > use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
> use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter; use PhpOffice\PhpSpreadsheet\Shared\StringHelper; > use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
> use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
class Csv extends BaseReader {
> const DEFAULT_FALLBACK_ENCODING = 'CP1252'; const UTF8_BOM = "\xEF\xBB\xBF"; > const GUESS_ENCODING = 'guess';
const UTF8_BOM_LEN = 3; const UTF16BE_BOM = "\xfe\xff"; const UTF16BE_BOM_LEN = 2; const UTF16BE_LF = "\x00\x0a"; const UTF16LE_BOM = "\xff\xfe"; const UTF16LE_BOM_LEN = 2; const UTF16LE_LF = "\x0a\x00"; const UTF32BE_BOM = "\x00\x00\xfe\xff"; const UTF32BE_BOM_LEN = 4; const UTF32BE_LF = "\x00\x00\x00\x0a"; const UTF32LE_BOM = "\xff\xfe\x00\x00"; const UTF32LE_BOM_LEN = 4; const UTF32LE_LF = "\x0a\x00\x00\x00"; /** * Input encoding. * * @var string */ private $inputEncoding = 'UTF-8'; /**
< * Delimiter.
> * Fallback encoding if guess strikes out.
* * @var string */
> private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING; private $delimiter; > > /** /** > * Delimiter. * Enclosure. > * * > * @var ?string * @var string > */
*/ private $enclosure = '"'; /** * Sheet index to read. * * @var int */ private $sheetIndex = 0; /** * Load rows contiguously. * * @var bool */ private $contiguous = false; /** * The character that can escape the enclosure. * * @var string */ private $escapeCharacter = '\\'; /**
> * Callback for setting defaults in construction. * Create a new CSV Reader instance. > * */ > * @var ?callable public function __construct() > */ { > private static $constructorCallback; parent::__construct(); > } > /** > * Attempt autodetect line endings (deprecated after PHP8.1)? /** > * * Set input encoding. > * @var bool * > */ * @param string $pValue Input encoding, eg: 'UTF-8' > private $testAutodetect = true; * > * @return $this > /** */ > * @var bool public function setInputEncoding($pValue) > */ { > protected $castFormattedNumberToNumeric = false; $this->inputEncoding = $pValue; > > /** return $this; > * @var bool } > */ > protected $preserveNumericFormatting = false; /** > * Get input encoding. > /** @var bool */ * > private $preserveNullString = false; * @return string > */ > /**
public function getInputEncoding()
> $callback = self::$constructorCallback; { > if ($callback !== null) { return $this->inputEncoding; > $callback($this); } > }
< * Set input encoding.
> * Set a callback to change the defaults.
< * @param string $pValue Input encoding, eg: 'UTF-8' < * < * @return $this
> * The callback must accept the Csv Reader object as the first parameter, > * and it should return void.
< public function setInputEncoding($pValue)
> public static function setConstructorCallback(?callable $callback): void
< $this->inputEncoding = $pValue;
> self::$constructorCallback = $callback; > } > > public static function getConstructorCallback(): ?callable > { > return self::$constructorCallback; > } > > public function setInputEncoding(string $encoding): self > { > $this->inputEncoding = $encoding;
< /** < * Get input encoding. < * < * @return string < */ < public function getInputEncoding()
> public function getInputEncoding(): string
> public function setFallbackEncoding(string $fallbackEncoding): self /** > { * Identify any separator that is explicitly set in the file. > $this->fallbackEncoding = $fallbackEncoding; */ > protected function checkSeparator(): void > return $this; { > } $line = fgets($this->fileHandle); > if ($line === false) { > public function getFallbackEncoding(): string return; > { } > return $this->fallbackEncoding; > } if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { >
$this->delimiter = substr($line, 4, 1); return; } $this->skipBOM(); } /** * Infer the separator if it isn't explicitly set in the file or specified by the user. */ protected function inferSeparator(): void { if ($this->delimiter !== null) { return; }
< $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~']; < $counts = []; < foreach ($potentialDelimiters as $delimiter) { < $counts[$delimiter] = []; < } < < // Count how many times each of the potential delimiters appears in each line < $numberLines = 0; < while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) { < $countLine = []; < for ($i = strlen($line) - 1; $i >= 0; --$i) { < $char = $line[$i]; < if (isset($counts[$char])) { < if (!isset($countLine[$char])) { < $countLine[$char] = 0; < } < ++$countLine[$char]; < } < } < foreach ($potentialDelimiters as $delimiter) { < $counts[$delimiter][] = $countLine[$delimiter] < ?? 0; < } < }
> $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);
// If number of lines is 0, nothing to infer : fall back to the default
< if ($numberLines === 0) { < $this->delimiter = reset($potentialDelimiters);
> if ($inferenceEngine->linesCounted() === 0) { > $this->delimiter = $inferenceEngine->getDefaultDelimiter();
$this->skipBOM(); return; }
< // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) < $meanSquareDeviations = []; < $middleIdx = floor(($numberLines - 1) / 2); < < foreach ($potentialDelimiters as $delimiter) { < $series = $counts[$delimiter]; < sort($series); < < $median = ($numberLines % 2) < ? $series[$middleIdx] < : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; < < if ($median === 0) { < continue; < } < < $meanSquareDeviations[$delimiter] = array_reduce( < $series, < function ($sum, $value) use ($median) { < return $sum + ($value - $median) ** 2; < } < ) / count($series); < } < < // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) < $min = INF; < foreach ($potentialDelimiters as $delimiter) { < if (!isset($meanSquareDeviations[$delimiter])) { < continue; < } < < if ($meanSquareDeviations[$delimiter] < $min) { < $min = $meanSquareDeviations[$delimiter]; < $this->delimiter = $delimiter; < } < }
> $this->delimiter = $inferenceEngine->infer();
// If no delimiter could be detected, fall back to the default if ($this->delimiter === null) {
< $this->delimiter = reset($potentialDelimiters);
> $this->delimiter = $inferenceEngine->getDefaultDelimiter();
} $this->skipBOM(); } /**
< * Get the next full line from the file. < * < * @return false|string < */ < private function getNextLine() < { < $line = ''; < $enclosure = ($this->escapeCharacter === '' ? '' < : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')')) < . preg_quote($this->enclosure, '/'); < < do { < // Get the next line in the file < $newLine = fgets($this->fileHandle); < < // Return false if there is no next line < if ($newLine === false) { < return false; < } < < // Add the new line to the line passed in < $line = $line . $newLine; < < // Drop everything that is enclosed to avoid counting false positives in enclosures < $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line); < < // See if we have any enclosures left in the line < // if we still have an enclosure then we need to read the next line as well < } while (preg_match('/(' . $enclosure . ')/', $line) > 0); < < return $line; < } < < /**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
< * < * @param string $pFilename < * < * @return array
*/
< public function listWorksheetInfo($pFilename)
> public function listWorksheetInfo(string $filename): array
{ // Open file
< $this->openFileOrMemory($pFilename);
> $this->openFileOrMemory($filename);
$fileHandle = $this->fileHandle; // Skip BOM, if any $this->skipBOM(); $this->checkSeparator(); $this->inferSeparator(); $worksheetInfo = []; $worksheetInfo[0]['worksheetName'] = 'Worksheet'; $worksheetInfo[0]['lastColumnLetter'] = 'A'; $worksheetInfo[0]['lastColumnIndex'] = 0; $worksheetInfo[0]['totalRows'] = 0; $worksheetInfo[0]['totalColumns'] = 0; // Loop through each line of the file in turn
< while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
> $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter); > while (is_array($rowData)) {
++$worksheetInfo[0]['totalRows']; $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
> $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
} $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; // Close file fclose($fileHandle); return $worksheetInfo; } /** * Loads Spreadsheet from file.
< * < * @param string $pFilename < * < * @return Spreadsheet
*/
< public function load($pFilename)
> protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
{ // Create new Spreadsheet $spreadsheet = new Spreadsheet(); // Load into this instance
< return $this->loadIntoExisting($pFilename, $spreadsheet);
> return $this->loadIntoExisting($filename, $spreadsheet);
}
< private function openFileOrMemory($pFilename): void
> /** > * Loads Spreadsheet from string. > */ > public function loadSpreadsheetFromString(string $contents): Spreadsheet > { > // Create new Spreadsheet > $spreadsheet = new Spreadsheet(); > > // Load into this instance > return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true); > } > > private function openFileOrMemory(string $filename): void
{ // Open file
< $fhandle = $this->canRead($pFilename);
> $fhandle = $this->canRead($filename);
if (!$fhandle) {
< throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
> throw new Exception($filename . ' is an Invalid Spreadsheet file.');
}
< $this->openFile($pFilename);
> if ($this->inputEncoding === self::GUESS_ENCODING) { > $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding); > } > $this->openFile($filename);
if ($this->inputEncoding !== 'UTF-8') { fclose($this->fileHandle);
< $entireFile = file_get_contents($pFilename); < $this->fileHandle = fopen('php://memory', 'r+b');
> $entireFile = file_get_contents($filename); > $fileHandle = fopen('php://memory', 'r+b'); > if ($fileHandle !== false && $entireFile !== false) { > $this->fileHandle = $fileHandle;
$data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding); fwrite($this->fileHandle, $data); $this->skipBOM(); } }
> } > /** > public function setTestAutoDetect(bool $value): self * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. > { * > $this->testAutodetect = $value; * @param string $pFilename > * > return $this; * @return Spreadsheet > } */ > public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) > private function setAutoDetect(?string $value): ?string { > { $lineEnding = ini_get('auto_detect_line_endings'); > $retVal = null; ini_set('auto_detect_line_endings', true); > if ($value !== null && $this->testAutodetect) { > $retVal2 = @ini_set('auto_detect_line_endings', $value); // Open file > if (is_string($retVal2)) { $this->openFileOrMemory($pFilename); > $retVal = $retVal2; $fileHandle = $this->fileHandle; > } > } // Skip BOM, if any > $this->skipBOM(); > return $retVal; $this->checkSeparator(); > } $this->inferSeparator(); > > public function castFormattedNumberToNumeric( // Create new PhpSpreadsheet object > bool $castFormattedNumberToNumeric, while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { > bool $preserveNumericFormatting = false $spreadsheet->createSheet(); > ): void { } > $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric; $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); > $this->preserveNumericFormatting = $preserveNumericFormatting; > } // Set our starting row based on whether we're in contiguous mode or not > $currentRow = 1; > /** $outRow = 0; > * Open data uri for reading. 
> */ // Loop through each line of the file in turn > private function openDataUri(string $filename): void while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { > { $noOutputYet = true; > $fileHandle = fopen($filename, 'rb'); $columnLetter = 'A'; > if ($fileHandle === false) { foreach ($rowData as $rowDatum) { > // @codeCoverageIgnoreStart if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { > throw new ReaderException('Could not open file ' . $filename . ' for reading.'); if ($this->contiguous) { > // @codeCoverageIgnoreEnd if ($noOutputYet) { > } $noOutputYet = false; > ++$outRow; > $this->fileHandle = $fileHandle; } > }
< * < * @param string $pFilename < * < * @return Spreadsheet
< public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
> public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
< $lineEnding = ini_get('auto_detect_line_endings'); < ini_set('auto_detect_line_endings', true);
> return $this->loadStringOrFile($filename, $spreadsheet, false); > } > > /** > * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. > */ > private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet > { > // Deprecated in Php8.1 > $iniset = $this->setAutoDetect('1');
< $this->openFileOrMemory($pFilename);
> if ($dataUri) { > $this->openDataUri($filename); > } else { > $this->openFileOrMemory($filename); > }
< while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
> $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter); > $valueBinder = Cell::getValueBinder(); > $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion(); > while (is_array($rowData)) {
< if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
> $this->convertBoolean($rowDatum, $preserveBooleanString); > $numberFormatMask = $this->convertFormattedNumber($rowDatum); > if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
> // Set basic styling for the value (Note that this could be overloaded by styling in a value binder) // Close file > $sheet->getCell($columnLetter . $outRow)->getStyle() fclose($fileHandle); > ->getNumberFormat() > ->setFormatCode($numberFormatMask);
ini_set('auto_detect_line_endings', $lineEnding);
> $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
< ini_set('auto_detect_line_endings', $lineEnding);
> $this->setAutoDetect($iniset);
// Return return $spreadsheet; } /**
< * Get delimiter.
> * Convert string true/false to boolean, and null to null-string.
*
< * @return string
> * @param mixed $rowDatum
*/
< public function getDelimiter()
> private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
{
< return $this->delimiter;
> if (is_string($rowDatum) && !$preserveBooleanString) { > if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) { > $rowDatum = true; > } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) { > $rowDatum = false; > } > } else { > $rowDatum = $rowDatum ?? ''; > }
} /**
< * Set delimiter. < * < * @param string $delimiter Delimiter, eg: ','
> * Convert numeric strings to int or float values.
*
< * @return $this
> * @param mixed $rowDatum
*/
< public function setDelimiter($delimiter)
> private function convertFormattedNumber(&$rowDatum): string > { > $numberFormatMask = NumberFormat::FORMAT_GENERAL; > if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) { > $numeric = str_replace( > [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()], > ['', '.'], > $rowDatum > ); > > if (is_numeric($numeric)) { > $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator()); > if ($this->preserveNumericFormatting === true) { > $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false) > ? '#,##0' : '0'; > if ($decimalPos !== false) { > $decimals = strlen($rowDatum) - $decimalPos - 1; > $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6)); > } > } > > $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric; > } > } > > return $numberFormatMask; > } > > public function getDelimiter(): ?string > { > return $this->delimiter; > } > > public function setDelimiter(?string $delimiter): self
{ $this->delimiter = $delimiter; return $this; }
< /** < * Get enclosure. < * < * @return string < */ < public function getEnclosure()
> public function getEnclosure(): string
{ return $this->enclosure; }
< /** < * Set enclosure. < * < * @param string $enclosure Enclosure, defaults to " < * < * @return $this < */ < public function setEnclosure($enclosure)
> public function setEnclosure(string $enclosure): self
{ if ($enclosure == '') { $enclosure = '"'; } $this->enclosure = $enclosure; return $this; }
< /** < * Get sheet index. < * < * @return int < */ < public function getSheetIndex()
> public function getSheetIndex(): int
{ return $this->sheetIndex; }
< /** < * Set sheet index. < * < * @param int $pValue Sheet index < * < * @return $this < */ < public function setSheetIndex($pValue)
> public function setSheetIndex(int $indexValue): self
{
< $this->sheetIndex = $pValue;
> $this->sheetIndex = $indexValue;
return $this; }
< /** < * Set Contiguous. < * < * @param bool $contiguous < * < * @return $this < */ < public function setContiguous($contiguous)
> public function setContiguous(bool $contiguous): self
{
< $this->contiguous = (bool) $contiguous;
> $this->contiguous = $contiguous;
return $this; }
< /** < * Get Contiguous. < * < * @return bool < */ < public function getContiguous()
> public function getContiguous(): bool
{ return $this->contiguous; }
< /** < * Set escape backslashes. < * < * @param string $escapeCharacter < * < * @return $this < */ < public function setEscapeCharacter($escapeCharacter)
> public function setEscapeCharacter(string $escapeCharacter): self
{ $this->escapeCharacter = $escapeCharacter; return $this; }
< /** < * Get escape backslashes. < * < * @return string < */ < public function getEscapeCharacter()
> public function getEscapeCharacter(): string
{ return $this->escapeCharacter; } /** * Can the current IReader read the file?
< * < * @param string $pFilename < * < * @return bool
*/
< public function canRead($pFilename)
> public function canRead(string $filename): bool
{ // Check if file exists try {
< $this->openFile($pFilename); < } catch (InvalidArgumentException $e) {
> $this->openFile($filename); > } catch (ReaderException $e) {
return false; } fclose($this->fileHandle); // Trust file extension if any
< $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
> $extension = strtolower(/** @scrutinizer ignore-type */ pathinfo($filename, PATHINFO_EXTENSION));
if (in_array($extension, ['csv', 'tsv'])) { return true; } // Attempt to guess mimetype
< $type = mime_content_type($pFilename);
> $type = mime_content_type($filename);
$supportedTypes = [
    'application/csv',
    'text/csv',
    'text/plain',
    'inode/x-empty',
];

return in_array($type, $supportedTypes, true);
}

/**
 * Probe BOM-less content for one encoding's line-feed byte sequence.
 *
 * Does nothing once $encoding has already been decided (non-empty).
 * Otherwise the first occurrence of $compare is located, and $encoding is
 * set only when that occurrence starts at an offset that is a multiple of
 * strlen($compare), i.e. it is aligned to the code-unit width.
 *
 * @param string $encoding    Result accumulator; written only while still ''
 * @param string $contents    Raw file contents (by reference; not modified here)
 * @param string $compare     Byte sequence to search for
 * @param string $setEncoding Encoding name to record on a match
 */
private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
{
    if ($encoding === '') {
        $pos = strpos($contents, $compare);
        if ($pos !== false && $pos % strlen($compare) === 0) {
            $encoding = $setEncoding;
        }
    }
}

/**
 * Guess the encoding of a file that carries no byte-order mark.
 *
 * Tries the UTF-32 and UTF-16 line-feed sequences in turn, then falls back
 * to UTF-8 when the contents validate as UTF-8.
 *
 * @param string $filename Path of the file to inspect
 *
 * @return string The guessed encoding name, or '' when nothing matched
 */
private static function guessEncodingNoBom(string $filename): string
{
    $encoding = '';
    // file_get_contents() returns false on failure; cast explicitly so the
    // string-typed helper and preg_match() always receive a string instead
    // of relying on implicit bool-to-string coercion.
    $contents = (string) file_get_contents($filename);
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
    // An empty pattern with the /u modifier matches only if the subject is valid UTF-8.
    if ($encoding === '' && preg_match('//u', $contents) === 1) {
        $encoding = 'UTF-8';
    }

    return $encoding;
}

/**
 * Compare the leading bytes of a file against one byte-order mark.
 *
 * Does nothing once $encoding has already been decided (non-empty).
 *
 * @param string $encoding    Result accumulator; written only while still ''
 * @param string $first4      Leading bytes of the file (up to four)
 * @param string $compare     BOM byte sequence to test for
 * @param string $setEncoding Encoding name to record on a match
 */
private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
{
    if ($encoding === '') {
        if ($compare === substr($first4, 0, strlen($compare))) {
            $encoding = $setEncoding;
        }
    }
}

/**
 * Guess the encoding of a file from its byte-order mark, if it has one.
 *
 * @param string $filename Path of the file to inspect
 *
 * @return string The encoding indicated by the BOM, or '' when no BOM matched
 *                or the file could not be read
 */
private static function guessEncodingBom(string $filename): string
{
    $encoding = '';
    $first4 = file_get_contents($filename, false, null, 0, 4);
    if ($first4 !== false) {
        self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
        // UTF-32LE must be tested before UTF-16LE: its BOM begins with the
        // same two bytes as the UTF-16LE BOM.
        self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
    }

    return $encoding;
}
< public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string
> public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
{ $encoding = self::guessEncodingBom($filename); if ($encoding === '') { $encoding = self::guessEncodingNoBom($filename); } return ($encoding === '') ? $dflt : $encoding;
> } } > } > public function setPreserveNullString(bool $value): self > { > $this->preserveNullString = $value; > > return $this; > } > > public function getPreserveNullString(): bool > { > return $this->preserveNullString;