<?php
namespace PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;
class Csv extends BaseReader
{
/** Encoding assumed when guessing is requested but no encoding can be detected. */
public const DEFAULT_FALLBACK_ENCODING = 'CP1252';

/** Pseudo-encoding: when set as input encoding, the real encoding is guessed as the file is opened. */
public const GUESS_ENCODING = 'guess';

// Byte-order marks, their byte lengths, and the encoded line-feed sequences
// used to recognise each Unicode encoding.
public const UTF8_BOM = "\xEF\xBB\xBF";
public const UTF8_BOM_LEN = 3;
public const UTF16BE_BOM = "\xfe\xff";
public const UTF16BE_BOM_LEN = 2;
public const UTF16BE_LF = "\x00\x0a";
public const UTF16LE_BOM = "\xff\xfe";
public const UTF16LE_BOM_LEN = 2;
public const UTF16LE_LF = "\x0a\x00";
public const UTF32BE_BOM = "\x00\x00\xfe\xff";
public const UTF32BE_BOM_LEN = 4;
public const UTF32BE_LF = "\x00\x00\x00\x0a";
public const UTF32LE_BOM = "\xff\xfe\x00\x00";
public const UTF32LE_BOM_LEN = 4;
public const UTF32LE_LF = "\x0a\x00\x00\x00";

/**
 * Input encoding.
 *
 * @var string
 */
private $inputEncoding = 'UTF-8';

/**
 * Fallback encoding if guess strikes out.
 *
 * @var string
 */
private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

/**
 * Delimiter.
 *
 * Null until it is set explicitly, read from a "sep=" line, or inferred.
 *
 * @var ?string
 */
private $delimiter;

/**
 * Enclosure.
 *
 * @var string
 */
private $enclosure = '"';

/**
 * Sheet index to read.
 *
 * @var int
 */
private $sheetIndex = 0;

/**
 * Load rows contiguously.
 *
 * @var bool
 */
private $contiguous = false;

/**
 * The character that can escape the enclosure.
 *
 * @var string
 */
private $escapeCharacter = '\\';

/**
 * Callback for setting defaults in construction.
 *
 * @var ?callable
 */
private static $constructorCallback;

/**
 * Attempt autodetect line endings (deprecated after PHP8.1)?
 *
 * @var bool
 */
private $testAutodetect = true;

/**
 * Whether numeric-looking strings are cast to int/float while loading.
 *
 * @var bool
 */
protected $castFormattedNumberToNumeric = false;

/**
 * Whether a number format mask is derived from the original text when casting.
 *
 * @var bool
 */
protected $preserveNumericFormatting = false;

/**
 * Whether empty strings are still written to cells instead of being skipped.
 *
 * @var bool
 */
private $preserveNullString = false;

/**
 * Create a new CSV Reader instance.
 *
 * After the parent constructor has run, the static constructor callback
 * (if one is registered) is invoked with this reader so it can change defaults.
 */
public function __construct()
{
    parent::__construct();
    $callback = self::$constructorCallback;
    if ($callback !== null) {
        $callback($this);
    }
}

/**
 * Set a callback to change the defaults.
 *
 * The callback must accept the Csv Reader object as the first parameter,
 * and it should return void.
 */
public static function setConstructorCallback(?callable $callback): void
{
    self::$constructorCallback = $callback;
}

/**
 * Get the callback currently registered for new reader instances, if any.
 */
public static function getConstructorCallback(): ?callable
{
    return self::$constructorCallback;
}

/**
 * Set input encoding.
 *
 * @param string $encoding Input encoding, eg: 'UTF-8', or self::GUESS_ENCODING
 *                         to have the encoding detected when the file is opened
 */
public function setInputEncoding(string $encoding): self
{
    $this->inputEncoding = $encoding;

    return $this;
}
/**
 * Get input encoding.
 */
public function getInputEncoding(): string
{
    return $this->inputEncoding;
}
/**
 * Set the encoding used when the input encoding is guessed and nothing can be detected.
 */
public function setFallbackEncoding(string $fallbackEncoding): self
{
    $this->fallbackEncoding = $fallbackEncoding;

    return $this;
}

/**
 * Get the encoding used when guessing detects nothing.
 */
public function getFallbackEncoding(): string
{
    return $this->fallbackEncoding;
}

/**
 * Move filepointer past any BOM marker.
 *
 * Only the UTF-8 BOM is tested: input in any other encoding is converted
 * to UTF-8 in memory when the file is opened.
 */
protected function skipBOM(): void
{
    rewind($this->fileHandle);

    // fgets() reads at most (length - 1) bytes, hence the + 1.
    if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
        // No BOM: those bytes are data, so go back to the start.
        rewind($this->fileHandle);
    }
}
/**
 * Identify any separator that is explicitly set in the file.
 *
 * Reads the next line from the file handle. If it is exactly "sep=X"
 * (five characters once CR/LF are trimmed, prefix matched case-insensitively),
 * X becomes the delimiter and the file pointer is left after that line.
 * Otherwise the pointer is reset to just past any BOM.
 */
protected function checkSeparator(): void
{
    $line = fgets($this->fileHandle);
    if ($line === false) {
        return;
    }

    if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
        $this->delimiter = substr($line, 4, 1);

        return;
    }

    $this->skipBOM();
}
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Inference is delegated to the Delimiter helper. In every path where inference
 * is attempted, the file pointer is reset to just past any BOM afterwards.
 */
protected function inferSeparator(): void
{
    if ($this->delimiter !== null) {
        return;
    }

    $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

    // If number of lines is 0, nothing to infer : fall back to the default
    if ($inferenceEngine->linesCounted() === 0) {
        $this->delimiter = $inferenceEngine->getDefaultDelimiter();
        $this->skipBOM();

        return;
    }

    $this->delimiter = $inferenceEngine->infer();

    // If no delimiter could be detected, fall back to the default
    if ($this->delimiter === null) {
        $this->delimiter = $inferenceEngine->getDefaultDelimiter();
    }

    $this->skipBOM();
}
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 *
 * The result always contains exactly one entry (index 0), named 'Worksheet'.
 */
public function listWorksheetInfo(string $filename): array
{
    // Open file
    $this->openFileOrMemory($filename);
    $fileHandle = $this->fileHandle;

    $worksheetInfo = [];
    $worksheetInfo[0]['worksheetName'] = 'Worksheet';
    $worksheetInfo[0]['lastColumnLetter'] = 'A';
    $worksheetInfo[0]['lastColumnIndex'] = 0;
    $worksheetInfo[0]['totalRows'] = 0;
    $worksheetInfo[0]['totalColumns'] = 0;

    try {
        // Skip BOM, if any
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Loop through each line of the file in turn
        $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
        while (is_array($rowData)) {
            ++$worksheetInfo[0]['totalRows'];
            $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
        }
    } finally {
        // Close file, even if reading failed part-way
        fclose($fileHandle);
    }

    $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
    $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

    return $worksheetInfo;
}
/**
 * Loads Spreadsheet from file.
 */
protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
{
    // Create new Spreadsheet
    $spreadsheet = new Spreadsheet();

    // Load into this instance
    return $this->loadIntoExisting($filename, $spreadsheet);
}

/**
 * Loads Spreadsheet from string.
 *
 * The contents are wrapped in a URL-encoded "data://text/plain," URI and read
 * through the data-URI path, which opens the stream directly instead of going
 * through the file open/encoding-conversion path.
 */
public function loadSpreadsheetFromString(string $contents): Spreadsheet
{
    // Create new Spreadsheet
    $spreadsheet = new Spreadsheet();

    // Load into this instance
    return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
}

/**
 * Open the file for reading, converting it to UTF-8 in memory when needed.
 *
 * If the input encoding is GUESS_ENCODING it is replaced by the detected
 * encoding (the property itself is updated). For any encoding other than UTF-8
 * the whole file is read, converted, and served from a php://memory stream.
 *
 * @throws Exception if the file is not readable as CSV, or cannot be re-read for conversion
 */
private function openFileOrMemory(string $filename): void
{
    // Open file
    if (!$this->canRead($filename)) {
        throw new Exception($filename . ' is an Invalid Spreadsheet file.');
    }
    if ($this->inputEncoding === self::GUESS_ENCODING) {
        $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
    }
    $this->openFile($filename);
    if ($this->inputEncoding === 'UTF-8') {
        return;
    }

    // Swap the on-disk handle for an in-memory UTF-8 copy of the file.
    fclose($this->fileHandle);
    $entireFile = file_get_contents($filename);
    $fileHandle = fopen('php://memory', 'r+b');
    if ($fileHandle === false || $entireFile === false) {
        // Fail here rather than leave $this->fileHandle pointing at a closed stream.
        if ($fileHandle !== false) {
            fclose($fileHandle);
        }

        throw new Exception('Could not read file ' . $filename . ' for encoding conversion.');
    }

    $this->fileHandle = $fileHandle;
    $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
    fwrite($this->fileHandle, $data);
    $this->skipBOM();
}

/**
 * Enable or disable changing the 'auto_detect_line_endings' ini setting while loading.
 */
public function setTestAutoDetect(bool $value): self
{
    $this->testAutodetect = $value;

    return $this;
}

/**
 * Set the 'auto_detect_line_endings' ini value, if that is enabled for this reader.
 *
 * @param ?string $value New ini value; null means "leave the setting alone"
 *
 * @return ?string The previous ini value when it was changed and reported as a
 *                 string, otherwise null (suitable for passing back in to restore)
 */
private function setAutoDetect(?string $value): ?string
{
    if ($value === null || !$this->testAutodetect) {
        return null;
    }

    // Errors are suppressed on purpose: the setting is deprecated as of PHP 8.1.
    $previous = @ini_set('auto_detect_line_endings', $value);

    return is_string($previous) ? $previous : null;
}

/**
 * Configure conversion of numeric-looking strings while loading.
 *
 * @param bool $castFormattedNumberToNumeric Cast strings that are numeric (after removing
 *                                           thousands separators) to int/float
 * @param bool $preserveNumericFormatting    Also derive a number format mask from the original text
 */
public function castFormattedNumberToNumeric(
    bool $castFormattedNumberToNumeric,
    bool $preserveNumericFormatting = false
): void {
    $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
    $this->preserveNumericFormatting = $preserveNumericFormatting;
}

/**
 * Open data uri for reading.
 *
 * @throws ReaderException if the stream cannot be opened
 */
private function openDataUri(string $filename): void
{
    $fileHandle = fopen($filename, 'rb');
    if ($fileHandle === false) {
        // @codeCoverageIgnoreStart
        throw new ReaderException('Could not open file ' . $filename . ' for reading.');
        // @codeCoverageIgnoreEnd
    }

    $this->fileHandle = $fileHandle;
}

/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 */
public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
{
    return $this->loadStringOrFile($filename, $spreadsheet, false);
}
/**
 * Loads PhpSpreadsheet from file or data URI into PhpSpreadsheet instance.
 *
 * @param string $filename File name, or a data URI when $dataUri is true
 * @param bool   $dataUri  Open $filename directly as a stream instead of through
 *                         the file open/encoding-conversion path
 */
private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
{
    // Deprecated in Php8.1
    $iniset = $this->setAutoDetect('1');

    try {
        // Open file
        if ($dataUri) {
            $this->openDataUri($filename);
        } else {
            $this->openFileOrMemory($filename);
        }
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // $currentRow is the CSV row number (used for the read filter);
            // $outRow is the worksheet row actually written to.
            $currentRow = 1;
            $outRow = 0;

            // Loop through each line of the file in turn
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            $valueBinder = Cell::getValueBinder();
            $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
            while (is_array($rowData)) {
                $noOutputYet = true;
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    $this->convertBoolean($rowDatum, $preserveBooleanString);
                    $numberFormatMask = $this->convertFormattedNumber($rowDatum);
                    if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        if ($this->contiguous) {
                            // Contiguous mode: advance the output row only once per
                            // CSV row, and only when that row writes at least one cell.
                            if ($noOutputYet) {
                                $noOutputYet = false;
                                ++$outRow;
                            }
                        } else {
                            $outRow = $currentRow;
                        }
                        // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                        $sheet->getCell($columnLetter . $outRow)->getStyle()
                            ->getNumberFormat()
                            ->setFormatCode($numberFormatMask);
                        // Set cell value
                        $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                    }
                    ++$columnLetter;
                }
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                ++$currentRow;
            }
        } finally {
            // Close file, even if loading failed part-way
            fclose($fileHandle);
        }
    } finally {
        // Restore the previous line-ending detection setting
        $this->setAutoDetect($iniset);
    }

    // Return
    return $spreadsheet;
}
/**
 * Convert string true/false to boolean, and null to null-string.
 *
 * A string is compared case-insensitively against both the Calculation
 * TRUE/FALSE names and the literals 'true'/'false'. When boolean strings are
 * preserved, or the value is not a string, only null is replaced (by '').
 *
 * @param mixed $rowDatum Value to convert, modified in place
 */
private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
{
    if (is_string($rowDatum) && !$preserveBooleanString) {
        if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
            $rowDatum = true;
        } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
            $rowDatum = false;
        }
    } else {
        $rowDatum = $rowDatum ?? '';
    }
}
/**
 * Convert numeric strings to int or float values.
 *
 * Only active when casting has been enabled. The thousands separator is
 * removed and the decimal separator replaced by '.'; if the result is numeric
 * the value is cast to float (when a decimal separator was present) or int.
 *
 * @param mixed $rowDatum Value to convert, modified in place
 *
 * @return string Number format mask for the cell: General unless numeric
 *                formatting is preserved and the value was converted
 */
private function convertFormattedNumber(&$rowDatum): string
{
    $numberFormatMask = NumberFormat::FORMAT_GENERAL;
    if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
        $numeric = str_replace(
            [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
            ['', '.'],
            $rowDatum
        );

        if (is_numeric($numeric)) {
            $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
            if ($this->preserveNumericFormatting === true) {
                $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false)
                    ? '#,##0' : '0';
                if ($decimalPos !== false) {
                    // Keep the original number of decimals, capped at 6.
                    $decimals = strlen($rowDatum) - $decimalPos - 1;
                    $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
                }
            }

            $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric;
        }
    }

    return $numberFormatMask;
}

/**
 * Get delimiter.
 *
 * @return ?string Null if no delimiter has been set, read from the file, or inferred yet
 */
public function getDelimiter(): ?string
{
    return $this->delimiter;
}

/**
 * Set delimiter.
 *
 * @param ?string $delimiter Delimiter, eg: ','; null leaves it to be inferred from the file
 */
public function setDelimiter(?string $delimiter): self
{
    $this->delimiter = $delimiter;

    return $this;
}
/**
 * Get enclosure.
 */
public function getEnclosure(): string
{
    return $this->enclosure;
}
/**
 * Set enclosure.
 *
 * @param string $enclosure Enclosure; an empty string selects the default "
 */
public function setEnclosure(string $enclosure): self
{
    if ($enclosure === '') {
        $enclosure = '"';
    }
    $this->enclosure = $enclosure;

    return $this;
}
/**
 * Get sheet index.
 */
public function getSheetIndex(): int
{
    return $this->sheetIndex;
}
/**
 * Set sheet index.
 *
 * @param int $indexValue Sheet index
 */
public function setSheetIndex(int $indexValue): self
{
    $this->sheetIndex = $indexValue;

    return $this;
}
/**
 * Set Contiguous.
 *
 * @param bool $contiguous Whether rows are loaded contiguously
 */
public function setContiguous(bool $contiguous): self
{
    $this->contiguous = $contiguous;

    return $this;
}
/**
 * Get Contiguous.
 */
public function getContiguous(): bool
{
    return $this->contiguous;
}
/**
 * Set the character that can escape the enclosure.
 */
public function setEscapeCharacter(string $escapeCharacter): self
{
    $this->escapeCharacter = $escapeCharacter;

    return $this;
}
/**
 * Get the character that can escape the enclosure.
 */
public function getEscapeCharacter(): string
{
    return $this->escapeCharacter;
}
/**
 * Can the current IReader read the file?
 *
 * The file must be openable. A 'csv' or 'tsv' extension is then trusted
 * outright; otherwise the detected MIME type must be one of the supported types.
 */
public function canRead(string $filename): bool
{
    // Check if file exists
    try {
        $this->openFile($filename);
    } catch (ReaderException $e) {
        return false;
    }

    fclose($this->fileHandle);

    // Trust file extension if any
    $extension = strtolower(/** @scrutinizer ignore-type */ pathinfo($filename, PATHINFO_EXTENSION));
    if (in_array($extension, ['csv', 'tsv'], true)) {
        return true;
    }

    // Attempt to guess mimetype
    $type = mime_content_type($filename);
    $supportedTypes = [
        'application/csv',
        'text/csv',
        'text/plain',
        'inode/x-empty',
    ];

    return in_array($type, $supportedTypes, true);
}

/**
 * Test BOM-less contents for one encoding by looking for its encoded line feed.
 *
 * Does nothing if an encoding has already been chosen. The encoding is
 * accepted only when the first occurrence of $compare starts at an offset that
 * is a multiple of its length, i.e. aligned on a code-unit boundary.
 *
 * @param string $encoding    Result so far ('' = undecided), updated in place
 * @param string $contents    File contents (passed by reference, not modified)
 * @param string $compare     Encoded line-feed byte sequence to look for
 * @param string $setEncoding Encoding name to assign on a match
 */
private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
{
    // An empty needle can never identify an encoding (and would divide by zero below).
    if ($encoding !== '' || $compare === '') {
        return;
    }

    $pos = strpos($contents, $compare);
    if ($pos !== false && $pos % strlen($compare) === 0) {
        $encoding = $setEncoding;
    }
}

/**
 * Guess the encoding of a file that has no BOM.
 *
 * The UTF-32 and UTF-16 encoded line feeds are tried first (32-bit before
 * 16-bit); failing those, contents that are valid UTF-8 are reported as UTF-8.
 *
 * @return string Encoding name, or '' if nothing could be determined
 */
private static function guessEncodingNoBom(string $filename): string
{
    $encoding = '';
    // Cast so a failed read (false) is treated as empty contents rather than a type error.
    $contents = (string) file_get_contents($filename);
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
    self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
    // The empty pattern with the /u modifier matches only if the subject is valid UTF-8.
    if ($encoding === '' && preg_match('//u', $contents) === 1) {
        $encoding = 'UTF-8';
    }

    return $encoding;
}

/**
 * Test the leading bytes of a file against one BOM.
 *
 * Does nothing if an encoding has already been chosen.
 *
 * @param string $encoding    Result so far ('' = undecided), updated in place
 * @param string $first4      Leading bytes of the file
 * @param string $compare     BOM byte sequence to test for
 * @param string $setEncoding Encoding name to assign on a match
 */
private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
{
    if ($encoding !== '') {
        return;
    }

    if ($compare === substr($first4, 0, strlen($compare))) {
        $encoding = $setEncoding;
    }
}

/**
 * Guess the encoding of a file from its byte-order mark.
 *
 * @return string Encoding name, or '' if no known BOM is present or the file cannot be read
 */
private static function guessEncodingBom(string $filename): string
{
    $encoding = '';
    $first4 = file_get_contents($filename, false, null, 0, 4);
    if ($first4 !== false) {
        self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
        // UTF-32LE must be tested before UTF-16LE: the UTF-16LE BOM is a prefix of the UTF-32LE BOM.
        self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
        self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
    }

    return $encoding;
}

/**
 * Guess the encoding of a file.
 *
 * A BOM is looked for first; without one, the contents are inspected.
 *
 * @param string $dflt Encoding to return when nothing can be determined
 */
public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
{
    $encoding = self::guessEncodingBom($filename);
    if ($encoding === '') {
        $encoding = self::guessEncodingNoBom($filename);
    }

    return ($encoding === '') ? $dflt : $encoding;
}

/**
 * Choose whether empty strings are written to cells instead of being skipped while loading.
 */
public function setPreserveNullString(bool $value): self
{
    $this->preserveNullString = $value;

    return $this;
}

> public function getPreserveNullString(): bool
> {
> return $this->preserveNullString;