Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403]
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;

/**
 * Reader that loads delimiter-separated text (CSV/TSV) into a Spreadsheet.
 *
 * The reader can be told the delimiter, enclosure, escape character and input
 * encoding explicitly, or it can pick the delimiter up from a leading `sep=X`
 * line / infer it from the data, and guess the encoding from a BOM or from
 * the byte pattern of line feeds.
 */
class Csv extends BaseReader
{
    const DEFAULT_FALLBACK_ENCODING = 'CP1252';
    const GUESS_ENCODING = 'guess';
    const UTF8_BOM = "\xEF\xBB\xBF";
    const UTF8_BOM_LEN = 3;
    const UTF16BE_BOM = "\xfe\xff";
    const UTF16BE_BOM_LEN = 2;
    const UTF16BE_LF = "\x00\x0a";
    const UTF16LE_BOM = "\xff\xfe";
    const UTF16LE_BOM_LEN = 2;
    const UTF16LE_LF = "\x0a\x00";
    const UTF32BE_BOM = "\x00\x00\xfe\xff";
    const UTF32BE_BOM_LEN = 4;
    const UTF32BE_LF = "\x00\x00\x00\x0a";
    const UTF32LE_BOM = "\xff\xfe\x00\x00";
    const UTF32LE_BOM_LEN = 4;
    const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Input encoding.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Fallback encoding if guess strikes out.
     *
     * @var string
     */
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Delimiter.
     *
     * Null until it is set by the caller, read from a `sep=` line, or inferred.
     *
     * @var ?string
     */
    private $delimiter;

    /**
     * Enclosure.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Sheet index to read.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Load rows contiguously.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Callback for setting defaults in construction.
     *
     * @var ?callable
     */
    private static $constructorCallback;

    /**
     * Attempt autodetect line endings (deprecated after PHP8.1)?
     *
     * @var bool
     */
    private $testAutodetect = true;

    /**
     * Whether numeric-looking strings are cast to int/float on load.
     *
     * @var bool
     */
    protected $castFormattedNumberToNumeric = false;

    /**
     * Whether a number format mask is derived from the original text when casting.
     *
     * @var bool
     */
    protected $preserveNumericFormatting = false;

    /**
     * Whether empty-string fields are written to the sheet instead of being skipped.
     *
     * @var bool
     */
    private $preserveNullString = false;

    /**
     * Create a new CSV Reader instance.
     *
     * If a constructor callback has been registered, it is invoked with the
     * new instance so that it can change the defaults.
     */
    public function __construct()
    {
        parent::__construct();
        $callback = self::$constructorCallback;
        if ($callback !== null) {
            $callback($this);
        }
    }

    /**
     * Set a callback to change the defaults.
     *
     * The callback must accept the Csv Reader object as the first parameter,
     * and it should return void.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }

    /**
     * Return the callback currently registered for construction, if any.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }

    /**
     * Set the encoding of the input; use self::GUESS_ENCODING to have it guessed on open.
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }

    /**
     * Return the configured input encoding.
     *
     * After a file has been opened with self::GUESS_ENCODING this is the
     * encoding that was guessed, because the guess is stored back on the reader.
     */
    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }

    /**
     * Set the encoding used when guessing finds neither a BOM nor a recognisable pattern.
     */
    public function setFallbackEncoding(string $fallbackEncoding): self
    {
        $this->fallbackEncoding = $fallbackEncoding;

        return $this;
    }

    /**
     * Return the fallback encoding.
     */
    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }

    /**
     * Move filepointer past any BOM marker.
     *
     * Always rewinds first; the pointer ends up just after a UTF-8 BOM if the
     * stream starts with one, otherwise at the very start of the stream.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        // fgets() reads at most (length - 1) bytes, hence the + 1.
        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
            rewind($this->fileHandle);
        }
    }

    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of exactly `sep=X` (case-insensitive, X being one byte)
     * sets the delimiter to X and is consumed. Any other first line leaves the
     * delimiter untouched and the pointer is put back after the BOM.
     */
    protected function checkSeparator(): void
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($inferenceEngine->linesCounted() === 0) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
            $this->skipBOM();

            return;
        }

        $this->delimiter = $inferenceEngine->infer();

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
        }

        $this->skipBOM();
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * @return array a list with a single entry describing the one worksheet a CSV file holds
     */
    public function listWorksheetInfo(string $filename): array
    {
        // Open file
        $this->openFileOrMemory($filename);
        $fileHandle = $this->fileHandle;

        $worksheetInfo = [];
        $worksheetInfo[0]['worksheetName'] = 'Worksheet';
        $worksheetInfo[0]['lastColumnLetter'] = 'A';
        $worksheetInfo[0]['lastColumnIndex'] = 0;
        $worksheetInfo[0]['totalRows'] = 0;
        $worksheetInfo[0]['totalColumns'] = 0;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Loop through each line of the file in turn
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            while (is_array($rowData)) {
                ++$worksheetInfo[0]['totalRows'];
                $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            }
        } finally {
            // Close file, even when separator detection or reading throws
            if (is_resource($fileHandle)) {
                fclose($fileHandle);
            }
        }

        $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
        $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

        return $worksheetInfo;
    }

    /**
     * Loads Spreadsheet from file.
     */
    protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadIntoExisting($filename, $spreadsheet);
    }

    /**
     * Loads Spreadsheet from string.
     *
     * The contents are wrapped in a `data://` URI and read through the same
     * code path as a file; the configured input encoding is not applied on
     * this path.
     */
    public function loadSpreadsheetFromString(string $contents): Spreadsheet
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
    }

    /**
     * Open the file for reading, converting it to UTF-8 in memory when needed.
     *
     * On return $this->fileHandle is either the file itself (UTF-8 input) or a
     * php://memory stream holding the UTF-8 converted contents.
     *
     * @throws ReaderException if the file is not readable as CSV, or the
     *                         in-memory converted copy cannot be created
     */
    private function openFileOrMemory(string $filename): void
    {
        // Check the file is something this reader accepts
        if (!$this->canRead($filename)) {
            throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
        }
        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }
        $this->openFile($filename);
        if ($this->inputEncoding !== 'UTF-8') {
            fclose($this->fileHandle);
            $entireFile = file_get_contents($filename);
            $fileHandle = fopen('php://memory', 'r+b');
            if ($fileHandle === false || $entireFile === false) {
                // Do not carry on with $this->fileHandle pointing at the closed handle.
                if (is_resource($fileHandle)) {
                    fclose($fileHandle);
                }

                throw new ReaderException('Could not open file ' . $filename . ' for reading.');
            }
            $this->fileHandle = $fileHandle;
            $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
            fwrite($this->fileHandle, $data);
            $this->skipBOM();
        }
    }

    /**
     * Enable or disable toggling of the `auto_detect_line_endings` ini setting while loading.
     */
    public function setTestAutoDetect(bool $value): self
    {
        $this->testAutodetect = $value;

        return $this;
    }

    /**
     * Set the `auto_detect_line_endings` ini value, if that is enabled for this reader.
     *
     * @param ?string $value new value, or null to do nothing
     *
     * @return ?string the previous ini value when it was changed successfully, otherwise null
     */
    private function setAutoDetect(?string $value): ?string
    {
        $retVal = null;
        if ($value !== null && $this->testAutodetect) {
            // The setting is deprecated in PHP 8.1, hence the suppression.
            $retVal2 = @ini_set('auto_detect_line_endings', $value);
            if (is_string($retVal2)) {
                $retVal = $retVal2;
            }
        }

        return $retVal;
    }

    /**
     * Configure casting of numeric-looking strings to numbers.
     *
     * @param bool $castFormattedNumberToNumeric cast strings such as "1,234.50" to int/float
     * @param bool $preserveNumericFormatting when casting, also derive a number format mask from the text
     */
    public function castFormattedNumberToNumeric(
        bool $castFormattedNumberToNumeric,
        bool $preserveNumericFormatting = false
    ): void {
        $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
        $this->preserveNumericFormatting = $preserveNumericFormatting;
    }

    /**
     * Open data uri for reading.
     *
     * @throws ReaderException if the URI cannot be opened
     */
    private function openDataUri(string $filename): void
    {
        $fileHandle = fopen($filename, 'rb');
        if ($fileHandle === false) {
            // @codeCoverageIgnoreStart
            throw new ReaderException('Could not open file ' . $filename . ' for reading.');
            // @codeCoverageIgnoreEnd
        }

        $this->fileHandle = $fileHandle;
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        return $this->loadStringOrFile($filename, $spreadsheet, false);
    }

    /**
     * Loads PhpSpreadsheet from file or data URI into PhpSpreadsheet instance.
     *
     * @param string $filename path of the file, or a `data://` URI when $dataUri is true
     * @param bool $dataUri whether $filename is a data URI rather than a file path
     */
    private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
    {
        // Deprecated in Php8.1
        $iniset = $this->setAutoDetect('1');

        try {
            // Open file
            if ($dataUri) {
                $this->openDataUri($filename);
            } else {
                $this->openFileOrMemory($filename);
            }
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Create new PhpSpreadsheet object
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // Set our starting row based on whether we're in contiguous mode or not
                $currentRow = 1;
                $outRow = 0;

                // Loop through each line of the file in turn
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                $valueBinder = Cell::getValueBinder();
                $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
                while (is_array($rowData)) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        $this->convertBoolean($rowDatum, $preserveBooleanString);
                        $numberFormatMask = $this->convertFormattedNumber($rowDatum);
                        if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                // In contiguous mode the output row only advances for input rows that produce a cell.
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }
                            // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                            $sheet->getCell($columnLetter . $outRow)->getStyle()
                                ->getNumberFormat()
                                ->setFormatCode($numberFormatMask);
                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        ++$columnLetter;
                    }
                    $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                    ++$currentRow;
                }
            } finally {
                // Close file, even when the read filter, value binder or a cell write throws
                if (is_resource($fileHandle)) {
                    fclose($fileHandle);
                }
            }
        } finally {
            // Put the ini setting back so a failed load does not leave it switched on
            $this->setAutoDetect($iniset);
        }

        // Return
        return $spreadsheet;
    }

    /**
     * Convert string true/false to boolean, and null to null-string.
     *
     * Strings are compared case-insensitively against both the Calculation
     * engine's TRUE/FALSE words and the literals 'true'/'false'. When
     * $preserveBooleanString is set, strings are left as they are.
     *
     * @param mixed $rowDatum value to convert in place
     */
    private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
    {
        if (is_string($rowDatum) && !$preserveBooleanString) {
            if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
                $rowDatum = true;
            } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
                $rowDatum = false;
            }
        } else {
            $rowDatum = $rowDatum ?? '';
        }
    }

    /**
     * Convert numeric strings to int or float values.
     *
     * Only acts when casting has been enabled through
     * castFormattedNumberToNumeric(). The thousands separator is stripped and
     * the decimal separator normalised to '.' before the numeric test; a value
     * containing the decimal separator becomes a float, any other an int.
     *
     * @param mixed $rowDatum value to convert in place
     *
     * @return string number format mask for the cell: FORMAT_GENERAL unless
     *                numeric formatting is being preserved and the value was cast
     */
    private function convertFormattedNumber(&$rowDatum): string
    {
        $numberFormatMask = NumberFormat::FORMAT_GENERAL;
        if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
            $numeric = str_replace(
                [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
                ['', '.'],
                $rowDatum
            );

            if (is_numeric($numeric)) {
                $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
                if ($this->preserveNumericFormatting === true) {
                    $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false)
                        ? '#,##0' : '0';
                    if ($decimalPos !== false) {
                        // Mirror the number of decimals in the text, capped at 6.
                        $decimals = strlen($rowDatum) - $decimalPos - 1;
                        $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
                    }
                }

                $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric;
            }
        }

        return $numberFormatMask;
    }

    /**
     * Return the delimiter, or null if none has been set or detected yet.
     */
    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }

    /**
     * Set the delimiter; null lets the reader infer it from the data.
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    /**
     * Return the enclosure character.
     */
    public function getEnclosure(): string
    {
        return $this->enclosure;
    }

    /**
     * Set the enclosure character; an empty string selects the default '"'.
     */
    public function setEnclosure(string $enclosure): self
    {
        if ($enclosure == '') {
            $enclosure = '"';
        }
        $this->enclosure = $enclosure;

        return $this;
    }

    /**
     * Return the index of the sheet that data is loaded into.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }

    /**
     * Set the index of the sheet that data is loaded into.
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }

    /**
     * Set whether rows are loaded contiguously.
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }

    /**
     * Return whether rows are loaded contiguously.
     */
    public function getContiguous(): bool
    {
        return $this->contiguous;
    }

    /**
     * Set the character that can escape the enclosure.
     */
    public function setEscapeCharacter(string $escapeCharacter): self
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    /**
     * Return the character that can escape the enclosure.
     */
    public function getEscapeCharacter(): string
    {
        return $this->escapeCharacter;
    }

    /**
     * Can the current IReader read the file?
     *
     * A file that cannot be opened is rejected. A `.csv` or `.tsv` extension is
     * trusted as is; otherwise the detected mime type has to be one of the
     * supported text types.
     */
    public function canRead(string $filename): bool
    {
        // Check if file exists
        try {
            $this->openFile($filename);
        } catch (ReaderException $e) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(/** @scrutinizer ignore-type */ pathinfo($filename, PATHINFO_EXTENSION));
        if (in_array($extension, ['csv', 'tsv'], true)) {
            return true;
        }

        // Attempt to guess mimetype
        $type = mime_content_type($filename);
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array($type, $supportedTypes, true);
    }

    /**
     * Set $encoding to $setEncoding if it is still undecided and $compare is found aligned in $contents.
     *
     * "Aligned" means the first occurrence of $compare starts at an offset
     * that is a multiple of its own length.
     *
     * @param string $encoding result so far; only written when it is ''
     * @param string $contents bytes to search
     * @param string $compare encoded line-feed byte sequence to look for
     * @param string $setEncoding encoding name to report on a match
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            $pos = strpos($contents, $compare);
            if ($pos !== false && $pos % strlen($compare) === 0) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file without a BOM from its line-feed byte pattern.
     *
     * @return string 'UTF-32BE', 'UTF-32LE', 'UTF-16BE', 'UTF-16LE', 'UTF-8', or '' if undecided
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = file_get_contents($filename);
        if ($contents === false) {
            // Treat an unreadable file as empty rather than passing false on as a string.
            $contents = '';
        }
        // The 4-byte patterns are tested before the 2-byte ones; the first match wins.
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
        // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * Set $encoding to $setEncoding if it is still undecided and $first4 starts with $compare.
     *
     * @param string $encoding result so far; only written when it is ''
     * @param string $first4 leading bytes of the file
     * @param string $compare BOM byte sequence to look for
     * @param string $setEncoding encoding name to report on a match
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            if ($compare === substr($first4, 0, strlen($compare))) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file from its BOM.
     *
     * @return string the encoding name, or '' if the file has no recognised BOM or cannot be read
     */
    private static function guessEncodingBom(string $filename): string
    {
        $encoding = '';
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 !== false) {
            self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
            // UTF-32LE must be tested before UTF-16LE: its BOM begins with the UTF-16LE BOM bytes.
            self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
        }

        return $encoding;
    }

    /**
     * Guess the encoding of a file, first from its BOM and then from its contents.
     *
     * @param string $dflt encoding to return when no guess can be made
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $encoding = self::guessEncodingBom($filename);
        if ($encoding === '') {
            $encoding = self::guessEncodingNoBom($filename);
        }

        return ($encoding === '') ? $dflt : $encoding;
    }

    /**
     * Set whether empty-string fields are written to the sheet instead of being skipped.
     */
    public function setPreserveNullString(bool $value): self
    {
        $this->preserveNullString = $value;

        return $this;
    }

    /**
     * Return whether empty-string fields are written to the sheet instead of being skipped.
     */
    public function getPreserveNullString(): bool
    {
        return $this->preserveNullString;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body