See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Style\NumberFormat;

/**
 * Reader that loads delimiter-separated text (CSV/TSV) into a Spreadsheet.
 *
 * The file handle ($this->fileHandle), the read filter ($this->readFilter) and
 * openFile() are not defined in this class; they are inherited from BaseReader.
 */
class Csv extends BaseReader
{
    const DEFAULT_FALLBACK_ENCODING = 'CP1252';
    const GUESS_ENCODING = 'guess';
    const UTF8_BOM = "\xEF\xBB\xBF";
    const UTF8_BOM_LEN = 3;
    const UTF16BE_BOM = "\xfe\xff";
    const UTF16BE_BOM_LEN = 2;
    const UTF16BE_LF = "\x00\x0a";
    const UTF16LE_BOM = "\xff\xfe";
    const UTF16LE_BOM_LEN = 2;
    const UTF16LE_LF = "\x0a\x00";
    const UTF32BE_BOM = "\x00\x00\xfe\xff";
    const UTF32BE_BOM_LEN = 4;
    const UTF32BE_LF = "\x00\x00\x00\x0a";
    const UTF32LE_BOM = "\xff\xfe\x00\x00";
    const UTF32LE_BOM_LEN = 4;
    const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Input encoding. The special value self::GUESS_ENCODING requests detection
     * from the file contents when the file is opened.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Fallback encoding if guess strikes out.
     *
     * @var string
     */
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

    /**
     * Delimiter. Null until it is set explicitly, read from a "sep=" line,
     * or inferred from the file contents.
     *
     * @var ?string
     */
    private $delimiter;

    /**
     * Enclosure.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Sheet index to read.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Load rows contiguously.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Callback for setting defaults in construction.
     *
     * @var ?callable
     */
    private static $constructorCallback;

    /**
     * Attempt autodetect line endings (deprecated after PHP8.1)?
     *
     * @var bool
     */
    private $testAutodetect = true;

    /**
     * Whether numeric-looking strings are cast to int/float on load.
     *
     * @var bool
     */
    protected $castFormattedNumberToNumeric = false;

    /**
     * Whether a number format mask is derived from the textual form of cast numbers.
     *
     * @var bool
     */
    protected $preserveNumericFormatting = false;

    /**
     * Whether empty-string fields are written to the sheet instead of skipped.
     *
     * @var bool
     */
    private $preserveNullString = false;

    /**
     * Create a new CSV Reader instance.
     *
     * If a constructor callback has been registered, it is invoked with the
     * new instance so that it can override the defaults.
     */
    public function __construct()
    {
        parent::__construct();
        $callback = self::$constructorCallback;
        if ($callback !== null) {
            $callback($this);
        }
    }

    /**
     * Set a callback to change the defaults.
     *
     * The callback must accept the Csv Reader object as the first parameter,
     * and it should return void.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }

    /**
     * Return the currently registered constructor callback, if any.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }

    /**
     * Set the encoding of the input (or self::GUESS_ENCODING to detect it).
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }

    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }

    /**
     * Set the encoding used when guessing finds neither a BOM nor a recognisable pattern.
     */
    public function setFallbackEncoding(string $fallbackEncoding): self
    {
        $this->fallbackEncoding = $fallbackEncoding;

        return $this;
    }

    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }

    /**
     * Move filepointer past any BOM marker.
     *
     * Only the UTF-8 BOM is tested: the pointer is rewound to the start, and
     * rewound again when the first bytes are not that BOM.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
            rewind($this->fileHandle);
        }
    }

    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of exactly "sep=X" (case-insensitive, line ending ignored)
     * sets the delimiter to X and leaves the pointer after that line.
     * Otherwise the pointer is reset to just past any BOM.
     */
    protected function checkSeparator(): void
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($inferenceEngine->linesCounted() === 0) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
            $this->skipBOM();

            return;
        }

        $this->delimiter = $inferenceEngine->infer();

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = $inferenceEngine->getDefaultDelimiter();
        }

        $this->skipBOM();
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is read once, row by row, to count rows and find the widest row.
     * The handle is closed even when reading throws.
     */
    public function listWorksheetInfo(string $filename): array
    {
        // Open file
        $this->openFileOrMemory($filename);
        $fileHandle = $this->fileHandle;

        $worksheetInfo = [];
        $worksheetInfo[0]['worksheetName'] = 'Worksheet';
        $worksheetInfo[0]['lastColumnLetter'] = 'A';
        $worksheetInfo[0]['lastColumnIndex'] = 0;
        $worksheetInfo[0]['totalRows'] = 0;
        $worksheetInfo[0]['totalColumns'] = 0;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Loop through each line of the file in turn
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            while (is_array($rowData)) {
                ++$worksheetInfo[0]['totalRows'];
                $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            }
        } finally {
            // Close file, whether or not reading succeeded
            fclose($fileHandle);
        }

        $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
        $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

        return $worksheetInfo;
    }

    /**
     * Loads Spreadsheet from file.
     */
    protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadIntoExisting($filename, $spreadsheet);
    }

    /**
     * Loads Spreadsheet from string.
     *
     * The contents are wrapped in a data: URI and read through the same code
     * path as a file, without the canRead/encoding handling of openFileOrMemory().
     */
    public function loadSpreadsheetFromString(string $contents): Spreadsheet
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadStringOrFile('data://text/plain,' . urlencode($contents), $spreadsheet, true);
    }

    /**
     * Open the file for reading, converting it to UTF-8 in memory when needed.
     *
     * When the input encoding is not UTF-8, the whole file is read, converted,
     * and written to a php://memory stream which replaces the file handle.
     *
     * @throws ReaderException when the file is not readable as CSV or cannot be loaded
     */
    private function openFileOrMemory(string $filename): void
    {
        // Open file
        if (!$this->canRead($filename)) {
            throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
        }
        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }
        $this->openFile($filename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        fclose($this->fileHandle);
        $entireFile = file_get_contents($filename);
        $memoryHandle = fopen('php://memory', 'r+b');
        if ($entireFile === false || $memoryHandle === false) {
            // Fail loudly rather than continue with an empty or invalid handle
            if ($memoryHandle !== false) {
                fclose($memoryHandle);
            }

            throw new ReaderException('Could not open file ' . $filename . ' for reading.');
        }

        $this->fileHandle = $memoryHandle;
        $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
        fwrite($this->fileHandle, $data);
        $this->skipBOM();
    }

    /**
     * Enable or disable toggling of the auto_detect_line_endings ini setting during load.
     */
    public function setTestAutoDetect(bool $value): self
    {
        $this->testAutodetect = $value;

        return $this;
    }

    /**
     * Set the auto_detect_line_endings ini value, if enabled for this reader.
     *
     * Nothing is changed when $value is null or testAutodetect is false.
     * Errors from ini_set are suppressed (the setting is flagged above as
     * deprecated after PHP 8.1).
     *
     * @return ?string the previous ini value when ini_set returned one, otherwise null
     */
    private function setAutoDetect(?string $value): ?string
    {
        $retVal = null;
        if ($value !== null && $this->testAutodetect) {
            $retVal2 = @ini_set('auto_detect_line_endings', $value);
            if (is_string($retVal2)) {
                $retVal = $retVal2;
            }
        }

        return $retVal;
    }

    /**
     * Configure casting of formatted numeric strings to numbers on load.
     *
     * @param bool $castFormattedNumberToNumeric cast numeric-looking strings to int/float
     * @param bool $preserveNumericFormatting derive a number format mask from the original text
     */
    public function castFormattedNumberToNumeric(
        bool $castFormattedNumberToNumeric,
        bool $preserveNumericFormatting = false
    ): void {
        $this->castFormattedNumberToNumeric = $castFormattedNumberToNumeric;
        $this->preserveNumericFormatting = $preserveNumericFormatting;
    }

    /**
     * Open data uri for reading.
     *
     * @throws ReaderException when the URI cannot be opened
     */
    private function openDataUri(string $filename): void
    {
        $fileHandle = fopen($filename, 'rb');
        if ($fileHandle === false) {
            // @codeCoverageIgnoreStart
            throw new ReaderException('Could not open file ' . $filename . ' for reading.');
            // @codeCoverageIgnoreEnd
        }

        $this->fileHandle = $fileHandle;
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        return $this->loadStringOrFile($filename, $spreadsheet, false);
    }

    /**
     * Loads PhpSpreadsheet from a file or data URI into a PhpSpreadsheet instance.
     *
     * The file handle is closed and the auto_detect_line_endings ini value is
     * restored even when reading or writing a cell throws.
     *
     * @param string $filename path of the file, or a data: URI when $dataUri is true
     * @param bool $dataUri open $filename as a data URI instead of a regular file
     */
    private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bool $dataUri): Spreadsheet
    {
        // Deprecated in Php8.1
        $iniset = $this->setAutoDetect('1');

        try {
            // Open file
            if ($dataUri) {
                $this->openDataUri($filename);
            } else {
                $this->openFileOrMemory($filename);
            }
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Make sure the target sheet exists, then select it
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // $currentRow tracks the input row; $outRow is the sheet row written to.
                // In contiguous mode $outRow only advances for rows that produce output.
                $currentRow = 1;
                $outRow = 0;

                // When the value binder reports boolean conversion, leave true/false strings as they are
                $valueBinder = Cell::getValueBinder();
                $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();

                // Loop through each line of the file in turn
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                while (is_array($rowData)) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        $this->convertBoolean($rowDatum, $preserveBooleanString);
                        $numberFormatMask = $this->convertFormattedNumber($rowDatum);
                        if (($rowDatum !== '' || $this->preserveNullString) && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }
                            // Set basic styling for the value (Note that this could be overloaded by styling in a value binder)
                            $sheet->getCell($columnLetter . $outRow)->getStyle()
                                ->getNumberFormat()
                                ->setFormatCode($numberFormatMask);
                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        // String increment: 'A' -> 'B', ..., 'Z' -> 'AA'
                        ++$columnLetter;
                    }
                    $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                    ++$currentRow;
                }
            } finally {
                // Close file, whether or not loading succeeded
                fclose($fileHandle);
            }
        } finally {
            // Restore the previous ini value (no-op when nothing was changed)
            $this->setAutoDetect($iniset);
        }

        // Return
        return $spreadsheet;
    }

    /**
     * Convert string true/false to boolean, and null to null-string.
     *
     * Strings are compared case-insensitively against Calculation::getTRUE() /
     * Calculation::getFALSE() and the literals 'true' / 'false'. When
     * $preserveBooleanString is true, strings are left unchanged.
     *
     * @param mixed $rowDatum value to convert in place
     */
    private function convertBoolean(&$rowDatum, bool $preserveBooleanString): void
    {
        if (is_string($rowDatum) && !$preserveBooleanString) {
            if (strcasecmp(Calculation::getTRUE(), $rowDatum) === 0 || strcasecmp('true', $rowDatum) === 0) {
                $rowDatum = true;
            } elseif (strcasecmp(Calculation::getFALSE(), $rowDatum) === 0 || strcasecmp('false', $rowDatum) === 0) {
                $rowDatum = false;
            }
        } else {
            $rowDatum = $rowDatum ?? '';
        }
    }

    /**
     * Convert numeric strings to int or float values.
     *
     * Only active when castFormattedNumberToNumeric is enabled. Thousands
     * separators are stripped and the decimal separator normalised to '.'
     * (both taken from StringHelper) before the numeric test.
     *
     * @param mixed $rowDatum value to convert in place
     *
     * @return string number format mask for the cell; FORMAT_GENERAL unless
     *                preserveNumericFormatting derived a mask from the text
     */
    private function convertFormattedNumber(&$rowDatum): string
    {
        $numberFormatMask = NumberFormat::FORMAT_GENERAL;
        if ($this->castFormattedNumberToNumeric === true && is_string($rowDatum)) {
            $numeric = str_replace(
                [StringHelper::getThousandsSeparator(), StringHelper::getDecimalSeparator()],
                ['', '.'],
                $rowDatum
            );

            if (is_numeric($numeric)) {
                $decimalPos = strpos($rowDatum, StringHelper::getDecimalSeparator());
                if ($this->preserveNumericFormatting === true) {
                    $numberFormatMask = (strpos($rowDatum, StringHelper::getThousandsSeparator()) !== false)
                        ? '#,##0' : '0';
                    if ($decimalPos !== false) {
                        // One '0' per decimal digit in the source text, capped at 6
                        $decimals = strlen($rowDatum) - $decimalPos - 1;
                        $numberFormatMask .= '.' . str_repeat('0', min($decimals, 6));
                    }
                }

                // NOTE(review): the (int) cast saturates for integer strings beyond PHP_INT_MAX - confirm acceptable
                $rowDatum = ($decimalPos !== false) ? (float) $numeric : (int) $numeric;
            }
        }

        return $numberFormatMask;
    }

    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }

    /**
     * Set the field delimiter; null lets the reader determine it from the file.
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    public function getEnclosure(): string
    {
        return $this->enclosure;
    }

    /**
     * Set the enclosure character; an empty string falls back to '"'.
     */
    public function setEnclosure(string $enclosure): self
    {
        if ($enclosure === '') {
            $enclosure = '"';
        }
        $this->enclosure = $enclosure;

        return $this;
    }

    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }

    /**
     * Set the index of the sheet that loaded data is written to.
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }

    /**
     * Enable or disable contiguous mode (output rows advance only for input rows that produce output).
     */
    public function setContiguous(bool $contiguous): self
    {
        $this->contiguous = $contiguous;

        return $this;
    }

    public function getContiguous(): bool
    {
        return $this->contiguous;
    }

    public function setEscapeCharacter(string $escapeCharacter): self
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    public function getEscapeCharacter(): string
    {
        return $this->escapeCharacter;
    }

    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A csv/tsv extension is trusted outright;
     * otherwise the detected MIME type must be one of the supported types.
     */
    public function canRead(string $filename): bool
    {
        // Check if file exists
        try {
            $this->openFile($filename);
        } catch (ReaderException $e) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
        if (in_array($extension, ['csv', 'tsv'], true)) {
            return true;
        }

        // Attempt to guess mimetype
        $type = mime_content_type($filename);
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array($type, $supportedTypes, true);
    }

    /**
     * Set $encoding to $setEncoding when no encoding has been chosen yet and the
     * first occurrence of $compare in $contents starts at a multiple of its length.
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            $pos = strpos($contents, $compare);
            if ($pos !== false && $pos % strlen($compare) === 0) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file without a BOM.
     *
     * Looks for an encoded line feed in UTF-32 and UTF-16 (both byte orders),
     * then checks whether the contents are valid UTF-8.
     *
     * @return string the guessed encoding, or '' when nothing matched
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        // An unreadable file is treated as empty contents
        $contents = (string) file_get_contents($filename);
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
        // preg_match with the 'u' modifier only succeeds on valid UTF-8
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * Set $encoding to $setEncoding when no encoding has been chosen yet and
     * $first4 starts with the BOM given in $compare.
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        if ($encoding === '') {
            if ($compare === substr($first4, 0, strlen($compare))) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding from a BOM in the first four bytes of the file.
     *
     * @return string the encoding implied by the BOM, or '' when there is none
     */
    private static function guessEncodingBom(string $filename): string
    {
        $encoding = '';
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 !== false) {
            self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
            // UTF-32LE must be tested before UTF-16LE: its BOM begins with the UTF-16LE BOM
            self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
        }

        return $encoding;
    }

    /**
     * Guess the encoding of a file, first by BOM and then by content.
     *
     * @param string $dflt encoding returned when nothing could be determined
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $encoding = self::guessEncodingBom($filename);
        if ($encoding === '') {
            $encoding = self::guessEncodingNoBom($filename);
        }

        return ($encoding === '') ? $dflt : $encoding;
    }

    /**
     * Choose whether empty-string fields are written to the sheet.
     */
    public function setPreserveNullString(bool $value): self
    {
        $this->preserveNullString = $value;

        return $this;
    }

    public function getPreserveNullString(): bool
    {
        return $this->preserveNullString;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body