Differences Between: [Versions 310 and 311] [Versions 311 and 400] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

use InvalidArgumentException;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;

/**
 * Reader that loads delimiter-separated text files into a Spreadsheet.
 *
 * The delimiter is taken, in order of precedence, from an explicit
 * setDelimiter() call, from a leading "sep=X" line in the file, or from a
 * statistical guess over the first lines of the file (see inferSeparator()).
 *
 * The file handle ($this->fileHandle), the read filter ($this->readFilter) and
 * openFile() are inherited from BaseReader, which is not part of this file.
 */
class Csv extends BaseReader
{
    // Byte-order marks, their lengths in bytes, and the encoded form of a
    // line feed for each supported Unicode encoding (used by guessEncoding()).
    public const UTF8_BOM = "\xEF\xBB\xBF";
    public const UTF8_BOM_LEN = 3;
    public const UTF16BE_BOM = "\xfe\xff";
    public const UTF16BE_BOM_LEN = 2;
    public const UTF16BE_LF = "\x00\x0a";
    public const UTF16LE_BOM = "\xff\xfe";
    public const UTF16LE_BOM_LEN = 2;
    public const UTF16LE_LF = "\x0a\x00";
    public const UTF32BE_BOM = "\x00\x00\xfe\xff";
    public const UTF32BE_BOM_LEN = 4;
    public const UTF32BE_LF = "\x00\x00\x00\x0a";
    public const UTF32LE_BOM = "\xff\xfe\x00\x00";
    public const UTF32LE_BOM_LEN = 4;
    public const UTF32LE_LF = "\x0a\x00\x00\x00";

    /**
     * Input encoding.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Delimiter; null until it has been set, read from the file or inferred.
     *
     * @var null|string
     */
    private $delimiter;

    /**
     * Enclosure.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Sheet index to read into.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Load rows contiguously (rows that produce no cell are not given a row number).
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * The character that can escape the enclosure.
     *
     * @var string
     */
    private $escapeCharacter = '\\';

    /**
     * Create a new CSV Reader instance.
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Set input encoding.
     *
     * @param string $pValue Input encoding, eg: 'UTF-8'
     *
     * @return $this
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }

    /**
     * Get input encoding.
     *
     * @return string
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }

    /**
     * Position the file pointer at the start of the data, just past a UTF-8 BOM if there is one.
     *
     * Always rewinds first, so it can be called at any time to restart reading.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        // fgets() reads at most (length - 1) bytes, hence the "+ 1".
        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
            rewind($this->fileHandle);
        }
    }

    /**
     * Identify any separator that is explicitly set in the file.
     *
     * A first line of the form "sep=X" (case-insensitive, exactly five
     * characters before the line ending) sets the delimiter to X and is
     * consumed. Any other first line is left unread: the pointer is moved back
     * to the start of the data.
     */
    protected function checkSeparator(): void
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        $this->skipBOM();
    }

    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * For each candidate delimiter, the number of occurrences per line is
     * collected over the first lines of the file. The candidate whose per-line
     * count deviates least from its own median is chosen, ignoring candidates
     * whose median is zero. The file pointer is moved back to the start of the
     * data before returning.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
        $counts = array_fill_keys($potentialDelimiters, []);

        // Count how many times each of the potential delimiters appears in each line.
        // The cap is tested BEFORE reading so that $numberLines always equals the
        // number of samples actually recorded in $counts.
        $numberLines = 0;
        while ($numberLines < 1000 && ($line = $this->getNextLine()) !== false) {
            ++$numberLines;
            // Mode 1: byte value => number of occurrences, only for bytes that occur.
            $byteCounts = count_chars($line, 1);
            foreach ($potentialDelimiters as $delimiter) {
                $counts[$delimiter][] = $byteCounts[ord($delimiter)] ?? 0;
            }
        }

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($numberLines === 0) {
            $this->delimiter = reset($potentialDelimiters);
            $this->skipBOM();

            return;
        }

        // Calculate the mean square deviations for each delimiter
        // (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];
        $middleIdx = intdiv($numberLines - 1, 2);

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = ($numberLines % 2)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // The median may be an int or a float depending on the branch above.
            if ((float) $median === 0.0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                static function ($sum, $value) use ($median) {
                    return $sum + ($value - $median) ** 2;
                },
                0
            ) / count($series);
        }

        // ... and pick the delimiter with the smallest mean square deviation
        // (in case of ties, the order in potentialDelimiters is respected)
        $min = INF;
        foreach ($potentialDelimiters as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = reset($potentialDelimiters);
        }

        $this->skipBOM();
    }

    /**
     * Get the next full logical line from the file, with enclosed text removed.
     *
     * Physical lines are joined for as long as an unmatched enclosure remains,
     * so that a quoted value containing line breaks counts as one line. Text
     * between enclosures is dropped so that delimiters inside quoted values are
     * not counted by inferSeparator().
     *
     * @return false|string false when the end of the file is reached
     */
    private function getNextLine()
    {
        $line = '';
        // Matches an enclosure that is not preceded by the escape character
        // (no look-behind is used when the escape character is empty).
        $enclosure = ($this->escapeCharacter === '' ? ''
                : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
            . preg_quote($this->enclosure, '/');

        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to what has been accumulated so far
            $line = $line . $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures.
            // preg_replace() returns null on a PCRE error; the cast keeps the
            // documented false|string contract (an empty line) in that case.
            $line = (string) preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);

            // See if we have any enclosures left in the line
            // if we still have an enclosure then we need to read the next line as well
        } while (preg_match('/(' . $enclosure . ')/', $line) > 0);

        return $line;
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The result always has exactly one entry (index 0), named 'Worksheet'.
     *
     * @param string $pFilename
     *
     * @return array
     */
    public function listWorksheetInfo($pFilename)
    {
        // Open file
        $this->openFileOrMemory($pFilename);
        $fileHandle = $this->fileHandle;

        $worksheetInfo = [];
        $worksheetInfo[0]['worksheetName'] = 'Worksheet';
        $worksheetInfo[0]['lastColumnLetter'] = 'A';
        $worksheetInfo[0]['lastColumnIndex'] = 0;
        $worksheetInfo[0]['totalRows'] = 0;
        $worksheetInfo[0]['totalColumns'] = 0;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Loop through each line of the file in turn
            while (
                ($rowData = fgetcsv(
                    $fileHandle,
                    0,
                    $this->delimiter,
                    $this->enclosure,
                    $this->escapeCharacter
                )) !== false
            ) {
                ++$worksheetInfo[0]['totalRows'];
                $worksheetInfo[0]['lastColumnIndex'] = max(
                    $worksheetInfo[0]['lastColumnIndex'],
                    count($rowData) - 1
                );
            }
        } finally {
            // Close file, even if reading failed part-way through
            fclose($fileHandle);
        }

        $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex(
            $worksheetInfo[0]['lastColumnIndex'] + 1
        );
        $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;

        return $worksheetInfo;
    }

    /**
     * Loads Spreadsheet from file.
     *
     * @param string $pFilename
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        // Create new Spreadsheet
        $spreadsheet = new Spreadsheet();

        // Load into this instance
        return $this->loadIntoExisting($pFilename, $spreadsheet);
    }

    /**
     * Open the file for reading and leave the handle in $this->fileHandle.
     *
     * When the input encoding is not UTF-8, the whole file is read, converted
     * to UTF-8 and served from an in-memory stream instead of the file itself.
     *
     * @param string $pFilename
     *
     * @throws Exception when canRead() rejects the file or its contents cannot be read
     */
    private function openFileOrMemory($pFilename): void
    {
        // canRead() returns a bool, not a handle; it opens and closes the file itself.
        $readable = $this->canRead($pFilename);
        if (!$readable) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }

        $this->openFile($pFilename);
        if ($this->inputEncoding !== 'UTF-8') {
            fclose($this->fileHandle);
            $entireFile = file_get_contents($pFilename);
            if ($entireFile === false) {
                // Without this check a failed read would be loaded as an empty sheet.
                throw new Exception('Could not read the contents of ' . $pFilename . '.');
            }
            $this->fileHandle = fopen('php://memory', 'r+b');
            $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
            fwrite($this->fileHandle, $data);
            // Rewinds the memory stream and steps over a UTF-8 BOM, if any.
            $this->skipBOM();
        }
    }

    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * Sheets are created as needed so that the configured sheet index exists.
     * Empty values, and cells rejected by the read filter, are not written.
     *
     * @param string $pFilename
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        // NOTE(review): the auto_detect_line_endings setting is deprecated as of
        // PHP 8.1 - confirm whether the supported PHP range still needs it.
        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', '1');

        try {
            // Open file
            $this->openFileOrMemory($pFilename);
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Make sure the target sheet exists, then select it
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // $currentRow follows the input; $outRow is the row written to,
                // which only differs from $currentRow in contiguous mode.
                $currentRow = 1;
                $outRow = 0;

                // Loop through each line of the file in turn
                while (
                    ($rowData = fgetcsv(
                        $fileHandle,
                        0,
                        $this->delimiter,
                        $this->enclosure,
                        $this->escapeCharacter
                    )) !== false
                ) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        $hasValue = $rowDatum !== null && $rowDatum !== '';
                        if ($hasValue && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                // Advance the output row once, on the first cell written for this input row
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }
                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        // String increment: 'A' -> 'B', ..., 'Z' -> 'AA'
                        ++$columnLetter;
                    }
                    ++$currentRow;
                }
            } finally {
                // Close file, even if reading failed part-way through
                fclose($fileHandle);
            }
        } finally {
            // Always put the ini setting back the way it was found
            if ($lineEnding !== false) {
                ini_set('auto_detect_line_endings', $lineEnding);
            }
        }

        // Return
        return $spreadsheet;
    }

    /**
     * Get delimiter.
     *
     * @return null|string null if no delimiter has been set or detected yet
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }

    /**
     * Set delimiter.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return $this
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }

    /**
     * Get enclosure.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }

    /**
     * Set enclosure.
     *
     * @param string $enclosure Enclosure, defaults to "; an empty value also selects "
     *
     * @return $this
     */
    public function setEnclosure($enclosure)
    {
        // Loose comparison kept on purpose so that null is treated like ''.
        if ($enclosure == '') {
            $enclosure = '"';
        }
        $this->enclosure = $enclosure;

        return $this;
    }

    /**
     * Get sheet index.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }

    /**
     * Set sheet index.
     *
     * @param int $pValue Sheet index
     *
     * @return $this
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }

    /**
     * Set Contiguous.
     *
     * @param bool $contiguous
     *
     * @return $this
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;

        return $this;
    }

    /**
     * Get Contiguous.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }

    /**
     * Set the character that can escape the enclosure.
     *
     * @param string $escapeCharacter
     *
     * @return $this
     */
    public function setEscapeCharacter($escapeCharacter)
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }

    /**
     * Get the character that can escape the enclosure.
     *
     * @return string
     */
    public function getEscapeCharacter()
    {
        return $this->escapeCharacter;
    }

    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A 'csv' or 'tsv' extension is then trusted;
     * otherwise the detected MIME type must be one of the supported types.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // Check if file exists
        try {
            $this->openFile($pFilename);
        } catch (InvalidArgumentException $e) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
        if (in_array($extension, ['csv', 'tsv'], true)) {
            return true;
        }

        // Attempt to guess mimetype
        $type = mime_content_type($pFilename);
        $supportedTypes = [
            'application/csv',
            'text/csv',
            'text/plain',
            'inode/x-empty',
        ];

        return in_array($type, $supportedTypes, true);
    }

    /**
     * Set $encoding to $setEncoding if $compare occurs in $contents at an offset
     * that is a multiple of its own length. Does nothing once $encoding is set.
     */
    private static function guessEncodingTestNoBom(
        string &$encoding,
        string &$contents,
        string $compare,
        string $setEncoding
    ): void {
        if ($encoding === '') {
            $pos = strpos($contents, $compare);
            if ($pos !== false && $pos % strlen($compare) === 0) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file from its contents, without relying on a BOM.
     *
     * Looks for an encoded line feed of each UTF-32/UTF-16 variant (widest
     * first), then falls back to UTF-8 if the contents are valid UTF-8.
     *
     * @return string the encoding name, or '' if none matched or the file could not be read
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = file_get_contents($filename);
        if ($contents === false) {
            // Nothing to inspect: let the caller apply its default rather than
            // reporting an unreadable file as UTF-8.
            return $encoding;
        }

        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
        // An empty pattern with the "u" modifier only matches if the subject is valid UTF-8.
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }

    /**
     * Set $encoding to $setEncoding if $first4 starts with the BOM $compare.
     * Does nothing once $encoding is set.
     */
    private static function guessEncodingTestBom(
        string &$encoding,
        string $first4,
        string $compare,
        string $setEncoding
    ): void {
        if ($encoding === '') {
            if ($compare === substr($first4, 0, strlen($compare))) {
                $encoding = $setEncoding;
            }
        }
    }

    /**
     * Guess the encoding of a file from a byte-order mark in its first four bytes.
     *
     * @return string the encoding name, or '' if no known BOM is present
     */
    private static function guessEncodingBom(string $filename): string
    {
        $encoding = '';
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 !== false) {
            self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
            // UTF-32LE must be tested before UTF-16LE: the UTF-16LE BOM is a
            // prefix of the UTF-32LE BOM.
            self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
            self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
        }

        return $encoding;
    }

    /**
     * Guess the encoding of a file, first from its BOM and then from its contents.
     *
     * @param string $filename File to inspect
     * @param string $dflt Encoding to report when nothing could be determined
     *
     * @return string
     */
    public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string
    {
        $encoding = self::guessEncodingBom($filename);
        if ($encoding === '') {
            $encoding = self::guessEncodingNoBom($filename);
        }

        return ($encoding === '') ? $dflt : $encoding;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body