Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Reader; 4 5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 6 use PhpOffice\PhpSpreadsheet\Shared\StringHelper; 7 use PhpOffice\PhpSpreadsheet\Spreadsheet; 8 9 class Csv extends BaseReader 10 { 11 /** 12 * Input encoding. 13 * 14 * @var string 15 */ 16 private $inputEncoding = 'UTF-8'; 17 18 /** 19 * Delimiter. 20 * 21 * @var string 22 */ 23 private $delimiter; 24 25 /** 26 * Enclosure. 27 * 28 * @var string 29 */ 30 private $enclosure = '"'; 31 32 /** 33 * Sheet index to read. 34 * 35 * @var int 36 */ 37 private $sheetIndex = 0; 38 39 /** 40 * Load rows contiguously. 41 * 42 * @var bool 43 */ 44 private $contiguous = false; 45 46 /** 47 * Row counter for loading rows contiguously. 48 * 49 * @var int 50 */ 51 private $contiguousRow = -1; 52 53 /** 54 * The character that can escape the enclosure. 55 * 56 * @var string 57 */ 58 private $escapeCharacter = '\\'; 59 60 /** 61 * Create a new CSV Reader instance. 62 */ 63 public function __construct() 64 { 65 parent::__construct(); 66 } 67 68 /** 69 * Set input encoding. 70 * 71 * @param string $pValue Input encoding, eg: 'UTF-8' 72 * 73 * @return Csv 74 */ 75 public function setInputEncoding($pValue) 76 { 77 $this->inputEncoding = $pValue; 78 79 return $this; 80 } 81 82 /** 83 * Get input encoding. 84 * 85 * @return string 86 */ 87 public function getInputEncoding() 88 { 89 return $this->inputEncoding; 90 } 91 92 /** 93 * Move filepointer past any BOM marker. 94 */ 95 protected function skipBOM() 96 { 97 rewind($this->fileHandle); 98 99 switch ($this->inputEncoding) { 100 case 'UTF-8': 101 fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? 102 fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); 103 104 break; 105 case 'UTF-16LE': 106 fgets($this->fileHandle, 3) == "\xFF\xFE" ? 107 fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); 108 109 break; 110 case 'UTF-16BE': 111 fgets($this->fileHandle, 3) == "\xFE\xFF" ? 112 fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); 113 114 break; 115 case 'UTF-32LE': 116 fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? 117 fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); 118 119 break; 120 case 'UTF-32BE': 121 fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? 122 fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); 123 124 break; 125 default: 126 break; 127 } 128 } 129 130 /** 131 * Identify any separator that is explicitly set in the file. 132 */ 133 protected function checkSeparator() 134 { 135 $line = fgets($this->fileHandle); 136 if ($line === false) { 137 return; 138 } 139 140 if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { 141 $this->delimiter = substr($line, 4, 1); 142 143 return; 144 } 145 146 $this->skipBOM(); 147 } 148 149 /** 150 * Infer the separator if it isn't explicitly set in the file or specified by the user. 151 */ 152 protected function inferSeparator() 153 { 154 if ($this->delimiter !== null) { 155 return; 156 } 157 158 $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~']; 159 $counts = []; 160 foreach ($potentialDelimiters as $delimiter) { 161 $counts[$delimiter] = []; 162 } 163 164 // Count how many times each of the potential delimiters appears in each line 165 $numberLines = 0; 166 while (($line = $this->getNextLine()) !== false && (++$numberLines < 1000)) { 167 $countLine = []; 168 for ($i = strlen($line) - 1; $i >= 0; --$i) { 169 $char = $line[$i]; 170 if (isset($counts[$char])) { 171 if (!isset($countLine[$char])) { 172 $countLine[$char] = 0; 173 } 174 ++$countLine[$char]; 175 } 176 } 177 foreach ($potentialDelimiters as $delimiter) { 178 $counts[$delimiter][] = $countLine[$delimiter] 179 ?? 0; 180 } 181 } 182 183 // If number of lines is 0, nothing to infer : fall back to the default 184 if ($numberLines === 0) { 185 $this->delimiter = reset($potentialDelimiters); 186 $this->skipBOM(); 187 188 return; 189 } 190 191 // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) 192 $meanSquareDeviations = []; 193 $middleIdx = floor(($numberLines - 1) / 2); 194 195 foreach ($potentialDelimiters as $delimiter) { 196 $series = $counts[$delimiter]; 197 sort($series); 198 199 $median = ($numberLines % 2) 200 ? $series[$middleIdx] 201 : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; 202 203 if ($median === 0) { 204 continue; 205 } 206 207 $meanSquareDeviations[$delimiter] = array_reduce( 208 $series, 209 function ($sum, $value) use ($median) { 210 return $sum + pow($value - $median, 2); 211 } 212 ) / count($series); 213 } 214 215 // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) 216 $min = INF; 217 foreach ($potentialDelimiters as $delimiter) { 218 if (!isset($meanSquareDeviations[$delimiter])) { 219 continue; 220 } 221 222 if ($meanSquareDeviations[$delimiter] < $min) { 223 $min = $meanSquareDeviations[$delimiter]; 224 $this->delimiter = $delimiter; 225 } 226 } 227 228 // If no delimiter could be detected, fall back to the default 229 if ($this->delimiter === null) { 230 $this->delimiter = reset($potentialDelimiters); 231 } 232 233 $this->skipBOM(); 234 } 235 236 /** 237 * Get the next full line from the file. 238 * 239 * @param string $line 240 * 241 * @return bool|string 242 */ 243 private function getNextLine($line = '') 244 { 245 // Get the next line in the file 246 $newLine = fgets($this->fileHandle); 247 248 // Return false if there is no next line 249 if ($newLine === false) { 250 return false; 251 } 252 253 // Add the new line to the line passed in 254 $line = $line . $newLine; 255 256 // Drop everything that is enclosed to avoid counting false positives in enclosures 257 $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')' 258 . preg_quote($this->enclosure, '/'); 259 $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line); 260 261 // See if we have any enclosures left in the line 262 // if we still have an enclosure then we need to read the next line as well 263 if (preg_match('/(' . $enclosure . ')/', $line) > 0) { 264 $line = $this->getNextLine($line); 265 } 266 267 return $line; 268 } 269 270 /** 271 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 272 * 273 * @param string $pFilename 274 * 275 * @throws Exception 276 * 277 * @return array 278 */ 279 public function listWorksheetInfo($pFilename) 280 { 281 // Open file 282 if (!$this->canRead($pFilename)) { 283 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); 284 } 285 $this->openFile($pFilename); 286 $fileHandle = $this->fileHandle; 287 288 // Skip BOM, if any 289 $this->skipBOM(); 290 $this->checkSeparator(); 291 $this->inferSeparator(); 292 293 $worksheetInfo = []; 294 $worksheetInfo[0]['worksheetName'] = 'Worksheet'; 295 $worksheetInfo[0]['lastColumnLetter'] = 'A'; 296 $worksheetInfo[0]['lastColumnIndex'] = 0; 297 $worksheetInfo[0]['totalRows'] = 0; 298 $worksheetInfo[0]['totalColumns'] = 0; 299 300 // Loop through each line of the file in turn 301 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { 302 ++$worksheetInfo[0]['totalRows']; 303 $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); 304 } 305 306 $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); 307 $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; 308 309 // Close file 310 fclose($fileHandle); 311 312 return $worksheetInfo; 313 } 314 315 /** 316 * Loads Spreadsheet from file. 317 * 318 * @param string $pFilename 319 * 320 * @throws Exception 321 * 322 * @return Spreadsheet 323 */ 324 public function load($pFilename) 325 { 326 // Create new Spreadsheet 327 $spreadsheet = new Spreadsheet(); 328 329 // Load into this instance 330 return $this->loadIntoExisting($pFilename, $spreadsheet); 331 } 332 333 /** 334 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. 335 * 336 * @param string $pFilename 337 * @param Spreadsheet $spreadsheet 338 * 339 * @throws Exception 340 * 341 * @return Spreadsheet 342 */ 343 public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) 344 { 345 $lineEnding = ini_get('auto_detect_line_endings'); 346 ini_set('auto_detect_line_endings', true); 347 348 // Open file 349 if (!$this->canRead($pFilename)) { 350 throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); 351 } 352 $this->openFile($pFilename); 353 $fileHandle = $this->fileHandle; 354 355 // Skip BOM, if any 356 $this->skipBOM(); 357 $this->checkSeparator(); 358 $this->inferSeparator(); 359 360 // Create new PhpSpreadsheet object 361 while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { 362 $spreadsheet->createSheet(); 363 } 364 $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); 365 366 // Set our starting row based on whether we're in contiguous mode or not 367 $currentRow = 1; 368 if ($this->contiguous) { 369 $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; 370 } 371 372 // Loop through each line of the file in turn 373 while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { 374 $columnLetter = 'A'; 375 foreach ($rowData as $rowDatum) { 376 if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { 377 // Convert encoding if necessary 378 if ($this->inputEncoding !== 'UTF-8') { 379 $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); 380 } 381 382 // Set cell value 383 $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); 384 } 385 ++$columnLetter; 386 } 387 ++$currentRow; 388 } 389 390 // Close file 391 fclose($fileHandle); 392 393 if ($this->contiguous) { 394 $this->contiguousRow = $currentRow; 395 } 396 397 ini_set('auto_detect_line_endings', $lineEnding); 398 399 // Return 400 return $spreadsheet; 401 } 402 403 /** 404 * Get delimiter. 405 * 406 * @return string 407 */ 408 public function getDelimiter() 409 { 410 return $this->delimiter; 411 } 412 413 /** 414 * Set delimiter. 415 * 416 * @param string $delimiter Delimiter, eg: ',' 417 * 418 * @return CSV 419 */ 420 public function setDelimiter($delimiter) 421 { 422 $this->delimiter = $delimiter; 423 424 return $this; 425 } 426 427 /** 428 * Get enclosure. 429 * 430 * @return string 431 */ 432 public function getEnclosure() 433 { 434 return $this->enclosure; 435 } 436 437 /** 438 * Set enclosure. 439 * 440 * @param string $enclosure Enclosure, defaults to " 441 * 442 * @return CSV 443 */ 444 public function setEnclosure($enclosure) 445 { 446 if ($enclosure == '') { 447 $enclosure = '"'; 448 } 449 $this->enclosure = $enclosure; 450 451 return $this; 452 } 453 454 /** 455 * Get sheet index. 456 * 457 * @return int 458 */ 459 public function getSheetIndex() 460 { 461 return $this->sheetIndex; 462 } 463 464 /** 465 * Set sheet index. 466 * 467 * @param int $pValue Sheet index 468 * 469 * @return CSV 470 */ 471 public function setSheetIndex($pValue) 472 { 473 $this->sheetIndex = $pValue; 474 475 return $this; 476 } 477 478 /** 479 * Set Contiguous. 480 * 481 * @param bool $contiguous 482 * 483 * @return Csv 484 */ 485 public function setContiguous($contiguous) 486 { 487 $this->contiguous = (bool) $contiguous; 488 if (!$contiguous) { 489 $this->contiguousRow = -1; 490 } 491 492 return $this; 493 } 494 495 /** 496 * Get Contiguous. 497 * 498 * @return bool 499 */ 500 public function getContiguous() 501 { 502 return $this->contiguous; 503 } 504 505 /** 506 * Set escape backslashes. 507 * 508 * @param string $escapeCharacter 509 * 510 * @return $this 511 */ 512 public function setEscapeCharacter($escapeCharacter) 513 { 514 $this->escapeCharacter = $escapeCharacter; 515 516 return $this; 517 } 518 519 /** 520 * Get escape backslashes. 521 * 522 * @return string 523 */ 524 public function getEscapeCharacter() 525 { 526 return $this->escapeCharacter; 527 } 528 529 /** 530 * Can the current IReader read the file? 531 * 532 * @param string $pFilename 533 * 534 * @return bool 535 */ 536 public function canRead($pFilename) 537 { 538 // Check if file exists 539 try { 540 $this->openFile($pFilename); 541 } catch (Exception $e) { 542 return false; 543 } 544 545 fclose($this->fileHandle); 546 547 // Trust file extension if any 548 $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION)); 549 if (in_array($extension, ['csv', 'tsv'])) { 550 return true; 551 } 552 553 // Attempt to guess mimetype 554 $type = mime_content_type($pFilename); 555 $supportedTypes = [ 556 'text/csv', 557 'text/plain', 558 'inode/x-empty', 559 ]; 560 561 return in_array($type, $supportedTypes, true); 562 } 563 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body