Differences Between: [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]
<?php

namespace PhpOffice\PhpSpreadsheet\Reader\Csv;

/**
 * Guesses the field delimiter of a CSV stream.
 *
 * On construction the stream is sampled (up to MAX_LINES_SAMPLED logical lines) and, for every
 * candidate delimiter, the number of occurrences per line is recorded. infer() then picks the
 * candidate whose per-line count deviates least from its own median.
 */
class Delimiter
{
    // NOTE: the misspelling is kept on purpose; renaming would break subclasses that reference it.
    protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];

    /** Upper bound on the number of logical lines read from the stream while sampling. */
    protected const MAX_LINES_SAMPLED = 1000;

    /** @var resource */
    protected $fileHandle;

    /** @var string */
    protected $escapeCharacter;

    /** @var string */
    protected $enclosure;

    /**
     * Per-delimiter list of occurrence counts, one entry per sampled line.
     *
     * @var array
     */
    protected $counts = [];

    /**
     * Number of logical lines that were sampled.
     *
     * @var int
     */
    protected $numberLines = 0;

    /**
     * Result of the last infer() call; null until a delimiter has been selected.
     *
     * @var ?string
     */
    protected $delimiter;

    /**
     * Stores the stream settings and immediately samples the stream.
     *
     * Sampling reads from the current position of $fileHandle and does not rewind it afterwards.
     *
     * @param resource $fileHandle
     */
    public function __construct($fileHandle, string $escapeCharacter, string $enclosure)
    {
        $this->fileHandle = $fileHandle;
        $this->escapeCharacter = $escapeCharacter;
        $this->enclosure = $enclosure;

        $this->countPotentialDelimiters();
    }

    /**
     * Returns the first candidate in POTENTIAL_DELIMETERS.
     */
    public function getDefaultDelimiter(): string
    {
        return self::POTENTIAL_DELIMETERS[0];
    }

    /**
     * Returns how many logical lines were sampled.
     */
    public function linesCounted(): int
    {
        return $this->numberLines;
    }

    /**
     * Samples the stream and records, per line, how often each candidate delimiter occurs.
     */
    protected function countPotentialDelimiters(): void
    {
        $this->counts = array_fill_keys(self::POTENTIAL_DELIMETERS, []);
        $delimiterKeys = array_flip(self::POTENTIAL_DELIMETERS);

        $this->numberLines = 0;
        // Check the limit BEFORE reading, so that every line that is read is also counted and
        // numberLines always equals the length of each series in $this->counts.
        while ($this->numberLines < self::MAX_LINES_SAMPLED) {
            $line = $this->getNextLine();
            if ($line === false) {
                break;
            }

            ++$this->numberLines;
            $this->countDelimiterValues($line, $delimiterKeys);
        }
    }

    /**
     * Appends one entry per candidate delimiter to $this->counts for the given line.
     *
     * @param string $line          line content with enclosed sections already removed
     * @param array  $delimiterKeys delimiters to count, given as array keys; candidates that are
     *                              not present as a key are recorded with a count of 0
     */
    protected function countDelimiterValues(string $line, array $delimiterKeys): void
    {
        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $this->counts[$delimiter][] = array_key_exists($delimiter, $delimiterKeys)
                ? substr_count($line, $delimiter)
                : 0;
        }
    }

    /**
     * Selects the candidate delimiter with the most consistent per-line count.
     *
     * @return ?string the selected delimiter, or null when no candidate has a non-zero median
     *                 (which includes the case where no lines were sampled)
     */
    public function infer(): ?string
    {
        // Calculate the mean square deviations for each delimiter
        // (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];

        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            $series = $this->counts[$delimiter] ?? [];
            // Use the real sample size rather than numberLines, so the median can never index
            // outside the series.
            $sampleSize = count($series);
            if ($sampleSize === 0) {
                continue;
            }

            sort($series);

            $middleIdx = intdiv($sampleSize - 1, 2);
            $median = ($sampleSize % 2 === 1)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // For integer counts "/" yields int 0 when both middle values are 0, so the strict
            // comparison covers both the odd and the even branch.
            if ($median === 0) {
                continue;
            }

            $squaredDeviationSum = array_reduce(
                $series,
                static function ($sum, $value) use ($median) {
                    return $sum + ($value - $median) ** 2;
                },
                0
            );
            $meanSquareDeviations[$delimiter] = $squaredDeviationSum / $sampleSize;
        }

        // ... and pick the delimiter with the smallest mean square deviation
        // (in case of ties, the order in POTENTIAL_DELIMETERS is respected)
        $min = INF;
        foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        return $this->delimiter;
    }

    /**
     * Get the next full line from the file.
     *
     * Enclosed sections are removed from the returned text so that delimiter characters inside
     * them are not counted. While an enclosure is still open, further physical lines are appended.
     *
     * @return false|string the line with enclosed sections removed; false when the stream is
     *                      exhausted (also in the middle of an open enclosure) or when the
     *                      regular expression replacement fails
     */
    public function getNextLine()
    {
        // Without an enclosure character there is nothing to strip. The patterns below would
        // also match every line as "still open" and swallow the whole stream.
        if ($this->enclosure === '') {
            return fgets($this->fileHandle);
        }

        $line = '';
        // An enclosure character only counts when it is not preceded by the escape character.
        $enclosure = ($this->escapeCharacter === '' ? ''
            : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
            . preg_quote($this->enclosure, '/');

        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to the text collected so far
            $line = $line . $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            // (U = ungreedy, s = "." also matches newlines)
            $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);

            // See if we have any enclosures left in the line
            // if we still have an enclosure then we need to read the next line as well
        } while (preg_match('/(' . $enclosure . ')/', $line ?? '') > 0);

        // preg_replace() yields null on a PCRE error; report that as "no line".
        return $line ?? false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body