Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader\Csv;
   4  
   5  class Delimiter
   6  {
   7      protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];
   8  
   9      /** @var resource */
  10      protected $fileHandle;
  11  
  12      /** @var string */
  13      protected $escapeCharacter;
  14  
  15      /** @var string */
  16      protected $enclosure;
  17  
  18      /** @var array */
  19      protected $counts = [];
  20  
  21      /** @var int */
  22      protected $numberLines = 0;
  23  
  24      /** @var ?string */
  25      protected $delimiter;
  26  
  27      /**
  28       * @param resource $fileHandle
  29       */
  30      public function __construct($fileHandle, string $escapeCharacter, string $enclosure)
  31      {
  32          $this->fileHandle = $fileHandle;
  33          $this->escapeCharacter = $escapeCharacter;
  34          $this->enclosure = $enclosure;
  35  
  36          $this->countPotentialDelimiters();
  37      }
  38  
  39      public function getDefaultDelimiter(): string
  40      {
  41          return self::POTENTIAL_DELIMETERS[0];
  42      }
  43  
  44      public function linesCounted(): int
  45      {
  46          return $this->numberLines;
  47      }
  48  
  49      protected function countPotentialDelimiters(): void
  50      {
  51          $this->counts = array_fill_keys(self::POTENTIAL_DELIMETERS, []);
  52          $delimiterKeys = array_flip(self::POTENTIAL_DELIMETERS);
  53  
  54          // Count how many times each of the potential delimiters appears in each line
  55          $this->numberLines = 0;
  56          while (($line = $this->getNextLine()) !== false && (++$this->numberLines < 1000)) {
  57              $this->countDelimiterValues($line, $delimiterKeys);
  58          }
  59      }
  60  
  61      protected function countDelimiterValues(string $line, array $delimiterKeys): void
  62      {
  63          $splitString = str_split($line, 1);
  64          if (is_array($splitString)) {
  65              $distribution = array_count_values($splitString);
  66              $countLine = array_intersect_key($distribution, $delimiterKeys);
  67  
  68              foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
  69                  $this->counts[$delimiter][] = $countLine[$delimiter] ?? 0;
  70              }
  71          }
  72      }
  73  
  74      public function infer(): ?string
  75      {
  76          // Calculate the mean square deviations for each delimiter
  77          //     (ignoring delimiters that haven't been found consistently)
  78          $meanSquareDeviations = [];
  79          $middleIdx = floor(($this->numberLines - 1) / 2);
  80  
  81          foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
  82              $series = $this->counts[$delimiter];
  83              sort($series);
  84  
  85              $median = ($this->numberLines % 2)
  86                  ? $series[$middleIdx]
  87                  : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;
  88  
  89              if ($median === 0) {
  90                  continue;
  91              }
  92  
  93              $meanSquareDeviations[$delimiter] = array_reduce(
  94                  $series,
  95                  function ($sum, $value) use ($median) {
  96                      return $sum + ($value - $median) ** 2;
  97                  }
  98              ) / count($series);
  99          }
 100  
 101          // ... and pick the delimiter with the smallest mean square deviation
 102          //         (in case of ties, the order in potentialDelimiters is respected)
 103          $min = INF;
 104          foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
 105              if (!isset($meanSquareDeviations[$delimiter])) {
 106                  continue;
 107              }
 108  
 109              if ($meanSquareDeviations[$delimiter] < $min) {
 110                  $min = $meanSquareDeviations[$delimiter];
 111                  $this->delimiter = $delimiter;
 112              }
 113          }
 114  
 115          return $this->delimiter;
 116      }
 117  
 118      /**
 119       * Get the next full line from the file.
 120       *
 121       * @return false|string
 122       */
 123      public function getNextLine()
 124      {
 125          $line = '';
 126          $enclosure = ($this->escapeCharacter === '' ? ''
 127                  : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
 128              . preg_quote($this->enclosure, '/');
 129  
 130          do {
 131              // Get the next line in the file
 132              $newLine = fgets($this->fileHandle);
 133  
 134              // Return false if there is no next line
 135              if ($newLine === false) {
 136                  return false;
 137              }
 138  
 139              // Add the new line to the line passed in
 140              $line = $line . $newLine;
 141  
 142              // Drop everything that is enclosed to avoid counting false positives in enclosures
 143              $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);
 144  
 145              // See if we have any enclosures left in the line
 146              // if we still have an enclosure then we need to read the next line as well
 147          } while (preg_match('/(' . $enclosure . ')/', $line ?? '') > 0);
 148  
 149          return $line ?? false;
 150      }
 151  }