Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Dataset;
   6  
   7  use Phpml\Exception\DatasetException;
   8  use Phpml\Exception\FileException;
   9  
  /**
   * Dataset read from a sparse "target index:value index:value ..." text file.
   *
   * Every line yields one sample and one target. Feature indices in the file are
   * 1-based and are stored 0-based; features a line does not mention are stored as 0.
   * All samples are padded to the width implied by the largest index found in the file
   * (at least one column).
   */
  class SvmDataset extends ArrayDataset
  {
      /**
       * Reads the given file and hands the parsed samples and targets to the parent dataset.
       *
       * @param string $filePath path of the file to load
       *
       * @throws FileException    when the file is missing or cannot be opened
       * @throws DatasetException when a line holds an invalid target, index or value
       */
      public function __construct(string $filePath)
      {
          [$samples, $targets] = self::readProblem($filePath);

          parent::__construct($samples, $targets);
      }

      /**
       * Parses the whole file into equally sized sample rows and their targets.
       *
       * @return array two-element list: [samples, targets]
       *
       * @throws FileException    when the file is missing or cannot be opened
       * @throws DatasetException when any line is malformed
       */
      private static function readProblem(string $filePath): array
      {
          $handle = self::openFile($filePath);

          $samples = [];
          $targets = [];
          $maxIndex = 0;

          try {
              while (false !== $line = fgets($handle)) {
                  [$sample, $target, $maxIndex] = self::processLine($line, $maxIndex);
                  $samples[] = $sample;
                  $targets[] = $target;
              }
          } finally {
              // Close the handle even when a malformed line aborts parsing with an exception.
              fclose($handle);
          }

          // Earlier rows were sized for the maximum known at the time they were read;
          // widen them to the final maximum so every row has the same length.
          foreach ($samples as &$row) {
              $row = array_pad($row, $maxIndex + 1, 0);
          }
          // Break the reference so later writes to $row cannot alter the last sample.
          unset($row);

          return [$samples, $targets];
      }

      /**
       * Opens the file for binary reading.
       *
       * @return resource
       *
       * @throws FileException when the file does not exist or fopen() fails
       */
      private static function openFile(string $filePath)
      {
          if (!file_exists($filePath)) {
              throw new FileException(sprintf('File "%s" missing.', basename($filePath)));
          }

          $handle = fopen($filePath, 'rb');
          if ($handle === false) {
              throw new FileException(sprintf('File "%s" can\'t be open.', basename($filePath)));
          }

          return $handle;
      }

      /**
       * Converts one raw line into a dense sample row and its target.
       *
       * @param string $line     raw line as returned by fgets()
       * @param int    $maxIndex largest 0-based feature index seen so far
       *
       * @return array three-element list: [sample, target, updated maxIndex]
       *
       * @throws DatasetException when the target or any feature column is invalid
       */
      private static function processLine(string $line, int $maxIndex): array
      {
          $columns = self::parseLine($line);

          // The first column is the target; everything after it is an "index:value" pair.
          $target = self::parseTargetColumn($columns[0]);
          $sample = array_fill(0, $maxIndex + 1, 0);

          $n = count($columns);
          for ($i = 1; $i < $n; ++$i) {
              [$index, $value] = self::parseFeatureColumn($columns[$i]);
              if ($index > $maxIndex) {
                  // A new widest index: grow this row so the assignment below stays in sequence.
                  $maxIndex = $index;
                  $sample = array_pad($sample, $maxIndex + 1, 0);
              }

              $sample[$index] = $value;
          }

          return [$sample, $target, $maxIndex];
      }

      /**
       * Splits a raw line into its columns.
       *
       * Text from the first "#" onwards is dropped, trailing whitespace is removed and
       * tabs are treated as spaces. Splitting is on single spaces, so consecutive
       * separators or leading whitespace produce empty columns (which later fail validation).
       *
       * @return string[]
       */
      private static function parseLine(string $line): array
      {
          $line = explode('#', $line, 2)[0];
          $line = rtrim($line);
          $line = str_replace("\t", ' ', $line);

          return explode(' ', $line);
      }

      /**
       * Validates and converts the target column.
       *
       * @throws DatasetException when the column is not numeric
       */
      private static function parseTargetColumn(string $column): float
      {
          if (!is_numeric($column)) {
              throw new DatasetException(sprintf('Invalid target "%s".', $column));
          }

          return (float) $column;
      }

      /**
       * Splits an "index:value" column and validates both halves.
       *
       * @return array two-element list: [0-based index, value]
       *
       * @throws DatasetException when the column has no ":" or either half is invalid
       */
      private static function parseFeatureColumn(string $column): array
      {
          $feature = explode(':', $column, 2);
          if (count($feature) !== 2) {
              throw new DatasetException(sprintf('Invalid value "%s".', $column));
          }

          $index = self::parseFeatureIndex($feature[0]);
          $value = self::parseFeatureValue($feature[1]);

          return [$index, $value];
      }

      /**
       * Validates a 1-based feature index and returns it 0-based.
       *
       * @throws DatasetException when the index is not a string of digits or is below 1
       */
      private static function parseFeatureIndex(string $index): int
      {
          // ctype_digit() already rejects the empty string, signs, decimals and exponents,
          // so no separate is_numeric() check is needed.
          if (!ctype_digit($index) || (int) $index < 1) {
              throw new DatasetException(sprintf('Invalid index "%s".', $index));
          }

          return (int) $index - 1;
      }

      /**
       * Validates and converts a feature value.
       *
       * @throws DatasetException when the value is not numeric
       */
      private static function parseFeatureValue(string $value): float
      {
          if (!is_numeric($value)) {
              throw new DatasetException(sprintf('Invalid value "%s".', $value));
          }

          return (float) $value;
      }
  }