See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401]
<?php

declare(strict_types=1);

namespace Phpml\Dataset;

use Phpml\Exception\DatasetException;
use Phpml\Exception\FileException;

/**
 * Dataset loaded from a text file in which every line has the form
 * "<target> <index>:<value> <index>:<value> ...".
 *
 * Feature indices in the file are 1-based; they are stored 0-based in the
 * resulting sample arrays. Features that are not listed on a line are set to 0,
 * and every sample is padded to the width of the largest index seen in the file.
 */
class SvmDataset extends ArrayDataset
{
    /**
     * @param string $filePath path of the file to load
     *
     * @throws FileException    when the file is missing or cannot be opened
     * @throws DatasetException when a line contains an invalid target, index or value
     */
    public function __construct(string $filePath)
    {
        [$samples, $targets] = self::readProblem($filePath);

        parent::__construct($samples, $targets);
    }

    /**
     * Reads the whole file and returns the parsed samples and targets.
     *
     * @return array two-element list: [samples, targets]
     *
     * @throws FileException
     * @throws DatasetException
     */
    private static function readProblem(string $filePath): array
    {
        $handle = self::openFile($filePath);

        $samples = [];
        $targets = [];
        $maxIndex = 0;

        try {
            while (false !== $line = fgets($handle)) {
                [$sample, $target, $maxIndex] = self::processLine($line, $maxIndex);
                $samples[] = $sample;
                $targets[] = $target;
            }
        } finally {
            // Close the handle even when a malformed line makes processLine() throw.
            fclose($handle);
        }

        // A sample is only as wide as the largest index known when its line was
        // parsed; pad every sample to the final width so all rows have equal length.
        foreach ($samples as &$sample) {
            $sample = array_pad($sample, $maxIndex + 1, 0);
        }

        // Break the reference so a later use of $sample cannot overwrite the last row.
        unset($sample);

        return [$samples, $targets];
    }

    /**
     * Opens the file for binary reading.
     *
     * Only the base name of the path is included in the exception messages.
     *
     * @return resource
     *
     * @throws FileException when the file does not exist or fopen() fails
     */
    private static function openFile(string $filePath)
    {
        if (!file_exists($filePath)) {
            throw new FileException(sprintf('File "%s" missing.', basename($filePath)));
        }

        $handle = fopen($filePath, 'rb');
        if ($handle === false) {
            throw new FileException(sprintf('File "%s" can\'t be open.', basename($filePath)));
        }

        return $handle;
    }

    /**
     * Parses one line into a sample and its target.
     *
     * @param string $line     raw line as returned by fgets()
     * @param int    $maxIndex largest 0-based feature index seen so far
     *
     * @return array three-element list: [sample, target, updated maxIndex]
     *
     * @throws DatasetException
     */
    private static function processLine(string $line, int $maxIndex): array
    {
        $columns = self::parseLine($line);

        $target = self::parseTargetColumn($columns[0]);
        // Start with zeros for every feature known so far; unlisted features stay 0.
        $sample = array_fill(0, $maxIndex + 1, 0);

        $n = count($columns);
        for ($i = 1; $i < $n; ++$i) {
            [$index, $value] = self::parseFeatureColumn($columns[$i]);
            if ($index > $maxIndex) {
                // New widest index: grow this sample and report the new width to the caller.
                $maxIndex = $index;
                $sample = array_pad($sample, $maxIndex + 1, 0);
            }

            $sample[$index] = $value;
        }

        return [$sample, $target, $maxIndex];
    }

    /**
     * Splits a line into its columns.
     *
     * Everything from the first "#" onwards is discarded, trailing whitespace is
     * removed and tabs are treated as spaces. The line is split on single spaces,
     * so leading or consecutive separators produce empty columns, which the
     * column parsers reject.
     *
     * @return string[]
     */
    private static function parseLine(string $line): array
    {
        $line = explode('#', $line, 2)[0];
        $line = rtrim($line);
        $line = str_replace("\t", ' ', $line);

        return explode(' ', $line);
    }

    /**
     * Converts the first column of a line to a float target.
     *
     * @throws DatasetException when the column is not numeric
     */
    private static function parseTargetColumn(string $column): float
    {
        if (!is_numeric($column)) {
            throw new DatasetException(sprintf('Invalid target "%s".', $column));
        }

        return (float) $column;
    }

    /**
     * Parses an "<index>:<value>" column.
     *
     * @return array two-element list: [0-based index, float value]
     *
     * @throws DatasetException when the column has no ":" or either part is invalid
     */
    private static function parseFeatureColumn(string $column): array
    {
        $feature = explode(':', $column, 2);
        if (count($feature) !== 2) {
            throw new DatasetException(sprintf('Invalid value "%s".', $column));
        }

        $index = self::parseFeatureIndex($feature[0]);
        $value = self::parseFeatureValue($feature[1]);

        return [$index, $value];
    }

    /**
     * Validates a 1-based feature index and converts it to a 0-based one.
     *
     * @throws DatasetException when the index is not a string of decimal digits or is less than 1
     */
    private static function parseFeatureIndex(string $index): int
    {
        // ctype_digit() already rejects empty, signed, fractional and non-numeric
        // strings, so no separate is_numeric() check is needed.
        if (!ctype_digit($index) || (int) $index < 1) {
            throw new DatasetException(sprintf('Invalid index "%s".', $index));
        }

        return (int) $index - 1;
    }

    /**
     * Converts a feature value to a float.
     *
     * @throws DatasetException when the value is not numeric
     */
    private static function parseFeatureValue(string $value): float
    {
        if (!is_numeric($value)) {
            throw new DatasetException(sprintf('Invalid value "%s".', $value));
        }

        return (float) $value;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body