<?php

declare(strict_types=1);

namespace Phpml\DimensionReduction;

use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;

/**
 * Principal Component Analysis.
 *
 * Centers the data on its per-column means, builds the covariance matrix of
 * the centered data and hands it to the eigen-decomposition / reduction
 * helpers of the parent class (eigenDecomposition() and reduce() are not
 * defined in this file; presumably inherited from EigenTransformerBase).
 */
class PCA extends EigenTransformerBase
{
    /**
     * Mean value of each column of the dataset used in the last fit().
     * Reused by transform() so that new samples are centered the same way.
     *
     * @var array
     */
    protected $means = [];

    /**
     * Whether fit() has completed at least once.
     *
     * @var bool
     */
    protected $fit = false;

    /**
     * PCA (Principal Component Analysis) is used to explain the given data
     * with a lower number of dimensions. The analysis transforms the data to
     * a lower dimensional version of it by conserving a proportion of the
     * total variance within the data. It is a lossy data compression technique.
     *
     * Exactly one of the two arguments must be given.
     *
     * @param float|null $totalVariance Total explained variance to be preserved (0.1 .. 0.99)
     * @param int|null   $numFeatures   Number of features to be preserved (> 0)
     *
     * @throws InvalidArgumentException when an argument is out of range, or
     *                                  when both or neither of them are given
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        // Both set or both unset: the two "is given" flags are equal.
        if (($totalVariance !== null) === ($numFeatures !== null)) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }

        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Takes a dataset and returns a lower dimensional version of it while
     * preserving $totalVariance or $numFeatures.
     *
     * $data is an n-by-m matrix and the returned array is an
     * n-by-k matrix where k <= m.
     *
     * Every call recomputes the column means from $data, so the instance can
     * be re-fitted on a different dataset.
     */
    public function fit(array $data): array
    {
        $n = count($data[0]);

        // normalize() only computes the means when none are stored yet. Without
        // this explicit refresh a second fit() would center the new dataset
        // with the means of the previous one.
        $this->calculateMeans($data, $n);

        $data = $this->normalize($data, $n);

        // The data is already centered, so the means passed on are all zero.
        $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));

        $this->eigenDecomposition($covMatrix);

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample (or list of samples) to a lower dimensional
     * vector by using the eigenvectors obtained in the last run of fit().
     *
     * A single flat sample is wrapped into a one-row matrix first, so the
     * result is always a list of rows.
     *
     * @throws InvalidOperationException when fit() has not been run yet
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
        }

        // A flat vector was given: treat it as a matrix with a single row.
        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        // Means are already stored by fit(), so this centers with the training means.
        $sample = $this->normalize($sample, count($sample[0]));

        return $this->reduce($sample);
    }

    /**
     * Computes the arithmetic mean of each of the first $n columns of $data
     * and stores them in $this->means, replacing any previous values.
     */
    protected function calculateMeans(array $data, int $n): void
    {
        $this->means = [];
        for ($i = 0; $i < $n; ++$i) {
            $column = array_column($data, $i);
            $this->means[] = Mean::arithmetic($column);
        }
    }

    /**
     * Normalization of the data consists of subtracting the stored mean from
     * each dimension, therefore dimensions will be centered to zero.
     *
     * If no means are stored yet they are computed from $data first.
     */
    protected function normalize(array $data, int $n): array
    {
        if (count($this->means) === 0) {
            $this->calculateMeans($data, $n);
        }

        foreach (array_keys($data) as $i) {
            for ($k = 0; $k < $n; ++$k) {
                $data[$i][$k] -= $this->means[$k];
            }
        }

        return $data;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body