Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\DimensionReduction;
   6  
   7  use Phpml\Exception\InvalidArgumentException;
   8  use Phpml\Exception\InvalidOperationException;
   9  use Phpml\Math\Statistic\Covariance;
  10  use Phpml\Math\Statistic\Mean;
  11  
  12  class PCA extends EigenTransformerBase
  13  {
  14      /**
  15       * Temporary storage for mean values for each dimension in given data
  16       *
  17       * @var array
  18       */
  19      protected $means = [];
  20  
  21      /**
  22       * @var bool
  23       */
  24      protected $fit = false;
  25  
  26      /**
  27       * PCA (Principal Component Analysis) used to explain given
  28       * data with lower number of dimensions. This analysis transforms the
  29       * data to a lower dimensional version of it by conserving a proportion of total variance
  30       * within the data. It is a lossy data compression technique.<br>
  31       *
  32       * @param float $totalVariance Total explained variance to be preserved
  33       * @param int   $numFeatures   Number of features to be preserved
  34       *
  35       * @throws InvalidArgumentException
  36       */
  37      public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
  38      {
  39          if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
  40              throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
  41          }
  42  
  43          if ($numFeatures !== null && $numFeatures <= 0) {
  44              throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
  45          }
  46  
  47          if (($totalVariance !== null) === ($numFeatures !== null)) {
  48              throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
  49          }
  50  
  51          if ($numFeatures !== null) {
  52              $this->numFeatures = $numFeatures;
  53          }
  54  
  55          if ($totalVariance !== null) {
  56              $this->totalVariance = $totalVariance;
  57          }
  58      }
  59  
  60      /**
  61       * Takes a data and returns a lower dimensional version
  62       * of this data while preserving $totalVariance or $numFeatures. <br>
  63       * $data is an n-by-m matrix and returned array is
  64       * n-by-k matrix where k <= m
  65       */
  66      public function fit(array $data): array
  67      {
  68          $n = count($data[0]);
  69  
  70          $data = $this->normalize($data, $n);
  71  
  72          $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));
  73  
  74          $this->eigenDecomposition($covMatrix);
  75  
  76          $this->fit = true;
  77  
  78          return $this->reduce($data);
  79      }
  80  
  81      /**
  82       * Transforms the given sample to a lower dimensional vector by using
  83       * the eigenVectors obtained in the last run of <code>fit</code>.
  84       *
  85       * @throws InvalidOperationException
  86       */
  87      public function transform(array $sample): array
  88      {
  89          if (!$this->fit) {
  90              throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
  91          }
  92  
  93          if (!is_array($sample[0])) {
  94              $sample = [$sample];
  95          }
  96  
  97          $sample = $this->normalize($sample, count($sample[0]));
  98  
  99          return $this->reduce($sample);
 100      }
 101  
 102      protected function calculateMeans(array $data, int $n): void
 103      {
 104          // Calculate means for each dimension
 105          $this->means = [];
 106          for ($i = 0; $i < $n; ++$i) {
 107              $column = array_column($data, $i);
 108              $this->means[] = Mean::arithmetic($column);
 109          }
 110      }
 111  
 112      /**
 113       * Normalization of the data includes subtracting mean from
 114       * each dimension therefore dimensions will be centered to zero
 115       */
 116      protected function normalize(array $data, int $n): array
 117      {
 118          if (count($this->means) === 0) {
 119              $this->calculateMeans($data, $n);
 120          }
 121  
 122          // Normalize data
 123          foreach (array_keys($data) as $i) {
 124              for ($k = 0; $k < $n; ++$k) {
 125                  $data[$i][$k] -= $this->means[$k];
 126              }
 127          }
 128  
 129          return $data;
 130      }
 131  }