Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 311 and 400] [Versions 39 and 400]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\DimensionReduction;
   6  
   7  use Closure;
   8  use Phpml\Exception\InvalidArgumentException;
   9  use Phpml\Exception\InvalidOperationException;
  10  use Phpml\Math\Distance\Euclidean;
  11  use Phpml\Math\Distance\Manhattan;
  12  use Phpml\Math\Matrix;
  13  
  14  class KernelPCA extends PCA
  15  {
  16      public const KERNEL_RBF = 1;
  17  
  18      public const KERNEL_SIGMOID = 2;
  19  
  20      public const KERNEL_LAPLACIAN = 3;
  21  
  22      public const KERNEL_LINEAR = 4;
  23  
  24      /**
  25       * Selected kernel function
  26       *
  27       * @var int
  28       */
  29      protected $kernel;
  30  
  31      /**
  32       * Gamma value used by the kernel
  33       *
  34       * @var float|null
  35       */
  36      protected $gamma;
  37  
  38      /**
  39       * Original dataset used to fit KernelPCA
  40       *
  41       * @var array
  42       */
  43      protected $data = [];
  44  
  45      /**
  46       * Kernel principal component analysis (KernelPCA) is an extension of PCA using
  47       * techniques of kernel methods. It is more suitable for data that involves
  48       * vectors that are not linearly separable<br><br>
  49       * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
  50       * will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
  51       * This transformation will return the same number of rows with only <i>2</i> columns.
  52       *
  53       * @param float $totalVariance Total variance to be preserved if numFeatures is not given
  54       * @param int   $numFeatures   Number of columns to be returned
  55       * @param float $gamma         Gamma parameter is used with RBF and Sigmoid kernels
  56       *
  57       * @throws InvalidArgumentException
  58       */
  59      public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null)
  60      {
  61          if (!in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR], true)) {
  62              throw new InvalidArgumentException('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
  63          }
  64  
  65          parent::__construct($totalVariance, $numFeatures);
  66  
  67          $this->kernel = $kernel;
  68          $this->gamma = $gamma;
  69      }
  70  
  71      /**
  72       * Takes a data and returns a lower dimensional version
  73       * of this data while preserving $totalVariance or $numFeatures. <br>
  74       * $data is an n-by-m matrix and returned array is
  75       * n-by-k matrix where k <= m
  76       */
  77      public function fit(array $data): array
  78      {
  79          $numRows = count($data);
  80          $this->data = $data;
  81  
  82          if ($this->gamma === null) {
  83              $this->gamma = 1.0 / $numRows;
  84          }
  85  
  86          $matrix = $this->calculateKernelMatrix($this->data, $numRows);
  87          $matrix = $this->centerMatrix($matrix, $numRows);
  88  
  89          $this->eigenDecomposition($matrix);
  90  
  91          $this->fit = true;
  92  
  93          return Matrix::transposeArray($this->eigVectors);
  94      }
  95  
  96      /**
  97       * Transforms the given sample to a lower dimensional vector by using
  98       * the variables obtained during the last run of <code>fit</code>.
  99       *
 100       * @throws InvalidArgumentException
 101       * @throws InvalidOperationException
 102       */
 103      public function transform(array $sample): array
 104      {
 105          if (!$this->fit) {
 106              throw new InvalidOperationException('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
 107          }
 108  
 109          if (is_array($sample[0])) {
 110              throw new InvalidArgumentException('KernelPCA::transform() accepts only one-dimensional arrays');
 111          }
 112  
 113          $pairs = $this->getDistancePairs($sample);
 114  
 115          return $this->projectSample($pairs);
 116      }
 117  
 118      /**
 119       * Calculates similarity matrix by use of selected kernel function<br>
 120       * An n-by-m matrix is given and an n-by-n matrix is returned
 121       */
 122      protected function calculateKernelMatrix(array $data, int $numRows): array
 123      {
 124          $kernelFunc = $this->getKernel();
 125  
 126          $matrix = [];
 127          for ($i = 0; $i < $numRows; ++$i) {
 128              for ($k = 0; $k < $numRows; ++$k) {
 129                  if ($i <= $k) {
 130                      $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
 131                  } else {
 132                      $matrix[$i][$k] = $matrix[$k][$i];
 133                  }
 134              }
 135          }
 136  
 137          return $matrix;
 138      }
 139  
 140      /**
 141       * Kernel matrix is centered in its original space by using the following
 142       * conversion:
 143       *
 144       * K′ = K − N.K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
 145       */
 146      protected function centerMatrix(array $matrix, int $n): array
 147      {
 148          $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
 149          $N = new Matrix($N, false);
 150          $K = new Matrix($matrix, false);
 151  
 152          // K.N (This term is repeated so we cache it once)
 153          $K_N = $K->multiply($N);
 154          // N.K
 155          $N_K = $N->multiply($K);
 156          // N.K.N
 157          $N_K_N = $N->multiply($K_N);
 158  
 159          return $K->subtract($N_K)
 160              ->subtract($K_N)
 161              ->add($N_K_N)
 162              ->toArray();
 163      }
 164  
 165      /**
 166       * Returns the callable kernel function
 167       *
 168       * @throws \Exception
 169       */
 170      protected function getKernel(): Closure
 171      {
 172          switch ($this->kernel) {
 173              case self::KERNEL_LINEAR:
 174                  // k(x,y) = xT.y
 175                  return function ($x, $y) {
 176                      return Matrix::dot($x, $y)[0];
 177                  };
 178              case self::KERNEL_RBF:
 179                  // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
 180                  $dist = new Euclidean();
 181  
 182                  return function ($x, $y) use ($dist): float {
 183                      return exp(-$this->gamma * $dist->sqDistance($x, $y));
 184                  };
 185  
 186              case self::KERNEL_SIGMOID:
 187                  // k(x,y)=tanh(γ.xT.y+c0) where c0=1
 188                  return function ($x, $y): float {
 189                      $res = Matrix::dot($x, $y)[0] + 1.0;
 190  
 191                      return tanh((float) $this->gamma * $res);
 192                  };
 193  
 194              case self::KERNEL_LAPLACIAN:
 195                  // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
 196                  $dist = new Manhattan();
 197  
 198                  return function ($x, $y) use ($dist): float {
 199                      return exp(-$this->gamma * $dist->distance($x, $y));
 200                  };
 201  
 202              default:
 203                  // Not reached
 204                  throw new InvalidArgumentException(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
 205          }
 206      }
 207  
 208      protected function getDistancePairs(array $sample): array
 209      {
 210          $kernel = $this->getKernel();
 211  
 212          $pairs = [];
 213          foreach ($this->data as $row) {
 214              $pairs[] = $kernel($row, $sample);
 215          }
 216  
 217          return $pairs;
 218      }
 219  
 220      protected function projectSample(array $pairs): array
 221      {
 222          // Normalize eigenvectors by eig = eigVectors / eigValues
 223          $func = function ($eigVal, $eigVect) {
 224              $m = new Matrix($eigVect, false);
 225              $a = $m->divideByScalar($eigVal)->toArray();
 226  
 227              return $a[0];
 228          };
 229          $eig = array_map($func, $this->eigValues, $this->eigVectors);
 230  
 231          // return k.dot(eig)
 232          return Matrix::dot($pairs, $eig);
 233      }
 234  }