Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0 Note: minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.
<?php

declare(strict_types=1);

namespace Phpml\DimensionReduction;

use Closure;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;
use Phpml\Math\Distance\Euclidean;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Matrix;

class KernelPCA extends PCA
{
    public const KERNEL_RBF = 1;

    public const KERNEL_SIGMOID = 2;

    public const KERNEL_LAPLACIAN = 3;

    public const KERNEL_LINEAR = 4;

    /**
     * Selected kernel function
     *
     * @var int
     */
    protected $kernel;

    /**
     * Gamma value used by the kernel
     *
     * @var float|null
     */
    protected $gamma;

    /**
     * Original dataset used to fit KernelPCA
     *
     * @var array
     */
    protected $data = [];

    /**
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
     * techniques of kernel methods. It is more suitable for data that involves
     * vectors that are not linearly separable<br><br>
     * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
     * will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
     * This transformation will return the same number of rows with only <i>2</i> columns.
     *
     * @param float $totalVariance Total variance to be preserved if numFeatures is not given
     * @param int   $numFeatures   Number of columns to be returned
     * @param float $gamma         Gamma parameter is used with RBF and Sigmoid kernels
     *
     * @throws InvalidArgumentException
     */
    public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null)
    {
        if (!in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR], true)) {
            throw new InvalidArgumentException('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
        }

        parent::__construct($totalVariance, $numFeatures);

        $this->kernel = $kernel;
        $this->gamma = $gamma;
    }

    /**
     * Takes a data and returns a lower dimensional version
     * of this data while preserving $totalVariance or $numFeatures. <br>
     * $data is an n-by-m matrix and returned array is
     * n-by-k matrix where k <= m
     */
    public function fit(array $data): array
    {
        $numRows = count($data);
        $this->data = $data;

        if ($this->gamma === null) {
            $this->gamma = 1.0 / $numRows;
        }

        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
        $matrix = $this->centerMatrix($matrix, $numRows);

        $this->eigenDecomposition($matrix);

        $this->fit = true;

        return Matrix::transposeArray($this->eigVectors);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the variables obtained during the last run of <code>fit</code>.
     *
     * @throws InvalidArgumentException
     * @throws InvalidOperationException
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
        }

        if (is_array($sample[0])) {
            throw new InvalidArgumentException('KernelPCA::transform() accepts only one-dimensional arrays');
        }

        $pairs = $this->getDistancePairs($sample);

        return $this->projectSample($pairs);
    }

    /**
     * Calculates similarity matrix by use of selected kernel function<br>
     * An n-by-m matrix is given and an n-by-n matrix is returned
     */
    protected function calculateKernelMatrix(array $data, int $numRows): array
    {
        $kernelFunc = $this->getKernel();

        $matrix = [];
        for ($i = 0; $i < $numRows; ++$i) {
            for ($k = 0; $k < $numRows; ++$k) {
                if ($i <= $k) {
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
                } else {
                    $matrix[$i][$k] = $matrix[$k][$i];
                }
            }
        }

        return $matrix;
    }

    /**
     * Kernel matrix is centered in its original space by using the following
     * conversion:
     *
     * K′ = K − N.K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
     */
    protected function centerMatrix(array $matrix, int $n): array
    {
        $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
        $N = new Matrix($N, false);
        $K = new Matrix($matrix, false);

        // K.N (This term is repeated so we cache it once)
        $K_N = $K->multiply($N);
        // N.K
        $N_K = $N->multiply($K);
        // N.K.N
        $N_K_N = $N->multiply($K_N);

        return $K->subtract($N_K)
            ->subtract($K_N)
            ->add($N_K_N)
            ->toArray();
    }

    /**
     * Returns the callable kernel function
     *
     * @throws \Exception
     */
    protected function getKernel(): Closure
    {
        switch ($this->kernel) {
            case self::KERNEL_LINEAR:
                // k(x,y) = xT.y
                return function ($x, $y) {
                    return Matrix::dot($x, $y)[0];
                };
            case self::KERNEL_RBF:
                // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
                $dist = new Euclidean();

< return function ($x, $y) use ($dist) {
> return function ($x, $y) use ($dist): float {
return exp(-$this->gamma * $dist->sqDistance($x, $y)); }; case self::KERNEL_SIGMOID: // k(x,y)=tanh(γ.xT.y+c0) where c0=1
< return function ($x, $y) {
> return function ($x, $y): float {
$res = Matrix::dot($x, $y)[0] + 1.0; return tanh((float) $this->gamma * $res); }; case self::KERNEL_LAPLACIAN: // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance $dist = new Manhattan();
< return function ($x, $y) use ($dist) {
> return function ($x, $y) use ($dist): float {
return exp(-$this->gamma * $dist->distance($x, $y)); }; default: // Not reached throw new InvalidArgumentException(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel)); } } protected function getDistancePairs(array $sample): array { $kernel = $this->getKernel(); $pairs = []; foreach ($this->data as $row) { $pairs[] = $kernel($row, $sample); } return $pairs; } protected function projectSample(array $pairs): array { // Normalize eigenvectors by eig = eigVectors / eigValues $func = function ($eigVal, $eigVect) { $m = new Matrix($eigVect, false); $a = $m->divideByScalar($eigVal)->toArray(); return $a[0]; }; $eig = array_map($func, $this->eigValues, $this->eigVectors); // return k.dot(eig) return Matrix::dot($pairs, $eig); } }