Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Math\Statistic;
   6  
   7  use Phpml\Exception\InvalidArgumentException;
   8  
   9  class Covariance
  10  {
  11      /**
  12       * Calculates covariance from two given arrays, x and y, respectively
  13       *
  14       * @throws InvalidArgumentException
  15       */
  16      public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
  17      {
  18          $n = count($x);
  19          if ($n === 0 || count($y) === 0) {
  20              throw new InvalidArgumentException('The array has zero elements');
  21          }
  22  
  23          if ($sample && $n === 1) {
  24              throw new InvalidArgumentException('The array must have at least 2 elements');
  25          }
  26  
  27          if ($meanX === null) {
  28              $meanX = Mean::arithmetic($x);
  29          }
  30  
  31          if ($meanY === null) {
  32              $meanY = Mean::arithmetic($y);
  33          }
  34  
  35          $sum = 0.0;
  36          foreach ($x as $index => $xi) {
  37              $yi = $y[$index];
  38              $sum += ($xi - $meanX) * ($yi - $meanY);
  39          }
  40  
  41          if ($sample) {
  42              --$n;
  43          }
  44  
  45          return $sum / $n;
  46      }
  47  
  48      /**
  49       * Calculates covariance of two dimensions, i and k in the given data.
  50       *
  51       * @throws InvalidArgumentException
  52       * @throws \Exception
  53       */
  54      public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
  55      {
  56          if (count($data) === 0) {
  57              throw new InvalidArgumentException('The array has zero elements');
  58          }
  59  
  60          $n = count($data);
  61          if ($sample && $n === 1) {
  62              throw new InvalidArgumentException('The array must have at least 2 elements');
  63          }
  64  
  65          if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {
  66              throw new InvalidArgumentException('Given indices i and k do not match with the dimensionality of data');
  67          }
  68  
  69          if ($meanX === null || $meanY === null) {
  70              $x = array_column($data, $i);
  71              $y = array_column($data, $k);
  72  
  73              $meanX = Mean::arithmetic($x);
  74              $meanY = Mean::arithmetic($y);
  75              $sum = 0.0;
  76              foreach ($x as $index => $xi) {
  77                  $yi = $y[$index];
  78                  $sum += ($xi - $meanX) * ($yi - $meanY);
  79              }
  80          } else {
  81              // In the case, whole dataset given along with dimension indices, i and k,
  82              // we would like to avoid getting column data with array_column and operate
  83              // over this extra copy of column data for memory efficiency purposes.
  84              //
  85              // Instead we traverse through the whole data and get what we actually need
  86              // without copying the data. This way, memory use will be reduced
  87              // with a slight cost of CPU utilization.
  88              $sum = 0.0;
  89              foreach ($data as $row) {
  90                  $val = [0, 0];
  91                  foreach ($row as $index => $col) {
  92                      if ($index == $i) {
  93                          $val[0] = $col - $meanX;
  94                      }
  95  
  96                      if ($index == $k) {
  97                          $val[1] = $col - $meanY;
  98                      }
  99                  }
 100  
 101                  $sum += $val[0] * $val[1];
 102              }
 103          }
 104  
 105          if ($sample) {
 106              --$n;
 107          }
 108  
 109          return $sum / $n;
 110      }
 111  
 112      /**
 113       * Returns the covariance matrix of n-dimensional data
 114       *
 115       * @param array|null $means
 116       */
 117      public static function covarianceMatrix(array $data, ?array $means = null): array
 118      {
 119          $n = count($data[0]);
 120  
 121          if ($means === null) {
 122              $means = [];
 123              for ($i = 0; $i < $n; ++$i) {
 124                  $means[] = Mean::arithmetic(array_column($data, $i));
 125              }
 126          }
 127  
 128          $cov = [];
 129          for ($i = 0; $i < $n; ++$i) {
 130              for ($k = 0; $k < $n; ++$k) {
 131                  if ($i > $k) {
 132                      $cov[$i][$k] = $cov[$k][$i];
 133                  } else {
 134                      $cov[$i][$k] = self::fromDataset(
 135                          $data,
 136                          $i,
 137                          $k,
 138                          true,
 139                          $means[$i],
 140                          $means[$k]
 141                      );
 142                  }
 143              }
 144          }
 145  
 146          return $cov;
 147      }
 148  }