<?php

declare(strict_types=1);

namespace Phpml\Math\Statistic;

use Phpml\Exception\InvalidArgumentException;

class Covariance
{
    /**
     * Calculates covariance from two given arrays, x and y, respectively.
     *
     * Values are paired by key: each key of $x is looked up in $y.
     *
     * @param array      $x      Observations of the first variable
     * @param array      $y      Observations of the second variable, keyed like $x
     * @param bool       $sample true divides by (n - 1), false divides by n
     * @param float|null $meanX  Mean of $x; computed with Mean::arithmetic() when null
     * @param float|null $meanY  Mean of $y; computed with Mean::arithmetic() when null
     *
     * @throws InvalidArgumentException when an array is empty, when the arrays differ
     *                                  in length, or when $sample is true and only one
     *                                  observation is given
     */
    public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
    {
        $n = count($x);
        if ($n === 0 || count($y) === 0) {
            throw new InvalidArgumentException('The array has zero elements');
        }

        // Pairing is done by key, so a length mismatch would either read
        // undefined offsets of $y or silently ignore part of it.
        if (count($y) !== $n) {
            throw new InvalidArgumentException('The arrays must have the same number of elements');
        }

        if ($sample && $n === 1) {
            throw new InvalidArgumentException('The array must have at least 2 elements');
        }

        if ($meanX === null) {
            $meanX = Mean::arithmetic($x);
        }

        if ($meanY === null) {
            $meanY = Mean::arithmetic($y);
        }

        $sum = self::sumOfDeviationProducts($x, $y, $meanX, $meanY);

        if ($sample) {
            --$n;
        }

        return $sum / $n;
    }

    /**
     * Calculates covariance of two dimensions, i and k in the given data.
     *
     * @param array      $data   Dataset given as an array of rows
     * @param int        $i      Column index of the first dimension
     * @param int        $k      Column index of the second dimension
     * @param bool       $sample true divides by (n - 1), false divides by n,
     *                           where n is the number of rows
     * @param float|null $meanX  Mean of column $i; computed when null
     * @param float|null $meanY  Mean of column $k; computed when null
     *
     * @throws InvalidArgumentException when $data is empty, when $sample is true and
     *                                  there is a single row, or when $i / $k fall
     *                                  outside the columns of the first row
     * @throws \Exception
     */
    public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
    {
        $n = count($data);
        if ($n === 0) {
            throw new InvalidArgumentException('The array has zero elements');
        }

        if ($sample && $n === 1) {
            throw new InvalidArgumentException('The array must have at least 2 elements');
        }

        // $i and $k select columns, so they are validated against the width of
        // a row (taken from the first row), not against the number of rows.
        $firstRow = reset($data);
        $dimension = is_array($firstRow) ? count($firstRow) : 0;
        if ($i < 0 || $k < 0 || $i >= $dimension || $k >= $dimension) {
            throw new InvalidArgumentException('Given indices i and k do not match with the dimensionality of data');
        }

        if ($meanX === null || $meanY === null) {
            // At least one mean is missing, so the column data is needed anyway.
            $x = array_column($data, $i);
            $y = array_column($data, $k);

            // Only fill in the mean that was not supplied by the caller.
            $meanX = $meanX ?? Mean::arithmetic($x);
            $meanY = $meanY ?? Mean::arithmetic($y);

            $sum = self::sumOfDeviationProducts($x, $y, $meanX, $meanY);
        } else {
            // Both means are known: read the two needed values straight from
            // each row instead of building column copies with array_column.
            $sum = 0.0;
            foreach ($data as $row) {
                // A row that lacks a column contributes a zero deviation for it.
                $deviationX = array_key_exists($i, $row) ? $row[$i] - $meanX : 0.0;
                $deviationY = array_key_exists($k, $row) ? $row[$k] - $meanY : 0.0;

                $sum += $deviationX * $deviationY;
            }
        }

        if ($sample) {
            --$n;
        }

        return $sum / $n;
    }

    /**
     * Returns the covariance matrix of n-dimensional data.
     *
     * The number of dimensions is taken from the first row. Every entry is
     * computed by fromDataset() with $sample = true; entries below the
     * diagonal are copied from their mirrored counterpart.
     *
     * @param array      $data  Dataset given as an array of rows
     * @param array|null $means Per-column means; computed with Mean::arithmetic() when null
     *
     * @return array Square matrix indexed [i][k]
     *
     * @throws InvalidArgumentException when $data is empty or its first row is not an array
     */
    public static function covarianceMatrix(array $data, ?array $means = null): array
    {
        $firstRow = reset($data);
        if (!is_array($firstRow)) {
            throw new InvalidArgumentException('The array has zero elements');
        }

        $n = count($firstRow);

        if ($means === null) {
            $means = [];
            for ($i = 0; $i < $n; ++$i) {
                $means[] = Mean::arithmetic(array_column($data, $i));
            }
        }

        $cov = [];
        for ($i = 0; $i < $n; ++$i) {
            for ($k = 0; $k < $n; ++$k) {
                if ($i > $k) {
                    // Covariance is symmetric: reuse the value computed earlier.
                    $cov[$i][$k] = $cov[$k][$i];
                } else {
                    $cov[$i][$k] = self::fromDataset(
                        $data,
                        $i,
                        $k,
                        true,
                        $means[$i],
                        $means[$k]
                    );
                }
            }
        }

        return $cov;
    }

    /**
     * Sums (x - meanX) * (y - meanY) over the keys of $x, pairing by key.
     */
    private static function sumOfDeviationProducts(array $x, array $y, float $meanX, float $meanY): float
    {
        $sum = 0.0;
        foreach ($x as $index => $xi) {
            $sum += ($xi - $meanX) * ($y[$index] - $meanY);
        }

        return $sum;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body