Differences Between: [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php

declare(strict_types=1);

namespace Phpml\Clustering;

use Phpml\Clustering\KMeans\Cluster;
use Phpml\Clustering\KMeans\Point;
use Phpml\Clustering\KMeans\Space;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Distance\Euclidean;

/**
 * Fuzzy C-Means clusterer.
 *
 * Every sample receives a degree of membership in every cluster. The
 * membership matrix and the cluster centers are refined alternately until
 * the objective value stabilises or the iteration cap is reached; each sample
 * is then attached to the cluster in which its membership is highest.
 */
class FuzzyCMeans implements Clusterer
{
    /**
     * Number of clusters to produce.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * Cluster objects, indexed 0..clustersNumber-1.
     *
     * @var Cluster[]
     */
    private $clusters = [];

    /**
     * Space the clusters live in; its dimension is the feature count of the samples.
     *
     * @var Space
     */
    private $space;

    /**
     * Membership matrix indexed as [cluster][sample].
     *
     * @var float[][]
     */
    private $membership = [];

    /**
     * Fuzziness coefficient. It is used as `fuzziness - 1` in a denominator,
     * so it is expected to be greater than 1 (not validated here).
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Iteration stops once the objective changes by no more than this amount.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Upper bound on the number of refinement passes.
     *
     * @var int
     */
    private $maxIterations;

    /**
     * Number of samples in the current run.
     *
     * @var int
     */
    private $sampleCount;

    /**
     * Samples of the current run, re-indexed 0..sampleCount-1.
     *
     * @var array
     */
    private $samples = [];

    /**
     * @param int   $clustersNumber Number of clusters; must be positive
     * @param float $fuzziness      Fuzziness coefficient
     * @param float $epsilon        Convergence threshold on the objective change
     * @param int   $maxIterations  Maximum number of refinement passes
     *
     * @throws InvalidArgumentException when $clustersNumber is not positive
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw new InvalidArgumentException('Invalid clusters number');
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Returns the membership matrix of the last run, indexed as [cluster][sample].
     */
    public function getMembershipMatrix(): array
    {
        return $this->membership;
    }

    /**
     * Clusters the samples and returns, for each cluster, the points attached to it.
     *
     * @param Point[]|int[][] $samples
     *
     * @throws InvalidArgumentException when $samples is empty
     */
    public function cluster(array $samples): array
    {
        if (count($samples) === 0) {
            throw new InvalidArgumentException('Samples array must not be empty');
        }

        // Start from a clean state so that a second call on the same instance
        // does not reuse clusters (and their attached points) from a previous run.
        // Samples are re-indexed because every loop below addresses them by 0..n-1.
        $this->samples = array_values($samples);
        $this->sampleCount = count($this->samples);
        $this->clusters = [];
        $this->space = new Space(count($this->samples[0]));
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;
            // Pre-increment with a strict bound: the body runs at most
            // $maxIterations times (and always at least once).
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster with the highest membership
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $column = array_column($this->membership, $k);
            // arsort keeps the keys, so the first key is the winning cluster index
            arsort($column);
            reset($column);
            $cluster = $this->clusters[key($column)];
            $cluster->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }

    /**
     * Seeds the membership matrix with random values and derives the initial
     * cluster centers from it.
     */
    protected function initClusters(): void
    {
        // Membership array is a matrix of cluster number by sample counts,
        // so its row count must be the number of clusters (not the feature
        // dimension of the space). We initialize it with random values.
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }

    /**
     * Fills the membership matrix with $rows x $cols random values, each row
     * scaled so that its entries sum to 1.
     */
    protected function generateRandomMembership(int $rows, int $cols): void
    {
        $this->membership = [];
        for ($i = 0; $i < $rows; ++$i) {
            $row = [];
            $total = 0.0;
            for ($k = 0; $k < $cols; ++$k) {
                // Values are drawn from {0.1, 0.2, ..., 0.5}, so $total is never zero
                $val = random_int(1, 5) / 10.0;
                $row[] = $val;
                $total += $val;
            }

            $this->membership[] = array_map(static function ($val) use ($total) {
                return $val / $total;
            }, $row);
        }
    }

    /**
     * Recomputes every cluster center as the membership-weighted mean of the
     * samples, creating the cluster objects on first use.
     */
    protected function updateClusters(): void
    {
        $dim = $this->space->getDimension();
        if (count($this->clusters) === 0) {
            for ($i = 0; $i < $this->clustersNumber; ++$i) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();

            // The denominator does not depend on the coordinate, so compute it once
            $weightTotal = $this->getMembershipRowTotal($i, 0, false);

            // With no usable weight the center is left where it is instead of
            // dividing by zero
            if ($weightTotal > 0.0) {
                for ($k = 0; $k < $dim; ++$k) {
                    $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $weightTotal;
                }
            }

            $cluster->setCoordinates($center);
        }
    }

    /**
     * Sums membership[$row][k] ** fuzziness over all samples k.
     *
     * @param int  $row      Cluster index
     * @param int  $col      Coordinate index; only read when $multiply is true
     * @param bool $multiply When true, each term is multiplied by the sample's
     *                       coordinate $col
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
    {
        $sum = 0.0;
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $val = $this->membership[$row][$k] ** $this->fuzziness;
            if ($multiply) {
                $val *= $this->samples[$k][$col];
            }

            $sum += $val;
        }

        return $sum;
    }

    /**
     * Recomputes every membership value from the current cluster centers.
     */
    protected function updateMembershipMatrix(): void
    {
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $distCalc = $this->getDistanceCalc($i, $k);
                // $distCalc is 1.0 or larger (possibly INF, which yields 0.0 here)
                $this->membership[$i][$k] = 1.0 / $distCalc;
            }
        }
    }

    /**
     * Returns the denominator of the membership update for cluster $row and
     * sample $col: the sum over all clusters j of
     * (dist(center_row, sample) / dist(center_j, sample)) ** (2 / (fuzziness - 1)).
     *
     * @param int $row Cluster index
     * @param int $col Sample index
     */
    protected function getDistanceCalc(int $row, int $col): float
    {
        $distance = new Euclidean();
        $dist1 = $distance->distance(
            $this->clusters[$row]->getCoordinates(),
            $this->samples[$col]
        );

        // The sample sits exactly on this center: full membership (1 / 1.0)
        if ($dist1 <= 0.0) {
            return 1.0;
        }

        // `**` binds tighter than `/`, so the exponent has to be computed
        // separately; writing `x ** 2.0 / (m - 1)` would square x and then divide.
        $exponent = 2.0 / ($this->fuzziness - 1);

        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; ++$j) {
            $dist2 = $distance->distance(
                $this->clusters[$j]->getCoordinates(),
                $this->samples[$col]
            );

            // The sample sits exactly on another center: the ratio is unbounded,
            // so the membership in this cluster becomes 1 / INF = 0
            if ($dist2 <= 0.0) {
                return INF;
            }

            $sum += ($dist1 / $dist2) ** $exponent;
        }

        return $sum;
    }

    /**
     * The objective is to minimize the distance between all data points
     * and all cluster centers. This method returns the plain (unweighted)
     * summation of all these distances; it is used only to detect convergence.
     */
    protected function getObjective(): float
    {
        $sum = 0.0;
        $distance = new Euclidean();
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $clust = $this->clusters[$i]->getCoordinates();
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $point = $this->samples[$k];
                $sum += $distance->distance($clust, $point);
            }
        }

        return $sum;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body