Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403]
<?php

declare(strict_types=1);

namespace Phpml\Clustering;

use Phpml\Clustering\KMeans\Cluster;
use Phpml\Clustering\KMeans\Point;
use Phpml\Clustering\KMeans\Space;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Distance\Euclidean;

/**
 * Fuzzy C-Means clusterer.
 *
 * Every sample receives a membership degree for every cluster. The membership
 * matrix and the cluster centers are refined alternately until the objective
 * value stabilises (or the iteration budget is spent); each sample is then
 * attached to the cluster in which its membership is highest.
 */
class FuzzyCMeans implements Clusterer
{
    /**
     * Number of clusters to produce.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * Cluster objects, indexed 0..clustersNumber-1.
     *
     * @var Cluster[]
     */
    private $clusters = [];

    /**
     * Space the clusters live in; its dimension is the feature count of the samples.
     *
     * @var Space
     */
    private $space;

    /**
     * Membership matrix: $membership[$i][$k] is the degree to which
     * sample $k belongs to cluster $i.
     *
     * @var float[][]
     */
    private $membership = [];

    /**
     * Fuzzifier (usually written "m"); must be greater than 1.
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Iteration stops once the objective changes by no more than this amount.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Upper bound on the number of refinement iterations.
     *
     * @var int
     */
    private $maxIterations;

    /**
     * Number of samples passed to the last cluster() call.
     *
     * @var int
     */
    private $sampleCount;

    /**
     * Samples passed to the last cluster() call.
     *
     * @var array
     */
    private $samples = [];

    /**
     * @param int   $clustersNumber Number of clusters; must be positive
     * @param float $fuzziness      Fuzzifier; must be greater than 1
     * @param float $epsilon        Convergence threshold on the objective change
     * @param int   $maxIterations  Maximum number of refinement iterations
     *
     * @throws InvalidArgumentException if the clusters number or the fuzziness is out of range
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw new InvalidArgumentException('Invalid clusters number');
        }

        // The membership update raises distance ratios to the power 2 / (fuzziness - 1),
        // which is undefined for fuzziness = 1 and meaningless below it.
        if ($fuzziness <= 1.0) {
            throw new InvalidArgumentException('Fuzziness must be greater than 1');
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Returns the membership matrix computed by the last cluster() call
     * (rows are clusters, columns are samples).
     */
    public function getMembershipMatrix(): array
    {
        return $this->membership;
    }

    /**
     * Clusters the given samples and returns them grouped by cluster.
     *
     * @param array $samples List of samples, each a list of numeric coordinates
     *
     * @return array One entry per cluster, holding the points attached to it
     *
     * @throws InvalidArgumentException if no samples are given
     */
    public function cluster(array $samples): array
    {
        if (count($samples) === 0) {
            throw new InvalidArgumentException('Cannot cluster an empty set of samples');
        }

        // Initialize variables, clusters and membership matrix
        $this->sampleCount = count($samples);
        $this->samples = $samples;
        $this->space = new Space(count($samples[0]));
        // Start from fresh clusters so that points attached by a previous
        // cluster() call do not leak into this result.
        $this->clusters = [];
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;
            // Pre-increment with a strict comparison: the body runs at most maxIterations times
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster with its highest membership
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $column = array_column($this->membership, $k);
            arsort($column);
            reset($column);
            $cluster = $this->clusters[key($column)];
            $cluster->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }

    /**
     * Seeds the membership matrix with random values and derives the
     * initial cluster centers from it.
     */
    protected function initClusters(): void
    {
        // Membership array is a matrix of cluster number by sample counts.
        // We initialize the membership array with random values.
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }

    /**
     * Fills the membership matrix with random values; each row is normalized
     * so that its entries sum to 1.
     *
     * @param int $rows Number of rows (clusters)
     * @param int $cols Number of columns (samples)
     */
    protected function generateRandomMembership(int $rows, int $cols): void
    {
        $this->membership = [];
        for ($i = 0; $i < $rows; ++$i) {
            $row = [];
            $total = 0.0;
            for ($k = 0; $k < $cols; ++$k) {
                $val = random_int(1, 5) / 10.0;
                $row[] = $val;
                $total += $val;
            }

            $this->membership[] = array_map(static function ($val) use ($total): float {
                return $val / $total;
            }, $row);
        }
    }

    /**
     * Recomputes every cluster center as the membership-weighted mean of the
     * samples, creating the cluster objects on first use.
     */
    protected function updateClusters(): void
    {
        $dim = $this->space->getDimension();
        if (count($this->clusters) === 0) {
            for ($i = 0; $i < $this->clustersNumber; ++$i) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            // The denominator does not depend on the coordinate, so compute it once per cluster
            $denominator = $this->getMembershipRowTotal($i, 0, false);
            if ($denominator <= 0.0) {
                // No sample has any membership in this cluster: keep its
                // current center rather than dividing by zero.
                continue;
            }

            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();
            for ($k = 0; $k < $dim; ++$k) {
                $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $denominator;
            }

            $cluster->setCoordinates($center);
        }
    }

    /**
     * Sums membership ** fuzziness over all samples for one cluster row.
     *
     * @param int  $row      Cluster index
     * @param int  $col      Coordinate index; only used when $multiply is true
     * @param bool $multiply Whether each term is weighted by the sample's $col coordinate
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
    {
        $sum = 0.0;
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $val = $this->membership[$row][$k] ** $this->fuzziness;
            if ($multiply) {
                $val *= $this->samples[$k][$col];
            }

            $sum += $val;
        }

        return $sum;
    }

    /**
     * Recomputes every membership value from the current cluster centers.
     */
    protected function updateMembershipMatrix(): void
    {
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $distCalc = $this->getDistanceCalc($i, $k);
                // An infinite $distCalc yields a membership of 0.0
                $this->membership[$i][$k] = 1.0 / $distCalc;
            }
        }
    }

    /**
     * Computes the denominator of the membership update for one
     * (cluster, sample) pair: the sum over all clusters j of
     * (d(row, col) / d(j, col)) ** (2 / (fuzziness - 1)).
     *
     * @param int $row Cluster index
     * @param int $col Sample index
     *
     * @return float 1.0 when the sample lies exactly on this cluster's center,
     *               INF when it lies exactly on another cluster's center
     */
    protected function getDistanceCalc(int $row, int $col): float
    {
        $distance = new Euclidean();
        $sample = $this->samples[$col];
        $dist1 = $distance->distance(
            $this->clusters[$row]->getCoordinates(),
            $sample
        );

        // The sample sits on this center: give it full membership here
        // instead of evaluating 0 / 0 below.
        if ($dist1 <= 0.0) {
            return 1.0;
        }

        $exponent = 2.0 / ($this->fuzziness - 1);
        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; ++$j) {
            $dist2 = $distance->distance(
                $this->clusters[$j]->getCoordinates(),
                $sample
            );

            // The sample sits on another center: the ratio is unbounded,
            // so the membership in this cluster collapses to zero.
            if ($dist2 <= 0.0) {
                return INF;
            }

            $sum += ($dist1 / $dist2) ** $exponent;
        }

        return $sum;
    }

    /**
     * The objective is to minimize the distance between all data points
     * and all cluster centers. This method returns the summation of all
     * these distances
     */
    protected function getObjective(): float
    {
        $sum = 0.0;
        $distance = new Euclidean();
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $clust = $this->clusters[$i]->getCoordinates();
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $point = $this->samples[$k];
                $sum += $distance->distance($clust, $point);
            }
        }

        return $sum;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body