Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Clustering;
   6  
   7  use Phpml\Clustering\KMeans\Cluster;
   8  use Phpml\Clustering\KMeans\Point;
   9  use Phpml\Clustering\KMeans\Space;
  10  use Phpml\Exception\InvalidArgumentException;
  11  use Phpml\Math\Distance\Euclidean;
  12  
  /**
   * Fuzzy C-Means clusterer.
   *
   * Every sample receives a degree of membership in every cluster. cluster()
   * alternates between recomputing the membership matrix and the cluster
   * centers until the objective value changes by no more than epsilon or the
   * iteration limit is reached, then assigns each sample to the cluster in
   * which its membership is highest.
   */
  class FuzzyCMeans implements Clusterer
  {
      /**
       * Number of clusters requested in the constructor (always > 0).
       *
       * @var int
       */
      private $clustersNumber;

      /**
       * Clusters built by the most recent call to cluster().
       *
       * @var Cluster[]
       */
      private $clusters = [];

      /**
       * Space whose dimension equals the number of features of the first sample.
       *
       * @var Space
       */
      private $space;

      /**
       * Membership matrix indexed as [cluster][sample].
       *
       * @var float[][]
       */
      private $membership = [];

      /**
       * Fuzziness coefficient; the membership exponent is 2 / (fuzziness - 1),
       * so a value of exactly 1 cannot be used.
       *
       * @var float
       */
      private $fuzziness;

      /**
       * Iteration stops once the objective value changes by at most this amount.
       *
       * @var float
       */
      private $epsilon;

      /**
       * Upper bound on the number of update passes performed by cluster().
       *
       * @var int
       */
      private $maxIterations;

      /**
       * Number of samples passed to the most recent call to cluster().
       *
       * @var int
       */
      private $sampleCount;

      /**
       * Samples of the most recent call to cluster(), re-indexed from 0.
       *
       * @var array
       */
      private $samples = [];

      /**
       * @param int   $clustersNumber number of clusters to build; must be positive
       * @param float $fuzziness      fuzziness coefficient used in the membership exponent
       * @param float $epsilon        convergence threshold on the objective value
       * @param int   $maxIterations  maximum number of update passes
       *
       * @throws InvalidArgumentException when $clustersNumber is not positive
       */
      public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
      {
          if ($clustersNumber <= 0) {
              throw new InvalidArgumentException('Invalid clusters number');
          }

          $this->clustersNumber = $clustersNumber;
          $this->fuzziness = $fuzziness;
          $this->epsilon = $epsilon;
          $this->maxIterations = $maxIterations;
      }

      /**
       * Returns the membership matrix, indexed as [cluster][sample], as left by
       * the most recent call to cluster() (empty before the first call).
       */
      public function getMembershipMatrix(): array
      {
          return $this->membership;
      }

      /**
       * Clusters the samples and returns them grouped by cluster.
       *
       * @param Point[]|int[][] $samples
       *
       * @return array one entry per cluster, each holding that cluster's points
       *
       * @throws InvalidArgumentException when $samples is empty
       */
      public function cluster(array $samples): array
      {
          if (count($samples) === 0) {
              throw new InvalidArgumentException('Cannot cluster an empty set of samples');
          }

          // Re-index so that samples can be addressed by position 0..n-1, and
          // drop clusters left over from a previous call so that their points
          // do not accumulate across calls.
          $this->samples = array_values($samples);
          $this->sampleCount = count($this->samples);
          $this->space = new Space(count($this->samples[0]));
          $this->clusters = [];
          $this->initClusters();

          // Our goal is minimizing the objective value while
          // executing the clustering steps at a maximum number of iterations
          $lastObjective = 0.0;
          $iterations = 0;
          do {
              // Update the membership matrix and cluster centers, respectively
              $this->updateMembershipMatrix();
              $this->updateClusters();

              // Calculate the new value of the objective function
              $objectiveVal = $this->getObjective();
              $difference = abs($lastObjective - $objectiveVal);
              $lastObjective = $objectiveVal;

              // Pre-increment so that at most $maxIterations passes are executed
              // (the do/while form still guarantees at least one pass).
          } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

          // Attach (hard cluster) each data point to the cluster in which it has
          // the highest membership; on ties the lowest cluster index wins.
          for ($k = 0; $k < $this->sampleCount; ++$k) {
              $best = 0;
              for ($i = 1; $i < $this->clustersNumber; ++$i) {
                  if ($this->membership[$i][$k] > $this->membership[$best][$k]) {
                      $best = $i;
                  }
              }

              $this->clusters[$best]->attach(new Point($this->samples[$k]));
          }

          // Return grouped samples
          $grouped = [];
          foreach ($this->clusters as $cluster) {
              $grouped[] = $cluster->getPoints();
          }

          return $grouped;
      }

      /**
       * Seeds the membership matrix with random values and derives the initial
       * cluster centers from it.
       */
      protected function initClusters(): void
      {
          // Membership array is a matrix of cluster number by sample counts.
          // We initialize the membership array with random values. The row count
          // must be the number of clusters (not the space dimension), because
          // every other method indexes the matrix as [cluster][sample].
          $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
          $this->updateClusters();
      }

      /**
       * Fills the membership matrix with $rows x $cols random values, each row
       * scaled so that its entries sum to 1.
       */
      protected function generateRandomMembership(int $rows, int $cols): void
      {
          $this->membership = [];
          for ($i = 0; $i < $rows; ++$i) {
              $row = [];
              $total = 0.0;
              for ($k = 0; $k < $cols; ++$k) {
                  // Values are drawn from {0.1, 0.2, 0.3, 0.4, 0.5}, so $total is
                  // strictly positive whenever $cols > 0.
                  $val = random_int(1, 5) / 10.0;
                  $row[] = $val;
                  $total += $val;
              }

              $this->membership[] = array_map(function ($val) use ($total) {
                  return $val / $total;
              }, $row);
          }
      }

      /**
       * Moves every cluster center to the mean of all samples weighted by
       * membership ** fuzziness. Clusters are created on first use.
       */
      protected function updateClusters(): void
      {
          $dim = $this->space->getDimension();
          if (count($this->clusters) === 0) {
              for ($i = 0; $i < $this->clustersNumber; ++$i) {
                  $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
              }
          }

          for ($i = 0; $i < $this->clustersNumber; ++$i) {
              $cluster = $this->clusters[$i];

              // The denominator does not depend on the coordinate, so compute it
              // once per cluster (the column argument is ignored when not
              // multiplying).
              $weightTotal = $this->getMembershipRowTotal($i, 0, false);
              if ($weightTotal <= 0.0) {
                  // No sample has any membership in this cluster; keep its current
                  // center rather than dividing by zero.
                  continue;
              }

              $center = $cluster->getCoordinates();
              for ($k = 0; $k < $dim; ++$k) {
                  $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $weightTotal;
              }

              $cluster->setCoordinates($center);
          }
      }

      /**
       * Sums membership ** fuzziness over all samples for cluster $row. When
       * $multiply is true each term is additionally multiplied by coordinate
       * $col of the corresponding sample; otherwise $col is not used.
       */
      protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
      {
          $sum = 0.0;
          for ($k = 0; $k < $this->sampleCount; ++$k) {
              $val = $this->membership[$row][$k] ** $this->fuzziness;
              if ($multiply) {
                  $val *= $this->samples[$k][$col];
              }

              $sum += $val;
          }

          return $sum;
      }

      /**
       * Recomputes every membership value as the reciprocal of getDistanceCalc().
       */
      protected function updateMembershipMatrix(): void
      {
          for ($i = 0; $i < $this->clustersNumber; ++$i) {
              for ($k = 0; $k < $this->sampleCount; ++$k) {
                  $distCalc = $this->getDistanceCalc($i, $k);

                  // An infinite $distCalc yields a membership of exactly 0.0.
                  $this->membership[$i][$k] = 1.0 / $distCalc;
              }
          }
      }

      /**
       * Returns the denominator of the membership formula for cluster $row and
       * sample $col: the sum over all clusters j of
       * (dist(center_row, sample) / dist(center_j, sample)) ** (2 / (fuzziness - 1)).
       *
       * When the sample coincides with at least one cluster center the ratio is
       * undefined, so the membership is shared equally among the coinciding
       * clusters: the method returns the number of coinciding centers if
       * $row is one of them, and INF (membership 0) otherwise.
       */
      protected function getDistanceCalc(int $row, int $col): float
      {
          $distance = new Euclidean();
          $sample = $this->samples[$col];
          $dist1 = $distance->distance($this->clusters[$row]->getCoordinates(), $sample);

          // Parenthesised on purpose: `**` binds tighter than `/`, so the
          // unparenthesised form would square the ratio and then divide it.
          $exponent = 2.0 / ($this->fuzziness - 1);

          $sum = 0.0;
          $coincident = 0;
          for ($j = 0; $j < $this->clustersNumber; ++$j) {
              $dist2 = $distance->distance($this->clusters[$j]->getCoordinates(), $sample);
              if ($dist2 <= 0.0) {
                  ++$coincident;

                  continue;
              }

              $sum += ($dist1 / $dist2) ** $exponent;
          }

          if ($coincident > 0) {
              return $dist1 <= 0.0 ? (float) $coincident : INF;
          }

          return $sum;
      }

      /**
       * The objective is to minimize the distance between all data points
       * and all cluster centers. This method returns the summation of all
       * these distances
       */
      protected function getObjective(): float
      {
          $sum = 0.0;
          $distance = new Euclidean();
          for ($i = 0; $i < $this->clustersNumber; ++$i) {
              $clust = $this->clusters[$i]->getCoordinates();
              for ($k = 0; $k < $this->sampleCount; ++$k) {
                  $point = $this->samples[$k];
                  $sum += $distance->distance($clust, $point);
              }
          }

          return $sum;
      }
  }