Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Clustering;
   6  
   7  use Phpml\Clustering\KMeans\Cluster;
   8  use Phpml\Clustering\KMeans\Point;
   9  use Phpml\Clustering\KMeans\Space;
  10  use Phpml\Exception\InvalidArgumentException;
  11  use Phpml\Math\Distance\Euclidean;
  12  
  13  class FuzzyCMeans implements Clusterer
  14  {
  15      /**
  16       * @var int
  17       */
  18      private $clustersNumber;
  19  
  20      /**
  21       * @var Cluster[]
  22       */
  23      private $clusters = [];
  24  
  25      /**
  26       * @var Space
  27       */
  28      private $space;
  29  
  30      /**
  31       * @var float[][]
  32       */
  33      private $membership = [];
  34  
  35      /**
  36       * @var float
  37       */
  38      private $fuzziness;
  39  
  40      /**
  41       * @var float
  42       */
  43      private $epsilon;
  44  
  45      /**
  46       * @var int
  47       */
  48      private $maxIterations;
  49  
  50      /**
  51       * @var int
  52       */
  53      private $sampleCount;
  54  
  55      /**
  56       * @var array
  57       */
  58      private $samples = [];
  59  
  60      /**
  61       * @throws InvalidArgumentException
  62       */
  63      public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
  64      {
  65          if ($clustersNumber <= 0) {
  66              throw new InvalidArgumentException('Invalid clusters number');
  67          }
  68  
  69          $this->clustersNumber = $clustersNumber;
  70          $this->fuzziness = $fuzziness;
  71          $this->epsilon = $epsilon;
  72          $this->maxIterations = $maxIterations;
  73      }
  74  
  75      public function getMembershipMatrix(): array
  76      {
  77          return $this->membership;
  78      }
  79  
  80      public function cluster(array $samples): array
  81      {
  82          // Initialize variables, clusters and membership matrix
  83          $this->sampleCount = count($samples);
  84          $this->samples = &$samples;
  85          $this->space = new Space(count($samples[0]));
  86          $this->initClusters();
  87  
  88          // Our goal is minimizing the objective value while
  89          // executing the clustering steps at a maximum number of iterations
  90          $lastObjective = 0.0;
  91          $iterations = 0;
  92          do {
  93              // Update the membership matrix and cluster centers, respectively
  94              $this->updateMembershipMatrix();
  95              $this->updateClusters();
  96  
  97              // Calculate the new value of the objective function
  98              $objectiveVal = $this->getObjective();
  99              $difference = abs($lastObjective - $objectiveVal);
 100              $lastObjective = $objectiveVal;
 101          } while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations);
 102  
 103          // Attach (hard cluster) each data point to the nearest cluster
 104          for ($k = 0; $k < $this->sampleCount; ++$k) {
 105              $column = array_column($this->membership, $k);
 106              arsort($column);
 107              reset($column);
 108              $cluster = $this->clusters[key($column)];
 109              $cluster->attach(new Point($this->samples[$k]));
 110          }
 111  
 112          // Return grouped samples
 113          $grouped = [];
 114          foreach ($this->clusters as $cluster) {
 115              $grouped[] = $cluster->getPoints();
 116          }
 117  
 118          return $grouped;
 119      }
 120  
 121      protected function initClusters(): void
 122      {
 123          // Membership array is a matrix of cluster number by sample counts
 124          // We initilize the membership array with random values
 125          $dim = $this->space->getDimension();
 126          $this->generateRandomMembership($dim, $this->sampleCount);
 127          $this->updateClusters();
 128      }
 129  
 130      protected function generateRandomMembership(int $rows, int $cols): void
 131      {
 132          $this->membership = [];
 133          for ($i = 0; $i < $rows; ++$i) {
 134              $row = [];
 135              $total = 0.0;
 136              for ($k = 0; $k < $cols; ++$k) {
 137                  $val = random_int(1, 5) / 10.0;
 138                  $row[] = $val;
 139                  $total += $val;
 140              }
 141  
 142              $this->membership[] = array_map(static function ($val) use ($total): float {
 143                  return $val / $total;
 144              }, $row);
 145          }
 146      }
 147  
 148      protected function updateClusters(): void
 149      {
 150          $dim = $this->space->getDimension();
 151          if (count($this->clusters) === 0) {
 152              for ($i = 0; $i < $this->clustersNumber; ++$i) {
 153                  $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
 154              }
 155          }
 156  
 157          for ($i = 0; $i < $this->clustersNumber; ++$i) {
 158              $cluster = $this->clusters[$i];
 159              $center = $cluster->getCoordinates();
 160              for ($k = 0; $k < $dim; ++$k) {
 161                  $a = $this->getMembershipRowTotal($i, $k, true);
 162                  $b = $this->getMembershipRowTotal($i, $k, false);
 163                  $center[$k] = $a / $b;
 164              }
 165  
 166              $cluster->setCoordinates($center);
 167          }
 168      }
 169  
 170      protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
 171      {
 172          $sum = 0.0;
 173          for ($k = 0; $k < $this->sampleCount; ++$k) {
 174              $val = $this->membership[$row][$k] ** $this->fuzziness;
 175              if ($multiply) {
 176                  $val *= $this->samples[$k][$col];
 177              }
 178  
 179              $sum += $val;
 180          }
 181  
 182          return $sum;
 183      }
 184  
 185      protected function updateMembershipMatrix(): void
 186      {
 187          for ($i = 0; $i < $this->clustersNumber; ++$i) {
 188              for ($k = 0; $k < $this->sampleCount; ++$k) {
 189                  $distCalc = $this->getDistanceCalc($i, $k);
 190                  $this->membership[$i][$k] = 1.0 / $distCalc;
 191              }
 192          }
 193      }
 194  
 195      protected function getDistanceCalc(int $row, int $col): float
 196      {
 197          $sum = 0.0;
 198          $distance = new Euclidean();
 199          $dist1 = $distance->distance(
 200              $this->clusters[$row]->getCoordinates(),
 201              $this->samples[$col]
 202          );
 203  
 204          for ($j = 0; $j < $this->clustersNumber; ++$j) {
 205              $dist2 = $distance->distance(
 206                  $this->clusters[$j]->getCoordinates(),
 207                  $this->samples[$col]
 208              );
 209  
 210              $val = (($dist1 / $dist2) ** 2.0) / ($this->fuzziness - 1);
 211              $sum += $val;
 212          }
 213  
 214          return $sum;
 215      }
 216  
 217      /**
 218       * The objective is to minimize the distance between all data points
 219       * and all cluster centers. This method returns the summation of all
 220       * these distances
 221       */
 222      protected function getObjective(): float
 223      {
 224          $sum = 0.0;
 225          $distance = new Euclidean();
 226          for ($i = 0; $i < $this->clustersNumber; ++$i) {
 227              $clust = $this->clusters[$i]->getCoordinates();
 228              for ($k = 0; $k < $this->sampleCount; ++$k) {
 229                  $point = $this->samples[$k];
 230                  $sum += $distance->distance($clust, $point);
 231              }
 232          }
 233  
 234          return $sum;
 235      }
 236  }