Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Clustering;
   6  
   7  use Phpml\Math\Distance;
   8  use Phpml\Math\Distance\Euclidean;
   9  
  10  class DBSCAN implements Clusterer
  11  {
  12      private const NOISE = -1;
  13  
  14      /**
  15       * @var float
  16       */
  17      private $epsilon;
  18  
  19      /**
  20       * @var int
  21       */
  22      private $minSamples;
  23  
  24      /**
  25       * @var Distance
  26       */
  27      private $distanceMetric;
  28  
  29      public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
  30      {
  31          if ($distanceMetric === null) {
  32              $distanceMetric = new Euclidean();
  33          }
  34  
  35          $this->epsilon = $epsilon;
  36          $this->minSamples = $minSamples;
  37          $this->distanceMetric = $distanceMetric;
  38      }
  39  
  40      public function cluster(array $samples): array
  41      {
  42          $labels = [];
  43          $n = 0;
  44  
  45          foreach ($samples as $index => $sample) {
  46              if (isset($labels[$index])) {
  47                  continue;
  48              }
  49  
  50              $neighborIndices = $this->getIndicesInRegion($sample, $samples);
  51  
  52              if (count($neighborIndices) < $this->minSamples) {
  53                  $labels[$index] = self::NOISE;
  54  
  55                  continue;
  56              }
  57  
  58              $labels[$index] = $n;
  59  
  60              $this->expandCluster($samples, $neighborIndices, $labels, $n);
  61  
  62              ++$n;
  63          }
  64  
  65          return $this->groupByCluster($samples, $labels, $n);
  66      }
  67  
  68      private function expandCluster(array $samples, array $seeds, array &$labels, int $n): void
  69      {
  70          while (($index = array_pop($seeds)) !== null) {
  71              if (isset($labels[$index])) {
  72                  if ($labels[$index] === self::NOISE) {
  73                      $labels[$index] = $n;
  74                  }
  75  
  76                  continue;
  77              }
  78  
  79              $labels[$index] = $n;
  80  
  81              $sample = $samples[$index];
  82              $neighborIndices = $this->getIndicesInRegion($sample, $samples);
  83  
  84              if (count($neighborIndices) >= $this->minSamples) {
  85                  $seeds = array_unique(array_merge($seeds, $neighborIndices));
  86              }
  87          }
  88      }
  89  
  90      private function getIndicesInRegion(array $center, array $samples): array
  91      {
  92          $indices = [];
  93  
  94          foreach ($samples as $index => $sample) {
  95              if ($this->distanceMetric->distance($center, $sample) < $this->epsilon) {
  96                  $indices[] = $index;
  97              }
  98          }
  99  
 100          return $indices;
 101      }
 102  
 103      private function groupByCluster(array $samples, array $labels, int $n): array
 104      {
 105          $clusters = array_fill(0, $n, []);
 106  
 107          foreach ($samples as $index => $sample) {
 108              if ($labels[$index] !== self::NOISE) {
 109                  $clusters[$labels[$index]][$index] = $sample;
 110              }
 111          }
 112  
 113          // Reindex (i.e. to 0, 1, 2, ...) integer indices for backword compatibility
 114          foreach ($clusters as $index => $cluster) {
 115              $clusters[$index] = array_merge($cluster, []);
 116          }
 117  
 118          return $clusters;
 119      }
 120  }