1 <?php 2 3 declare(strict_types=1); 4 5 namespace Phpml\Clustering; 6 7 use Phpml\Math\Distance; 8 use Phpml\Math\Distance\Euclidean; 9 10 class DBSCAN implements Clusterer 11 { 12 private const NOISE = -1; 13 14 /** 15 * @var float 16 */ 17 private $epsilon; 18 19 /** 20 * @var int 21 */ 22 private $minSamples; 23 24 /** 25 * @var Distance 26 */ 27 private $distanceMetric; 28 29 public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null) 30 { 31 if ($distanceMetric === null) { 32 $distanceMetric = new Euclidean(); 33 } 34 35 $this->epsilon = $epsilon; 36 $this->minSamples = $minSamples; 37 $this->distanceMetric = $distanceMetric; 38 } 39 40 public function cluster(array $samples): array 41 { 42 $labels = []; 43 $n = 0; 44 45 foreach ($samples as $index => $sample) { 46 if (isset($labels[$index])) { 47 continue; 48 } 49 50 $neighborIndices = $this->getIndicesInRegion($sample, $samples); 51 52 if (count($neighborIndices) < $this->minSamples) { 53 $labels[$index] = self::NOISE; 54 55 continue; 56 } 57 58 $labels[$index] = $n; 59 60 $this->expandCluster($samples, $neighborIndices, $labels, $n); 61 62 ++$n; 63 } 64 65 return $this->groupByCluster($samples, $labels, $n); 66 } 67 68 private function expandCluster(array $samples, array $seeds, array &$labels, int $n): void 69 { 70 while (($index = array_pop($seeds)) !== null) { 71 if (isset($labels[$index])) { 72 if ($labels[$index] === self::NOISE) { 73 $labels[$index] = $n; 74 } 75 76 continue; 77 } 78 79 $labels[$index] = $n; 80 81 $sample = $samples[$index]; 82 $neighborIndices = $this->getIndicesInRegion($sample, $samples); 83 84 if (count($neighborIndices) >= $this->minSamples) { 85 $seeds = array_unique(array_merge($seeds, $neighborIndices)); 86 } 87 } 88 } 89 90 private function getIndicesInRegion(array $center, array $samples): array 91 { 92 $indices = []; 93 94 foreach ($samples as $index => $sample) { 95 if ($this->distanceMetric->distance($center, $sample) < $this->epsilon) { 96 $indices[] = $index; 97 } 98 } 99 100 return $indices; 101 } 102 103 private function groupByCluster(array $samples, array $labels, int $n): array 104 { 105 $clusters = array_fill(0, $n, []); 106 107 foreach ($samples as $index => $sample) { 108 if ($labels[$index] !== self::NOISE) { 109 $clusters[$labels[$index]][$index] = $sample; 110 } 111 } 112 113 // Reindex (i.e. to 0, 1, 2, ...) integer indices for backword compatibility 114 foreach ($clusters as $index => $cluster) { 115 $clusters[$index] = array_merge($cluster, []); 116 } 117 118 return $clusters; 119 } 120 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body