Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403]
1 <?php 2 3 declare(strict_types=1); 4 5 namespace Phpml\Association; 6 7 use Phpml\Helper\Predictable; 8 use Phpml\Helper\Trainable; 9 10 class Apriori implements Associator 11 { 12 use Trainable; 13 use Predictable; 14 15 public const ARRAY_KEY_ANTECEDENT = 'antecedent'; 16 17 public const ARRAY_KEY_CONFIDENCE = 'confidence'; 18 19 public const ARRAY_KEY_CONSEQUENT = 'consequent'; 20 21 public const ARRAY_KEY_SUPPORT = 'support'; 22 23 /** 24 * Minimum relative probability of frequent transactions. 25 * 26 * @var float 27 */ 28 private $confidence; 29 30 /** 31 * The large set contains frequent k-length item sets. 32 * 33 * @var mixed[][][] 34 */ 35 private $large = []; 36 37 /** 38 * Minimum relative frequency of transactions. 39 * 40 * @var float 41 */ 42 private $support; 43 44 /** 45 * The generated Apriori association rules. 46 * 47 * @var mixed[][] 48 */ 49 private $rules = []; 50 51 /** 52 * Apriori constructor. 53 */ 54 public function __construct(float $support = 0.0, float $confidence = 0.0) 55 { 56 $this->support = $support; 57 $this->confidence = $confidence; 58 } 59 60 /** 61 * Get all association rules which are generated for every k-length frequent item set. 62 * 63 * @return mixed[][] 64 */ 65 public function getRules(): array 66 { 67 if (count($this->large) === 0) { 68 $this->large = $this->apriori(); 69 } 70 71 if (count($this->rules) > 0) { 72 return $this->rules; 73 } 74 75 $this->rules = []; 76 77 $this->generateAllRules(); 78 79 return $this->rules; 80 } 81 82 /** 83 * Generates frequent item sets. 84 * 85 * @return mixed[][][] 86 */ 87 public function apriori(): array 88 { 89 $L = []; 90 91 $items = $this->frequent($this->items()); 92 for ($k = 1; isset($items[0]); ++$k) { 93 $L[$k] = $items; 94 $items = $this->frequent($this->candidates($items)); 95 } 96 97 return $L; 98 } 99 100 /** 101 * @param mixed[] $sample 102 * 103 * @return mixed[][] 104 */ 105 protected function predictSample(array $sample): array 106 { 107 $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool { 108 return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); 109 })); 110 111 return array_map(static function ($rule) { 112 return $rule[self::ARRAY_KEY_CONSEQUENT]; 113 }, $predicts); 114 } 115 116 /** 117 * Generate rules for each k-length frequent item set. 118 */ 119 private function generateAllRules(): void 120 { 121 for ($k = 2; isset($this->large[$k]); ++$k) { 122 foreach ($this->large[$k] as $frequent) { 123 $this->generateRules($frequent); 124 } 125 } 126 } 127 128 /** 129 * Generate confident rules for frequent item set. 130 * 131 * @param mixed[] $frequent 132 */ 133 private function generateRules(array $frequent): void 134 { 135 foreach ($this->antecedents($frequent) as $antecedent) { 136 $confidence = $this->confidence($frequent, $antecedent); 137 if ($this->confidence <= $confidence) { 138 $consequent = array_values(array_diff($frequent, $antecedent)); 139 $this->rules[] = [ 140 self::ARRAY_KEY_ANTECEDENT => $antecedent, 141 self::ARRAY_KEY_CONSEQUENT => $consequent, 142 self::ARRAY_KEY_SUPPORT => $this->support($frequent), 143 self::ARRAY_KEY_CONFIDENCE => $confidence, 144 ]; 145 } 146 } 147 } 148 149 /** 150 * Generates the power set for given item set $sample. 151 * 152 * @param mixed[] $sample 153 * 154 * @return mixed[][] 155 */ 156 private function powerSet(array $sample): array 157 { 158 $results = [[]]; 159 foreach ($sample as $item) { 160 foreach ($results as $combination) { 161 $results[] = array_merge([$item], $combination); 162 } 163 } 164 165 return $results; 166 } 167 168 /** 169 * Generates all proper subsets for given set $sample without the empty set. 170 * 171 * @param mixed[] $sample 172 * 173 * @return mixed[][] 174 */ 175 private function antecedents(array $sample): array 176 { 177 $cardinality = count($sample); 178 $antecedents = $this->powerSet($sample); 179 180 return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool { 181 return (count($antecedent) != $cardinality) && ($antecedent != []); 182 }); 183 } 184 185 /** 186 * Calculates frequent k = 1 item sets. 187 * 188 * @return mixed[][] 189 */ 190 private function items(): array 191 { 192 $items = []; 193 194 foreach ($this->samples as $sample) { 195 foreach ($sample as $item) { 196 if (!in_array($item, $items, true)) { 197 $items[] = $item; 198 } 199 } 200 } 201 202 return array_map(static function ($entry): array { 203 return [$entry]; 204 }, $items); 205 } 206 207 /** 208 * Returns frequent item sets only. 209 * 210 * @param mixed[][] $samples 211 * 212 * @return mixed[][] 213 */ 214 private function frequent(array $samples): array 215 { 216 return array_values(array_filter($samples, function ($entry): bool { 217 return $this->support($entry) >= $this->support; 218 })); 219 } 220 221 /** 222 * Calculates frequent k item sets, where count($samples) == $k - 1. 223 * 224 * @param mixed[][] $samples 225 * 226 * @return mixed[][] 227 */ 228 private function candidates(array $samples): array 229 { 230 $candidates = []; 231 232 foreach ($samples as $p) { 233 foreach ($samples as $q) { 234 if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) { 235 continue; 236 } 237 238 $candidate = array_values(array_unique(array_merge($p, $q))); 239 240 if ($this->contains($candidates, $candidate)) { 241 continue; 242 } 243 244 foreach ($this->samples as $sample) { 245 if ($this->subset($sample, $candidate)) { 246 $candidates[] = $candidate; 247 248 continue 2; 249 } 250 } 251 } 252 } 253 254 return $candidates; 255 } 256 257 /** 258 * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain 259 * $set. 260 * 261 * @param mixed[] $set 262 * @param mixed[] $subset 263 */ 264 private function confidence(array $set, array $subset): float 265 { 266 return $this->support($set) / $this->support($subset); 267 } 268 269 /** 270 * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data 271 * pool. 272 * 273 * @see \Phpml\Association\Apriori::samples 274 * 275 * @param mixed[] $sample 276 */ 277 private function support(array $sample): float 278 { 279 return $this->frequency($sample) / count($this->samples); 280 } 281 282 /** 283 * Counts occurrences of $sample as subset in data pool. 284 * 285 * @see \Phpml\Association\Apriori::samples 286 * 287 * @param mixed[] $sample 288 */ 289 private function frequency(array $sample): int 290 { 291 return count(array_filter($this->samples, function ($entry) use ($sample): bool { 292 return $this->subset($entry, $sample); 293 })); 294 } 295 296 /** 297 * Returns true if set is an element of system. 298 * 299 * @see \Phpml\Association\Apriori::equals() 300 * 301 * @param mixed[][] $system 302 * @param mixed[] $set 303 */ 304 private function contains(array $system, array $set): bool 305 { 306 return (bool) array_filter($system, function ($entry) use ($set): bool { 307 return $this->equals($entry, $set); 308 }); 309 } 310 311 /** 312 * Returns true if subset is a (proper) subset of set by its items string representation. 313 * 314 * @param mixed[] $set 315 * @param mixed[] $subset 316 */ 317 private function subset(array $set, array $subset): bool 318 { 319 return count(array_diff($subset, array_intersect($subset, $set))) === 0; 320 } 321 322 /** 323 * Returns true if string representation of items does not differ. 324 * 325 * @param mixed[] $set1 326 * @param mixed[] $set2 327 */ 328 private function equals(array $set1, array $set2): bool 329 { 330 return array_diff($set1, $set2) == array_diff($set2, $set1); 331 } 332 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body