Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0 Note: minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.

Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Association;
   6  
   7  use Phpml\Helper\Predictable;
   8  use Phpml\Helper\Trainable;
   9  
  10  class Apriori implements Associator
  11  {
  12      use Trainable;
  13      use Predictable;
  14  
  15      public const ARRAY_KEY_ANTECEDENT = 'antecedent';
  16  
  17      public const ARRAY_KEY_CONFIDENCE = 'confidence';
  18  
  19      public const ARRAY_KEY_CONSEQUENT = 'consequent';
  20  
  21      public const ARRAY_KEY_SUPPORT = 'support';
  22  
  23      /**
  24       * Minimum relative probability of frequent transactions.
  25       *
  26       * @var float
  27       */
  28      private $confidence;
  29  
  30      /**
  31       * The large set contains frequent k-length item sets.
  32       *
  33       * @var mixed[][][]
  34       */
  35      private $large = [];
  36  
  37      /**
  38       * Minimum relative frequency of transactions.
  39       *
  40       * @var float
  41       */
  42      private $support;
  43  
  44      /**
  45       * The generated Apriori association rules.
  46       *
  47       * @var mixed[][]
  48       */
  49      private $rules = [];
  50  
  51      /**
  52       * Apriori constructor.
  53       */
  54      public function __construct(float $support = 0.0, float $confidence = 0.0)
  55      {
  56          $this->support = $support;
  57          $this->confidence = $confidence;
  58      }
  59  
  60      /**
  61       * Get all association rules which are generated for every k-length frequent item set.
  62       *
  63       * @return mixed[][]
  64       */
  65      public function getRules(): array
  66      {
  67          if (count($this->large) === 0) {
  68              $this->large = $this->apriori();
  69          }
  70  
  71          if (count($this->rules) > 0) {
  72              return $this->rules;
  73          }
  74  
  75          $this->rules = [];
  76  
  77          $this->generateAllRules();
  78  
  79          return $this->rules;
  80      }
  81  
  82      /**
  83       * Generates frequent item sets.
  84       *
  85       * @return mixed[][][]
  86       */
  87      public function apriori(): array
  88      {
  89          $L = [];
  90  
  91          $items = $this->frequent($this->items());
  92          for ($k = 1; isset($items[0]); ++$k) {
  93              $L[$k] = $items;
  94              $items = $this->frequent($this->candidates($items));
  95          }
  96  
  97          return $L;
  98      }
  99  
 100      /**
 101       * @param mixed[] $sample
 102       *
 103       * @return mixed[][]
 104       */
 105      protected function predictSample(array $sample): array
 106      {
 107          $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool {
 108              return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
 109          }));
 110  
 111          return array_map(static function ($rule) {
 112              return $rule[self::ARRAY_KEY_CONSEQUENT];
 113          }, $predicts);
 114      }
 115  
 116      /**
 117       * Generate rules for each k-length frequent item set.
 118       */
 119      private function generateAllRules(): void
 120      {
 121          for ($k = 2; isset($this->large[$k]); ++$k) {
 122              foreach ($this->large[$k] as $frequent) {
 123                  $this->generateRules($frequent);
 124              }
 125          }
 126      }
 127  
 128      /**
 129       * Generate confident rules for frequent item set.
 130       *
 131       * @param mixed[] $frequent
 132       */
 133      private function generateRules(array $frequent): void
 134      {
 135          foreach ($this->antecedents($frequent) as $antecedent) {
 136              $confidence = $this->confidence($frequent, $antecedent);
 137              if ($this->confidence <= $confidence) {
 138                  $consequent = array_values(array_diff($frequent, $antecedent));
 139                  $this->rules[] = [
 140                      self::ARRAY_KEY_ANTECEDENT => $antecedent,
 141                      self::ARRAY_KEY_CONSEQUENT => $consequent,
 142                      self::ARRAY_KEY_SUPPORT => $this->support($frequent),
 143                      self::ARRAY_KEY_CONFIDENCE => $confidence,
 144                  ];
 145              }
 146          }
 147      }
 148  
 149      /**
 150       * Generates the power set for given item set $sample.
 151       *
 152       * @param mixed[] $sample
 153       *
 154       * @return mixed[][]
 155       */
 156      private function powerSet(array $sample): array
 157      {
 158          $results = [[]];
 159          foreach ($sample as $item) {
 160              foreach ($results as $combination) {
 161                  $results[] = array_merge([$item], $combination);
 162              }
 163          }
 164  
 165          return $results;
 166      }
 167  
 168      /**
 169       * Generates all proper subsets for given set $sample without the empty set.
 170       *
 171       * @param mixed[] $sample
 172       *
 173       * @return mixed[][]
 174       */
 175      private function antecedents(array $sample): array
 176      {
 177          $cardinality = count($sample);
 178          $antecedents = $this->powerSet($sample);
 179  
 180          return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool {
 181              return (count($antecedent) != $cardinality) && ($antecedent != []);
 182          });
 183      }
 184  
 185      /**
 186       * Calculates frequent k = 1 item sets.
 187       *
 188       * @return mixed[][]
 189       */
 190      private function items(): array
 191      {
 192          $items = [];
 193  
 194          foreach ($this->samples as $sample) {
 195              foreach ($sample as $item) {
 196                  if (!in_array($item, $items, true)) {
 197                      $items[] = $item;
 198                  }
 199              }
 200          }
 201  
 202          return array_map(static function ($entry): array {
 203              return [$entry];
 204          }, $items);
 205      }
 206  
 207      /**
 208       * Returns frequent item sets only.
 209       *
 210       * @param mixed[][] $samples
 211       *
 212       * @return mixed[][]
 213       */
 214      private function frequent(array $samples): array
 215      {
 216          return array_values(array_filter($samples, function ($entry): bool {
 217              return $this->support($entry) >= $this->support;
 218          }));
 219      }
 220  
 221      /**
 222       * Calculates frequent k item sets, where count($samples) == $k - 1.
 223       *
 224       * @param mixed[][] $samples
 225       *
 226       * @return mixed[][]
 227       */
 228      private function candidates(array $samples): array
 229      {
 230          $candidates = [];
 231  
 232          foreach ($samples as $p) {
 233              foreach ($samples as $q) {
 234                  if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
 235                      continue;
 236                  }
 237  
 238                  $candidate = array_values(array_unique(array_merge($p, $q)));
 239  
 240                  if ($this->contains($candidates, $candidate)) {
 241                      continue;
 242                  }
 243  
 244                  foreach ($this->samples as $sample) {
 245                      if ($this->subset($sample, $candidate)) {
 246                          $candidates[] = $candidate;
 247  
 248                          continue 2;
 249                      }
 250                  }
 251              }
 252          }
 253  
 254          return $candidates;
 255      }
 256  
 257      /**
 258       * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
 259       * $set.
 260       *
 261       * @param mixed[] $set
 262       * @param mixed[] $subset
 263       */
 264      private function confidence(array $set, array $subset): float
 265      {
 266          return $this->support($set) / $this->support($subset);
 267      }
 268  
 269      /**
 270       * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
 271       * pool.
 272       *
 273       * @see \Phpml\Association\Apriori::samples
 274       *
 275       * @param mixed[] $sample
 276       */
 277      private function support(array $sample): float
 278      {
 279          return $this->frequency($sample) / count($this->samples);
 280      }
 281  
 282      /**
 283       * Counts occurrences of $sample as subset in data pool.
 284       *
 285       * @see \Phpml\Association\Apriori::samples
 286       *
 287       * @param mixed[] $sample
 288       */
 289      private function frequency(array $sample): int
 290      {
 291          return count(array_filter($this->samples, function ($entry) use ($sample): bool {
 292              return $this->subset($entry, $sample);
 293          }));
 294      }
 295  
 296      /**
 297       * Returns true if set is an element of system.
 298       *
 299       * @see \Phpml\Association\Apriori::equals()
 300       *
 301       * @param mixed[][] $system
 302       * @param mixed[]   $set
 303       */
 304      private function contains(array $system, array $set): bool
 305      {
 306          return (bool) array_filter($system, function ($entry) use ($set): bool {
 307              return $this->equals($entry, $set);
 308          });
 309      }
 310  
 311      /**
 312       * Returns true if subset is a (proper) subset of set by its items string representation.
 313       *
 314       * @param mixed[] $set
 315       * @param mixed[] $subset
 316       */
 317      private function subset(array $set, array $subset): bool
 318      {
 319          return count(array_diff($subset, array_intersect($subset, $set))) === 0;
 320      }
 321  
 322      /**
 323       * Returns true if string representation of items does not differ.
 324       *
 325       * @param mixed[] $set1
 326       * @param mixed[] $set2
 327       */
 328      private function equals(array $set1, array $set2): bool
 329      {
 330          return array_diff($set1, $set2) == array_diff($set2, $set1);
 331      }
 332  }