Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Math\Statistic;
   6  
   7  use Phpml\Exception\InvalidArgumentException;
   8  
   9  /**
  10   * Analysis of variance
  11   * https://en.wikipedia.org/wiki/Analysis_of_variance
  12   */
  13  final class ANOVA
  14  {
  15      /**
  16       * The one-way ANOVA tests the null hypothesis that 2 or more groups have
  17       * the same population mean. The test is applied to samples from two or
  18       * more groups, possibly with differing sizes.
  19       *
  20       * @param array[] $samples - each row is class samples
  21       *
  22       * @return float[]
  23       */
  24      public static function oneWayF(array $samples): array
  25      {
  26          $classes = count($samples);
  27          if ($classes < 2) {
  28              throw new InvalidArgumentException('The array must have at least 2 elements');
  29          }
  30  
  31          $samplesPerClass = array_map(static function (array $class): int {
  32              return count($class);
  33          }, $samples);
  34          $allSamples = (int) array_sum($samplesPerClass);
  35          $ssAllSamples = self::sumOfSquaresPerFeature($samples);
  36          $sumSamples = self::sumOfFeaturesPerClass($samples);
  37          $squareSumSamples = self::sumOfSquares($sumSamples);
  38          $sumSamplesSquare = self::squaresSum($sumSamples);
  39          $ssbn = self::calculateSsbn($samples, $sumSamplesSquare, $samplesPerClass, $squareSumSamples, $allSamples);
  40          $sswn = self::calculateSswn($ssbn, $ssAllSamples, $squareSumSamples, $allSamples);
  41          $dfbn = $classes - 1;
  42          $dfwn = $allSamples - $classes;
  43  
  44          $msb = array_map(static function ($s) use ($dfbn) {
  45              return $s / $dfbn;
  46          }, $ssbn);
  47          $msw = array_map(static function ($s) use ($dfwn) {
  48              if ($dfwn === 0) {
  49                  return 1;
  50              }
  51  
  52              return $s / $dfwn;
  53          }, $sswn);
  54  
  55          $f = [];
  56          foreach ($msb as $index => $msbValue) {
  57              $f[$index] = $msbValue / $msw[$index];
  58          }
  59  
  60          return $f;
  61      }
  62  
  63      private static function sumOfSquaresPerFeature(array $samples): array
  64      {
  65          $sum = array_fill(0, count($samples[0][0]), 0);
  66          foreach ($samples as $class) {
  67              foreach ($class as $sample) {
  68                  foreach ($sample as $index => $feature) {
  69                      $sum[$index] += $feature ** 2;
  70                  }
  71              }
  72          }
  73  
  74          return $sum;
  75      }
  76  
  77      private static function sumOfFeaturesPerClass(array $samples): array
  78      {
  79          return array_map(static function (array $class): array {
  80              $sum = array_fill(0, count($class[0]), 0);
  81              foreach ($class as $sample) {
  82                  foreach ($sample as $index => $feature) {
  83                      $sum[$index] += $feature;
  84                  }
  85              }
  86  
  87              return $sum;
  88          }, $samples);
  89      }
  90  
  91      private static function sumOfSquares(array $sums): array
  92      {
  93          $squares = array_fill(0, count($sums[0]), 0);
  94          foreach ($sums as $row) {
  95              foreach ($row as $index => $sum) {
  96                  $squares[$index] += $sum;
  97              }
  98          }
  99  
 100          return array_map(static function ($sum) {
 101              return $sum ** 2;
 102          }, $squares);
 103      }
 104  
 105      private static function squaresSum(array $sums): array
 106      {
 107          foreach ($sums as &$row) {
 108              foreach ($row as &$sum) {
 109                  $sum **= 2;
 110              }
 111          }
 112  
 113          return $sums;
 114      }
 115  
 116      private static function calculateSsbn(array $samples, array $sumSamplesSquare, array $samplesPerClass, array $squareSumSamples, int $allSamples): array
 117      {
 118          $ssbn = array_fill(0, count($samples[0][0]), 0);
 119          foreach ($sumSamplesSquare as $classIndex => $class) {
 120              foreach ($class as $index => $feature) {
 121                  $ssbn[$index] += $feature / $samplesPerClass[$classIndex];
 122              }
 123          }
 124  
 125          foreach ($squareSumSamples as $index => $sum) {
 126              $ssbn[$index] -= $sum / $allSamples;
 127          }
 128  
 129          return $ssbn;
 130      }
 131  
 132      private static function calculateSswn(array $ssbn, array $ssAllSamples, array $squareSumSamples, int $allSamples): array
 133      {
 134          $sswn = [];
 135          foreach ($ssAllSamples as $index => $ss) {
 136              $sswn[$index] = ($ss - $squareSumSamples[$index] / $allSamples) - $ssbn[$index];
 137          }
 138  
 139          return $sswn;
 140      }
 141  }