Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace Phpml\Classification\Ensemble;
   6  
   7  use Phpml\Classification\Classifier;
   8  use Phpml\Classification\DecisionTree;
   9  use Phpml\Exception\InvalidArgumentException;
  10  
  11  class RandomForest extends Bagging
  12  {
  13      /**
  14       * @var float|string
  15       */
  16      protected $featureSubsetRatio = 'log';
  17  
  18      /**
  19       * @var array|null
  20       */
  21      protected $columnNames;
  22  
  23      /**
  24       * Initializes RandomForest with the given number of trees. More trees
  25       * may increase the prediction performance while it will also substantially
  26       * increase the processing time and the required memory
  27       */
  28      public function __construct(int $numClassifier = 50)
  29      {
  30          parent::__construct($numClassifier);
  31  
  32          $this->setSubsetRatio(1.0);
  33      }
  34  
  35      /**
  36       * This method is used to determine how many of the original columns (features)
  37       * will be used to construct subsets to train base classifiers.<br>
  38       *
  39       * Allowed values: 'sqrt', 'log' or any float number between 0.1 and 1.0 <br>
  40       *
  41       * Default value for the ratio is 'log' which results in log(numFeatures, 2) + 1
  42       * features to be taken into consideration while selecting subspace of features
  43       *
  44       * @param string|float $ratio
  45       */
  46      public function setFeatureSubsetRatio($ratio): self
  47      {
  48          if (!is_string($ratio) && !is_float($ratio)) {
  49              throw new InvalidArgumentException('Feature subset ratio must be a string or a float');
  50          }
  51  
  52          if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) {
  53              throw new InvalidArgumentException('When a float is given, feature subset ratio should be between 0.1 and 1.0');
  54          }
  55  
  56          if (is_string($ratio) && $ratio !== 'sqrt' && $ratio !== 'log') {
  57              throw new InvalidArgumentException("When a string is given, feature subset ratio can only be 'sqrt' or 'log'");
  58          }
  59  
  60          $this->featureSubsetRatio = $ratio;
  61  
  62          return $this;
  63      }
  64  
  65      /**
  66       * RandomForest algorithm is usable *only* with DecisionTree
  67       *
  68       * @return $this
  69       */
  70      public function setClassifer(string $classifier, array $classifierOptions = [])
  71      {
  72          if ($classifier !== DecisionTree::class) {
  73              throw new InvalidArgumentException('RandomForest can only use DecisionTree as base classifier');
  74          }
  75  
  76          return parent::setClassifer($classifier, $classifierOptions);
  77      }
  78  
  79      /**
  80       * This will return an array including an importance value for
  81       * each column in the given dataset. Importance values for a column
  82       * is the average importance of that column in all trees in the forest
  83       */
  84      public function getFeatureImportances(): array
  85      {
  86          // Traverse each tree and sum importance of the columns
  87          $sum = [];
  88          foreach ($this->classifiers as $tree) {
  89              /** @var DecisionTree $tree */
  90              $importances = $tree->getFeatureImportances();
  91  
  92              foreach ($importances as $column => $importance) {
  93                  if (array_key_exists($column, $sum)) {
  94                      $sum[$column] += $importance;
  95                  } else {
  96                      $sum[$column] = $importance;
  97                  }
  98              }
  99          }
 100  
 101          // Normalize & sort the importance values
 102          $total = array_sum($sum);
 103          array_walk($sum, function (&$importance) use ($total): void {
 104              $importance /= $total;
 105          });
 106          arsort($sum);
 107  
 108          return $sum;
 109      }
 110  
 111      /**
 112       * A string array to represent the columns is given. They are useful
 113       * when trying to print some information about the trees such as feature importances
 114       *
 115       * @return $this
 116       */
 117      public function setColumnNames(array $names)
 118      {
 119          $this->columnNames = $names;
 120  
 121          return $this;
 122      }
 123  
 124      /**
 125       * @param DecisionTree $classifier
 126       *
 127       * @return DecisionTree
 128       */
 129      protected function initSingleClassifier(Classifier $classifier): Classifier
 130      {
 131          if (is_float($this->featureSubsetRatio)) {
 132              $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);
 133          } elseif ($this->featureSubsetRatio === 'sqrt') {
 134              $featureCount = (int) ($this->featureCount ** .5) + 1;
 135          } else {
 136              $featureCount = (int) log($this->featureCount, 2) + 1;
 137          }
 138  
 139          if ($featureCount >= $this->featureCount) {
 140              $featureCount = $this->featureCount;
 141          }
 142  
 143          if ($this->columnNames === null) {
 144              $this->columnNames = range(0, $this->featureCount - 1);
 145          }
 146  
 147          return $classifier
 148              ->setColumnNames($this->columnNames)
 149              ->setNumFeatures($featureCount);
 150      }
 151  }