See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
<?php

declare(strict_types=1);

namespace Phpml\Helper\Optimizer;

use Closure;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\InvalidOperationException;

/**
 * Stochastic Gradient Descent optimization method
 * to find a solution for the equation A.ϴ = y where
 *  A (samples) and y (targets) are known and ϴ is unknown.
 */
class StochasticGD extends Optimizer
{
    /**
     * A (samples)
     *
     * @var array
     */
    protected $samples = [];

    /**
     * y (targets)
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Callback function to get the gradient and cost value
     * for a specific set of theta (ϴ) and a pair of sample & target
     *
     * @var \Closure|null
     */
    protected $gradientCb;

    /**
     * Maximum number of iterations used to train the model
     *
     * @var int
     */
    protected $maxIterations = 1000;

    /**
     * Learning rate is used to control the speed of the optimization.<br>
     *
     * Larger values of the learning rate may overshoot the optimum or even
     * cause divergence, while smaller values slow down convergence and
     * increase the time required for training
     *
     * @var float
     */
    protected $learningRate = 0.001;

    /**
     * Minimum amount of change in the weights and error values
     * between iterations required to continue the training
     *
     * @var float
     */
    protected $threshold = 1e-4;

    /**
     * Enable/Disable early stopping by checking the weight & cost values
     * to see whether they changed enough to continue the optimization
     *
     * @var bool
     */
    protected $enableEarlyStop = true;

    /**
     * List of values obtained by evaluating the cost function at each iteration
     * of the algorithm
     *
     * @var array
     */
    protected $costValues = [];

    /**
     * Initializes the SGD optimizer for the given number of dimensions
     */
    public function __construct(int $dimensions)
    {
        // Add one more dimension for the bias
        parent::__construct($dimensions + 1);

        $this->dimensions = $dimensions;
    }

    public function setTheta(array $theta): Optimizer
    {
        if (count($theta) !== $this->dimensions + 1) {
            throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions + 1));
        }

        $this->theta = $theta;

        return $this;
    }

    /**
     * Sets the minimum amount of change in the theta values
     * between iterations that is required to continue the iterations.<br>
     *
     * If the change in theta is less than the given value, the
     * algorithm will stop training
     *
     * @return $this
     */
    public function setChangeThreshold(float $threshold = 1e-5)
    {
        $this->threshold = $threshold;

        return $this;
    }

    /**
     * Enable/Disable early stopping, which checks at each iteration
     * whether the changes in theta or in the cost value are still large enough to continue
     *
     * @return $this
     */
    public function setEarlyStop(bool $enable = true)
    {
        $this->enableEarlyStop = $enable;

        return $this;
    }

    /**
     * @return $this
     */
    public function setLearningRate(float $learningRate)
    {
        $this->learningRate = $learningRate;

        return $this;
    }

    /**
     * @return $this
     */
    public function setMaxIterations(int $maxIterations)
    {
        $this->maxIterations = $maxIterations;

        return $this;
    }

    /**
     * The optimization procedure finds the unknown variables for the equation A.ϴ = y
     * for the given samples (A) and targets (y).<br>
     *
     * The cost function to minimize and the gradient of the function are to be
     * handled by the callback function provided as the third parameter of the method.
     */
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;

        $currIter = 0;
        $bestTheta = null;
        $bestScore = 0.0;
        $this->costValues = [];

        while ($this->maxIterations > $currIter++) {
            $theta = $this->theta;

            // Update the guess
            $cost = $this->updateTheta();

            // Save the best theta in the "pocket" so that
            // any future set of theta worse than this will be disregarded
            if ($bestTheta === null || $cost <= $bestScore) {
                $bestTheta = $theta;
                $bestScore = $cost;
            }

            // Add the cost value for this iteration to the list
            $this->costValues[] = $cost;

            // Check for early stop
            if ($this->enableEarlyStop && $this->earlyStop($theta)) {
                break;
            }
        }

        $this->clear();

        // The solution in the pocket is better than or equal to the last state,
        // so we use this solution
        return $this->theta = (array) $bestTheta;
    }

    /**
     * Returns the list of cost values for each iteration executed in
     * the last run of the optimization
     */
    public function getCostValues(): array
    {
        return $this->costValues;
    }

    protected function updateTheta(): float
    {
        $jValue = 0.0;
        $theta = $this->theta;

        if ($this->gradientCb === null) {
            throw new InvalidOperationException('Gradient callback is not defined');
        }

        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];

            $result = ($this->gradientCb)($theta, $sample, $target);

            [$error, $gradient, $penalty] = array_pad($result, 3, 0);

            // Update bias
            $this->theta[0] -= $this->learningRate * $gradient;

            // Update other values
            for ($i = 1; $i <= $this->dimensions; ++$i) {
                $this->theta[$i] -= $this->learningRate *
                    ($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
            }

            // Sum error rate
            $jValue += $error;
        }

        return $jValue / count($this->samples);
    }
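
    /*
     * In short: for each sample x, the gradient callback returns
     * [error, gradient, penalty] and updateTheta() applies the per-sample update
     *
     *   ϴ[0] := ϴ[0] - learningRate * gradient                               (bias)
     *   ϴ[i] := ϴ[i] - learningRate * (gradient * x[i-1] + penalty * ϴ[i])   (i >= 1)
     *
     * i.e. stochastic gradient descent with an optional penalty term, while the
     * returned value is the average of the per-sample errors (the cost).
     */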

    /**
     * Checks whether the optimization is no longer effective and can be stopped,
     * i.e. when sufficiently large changes in the solution no longer happen
     */
    protected function earlyStop(array $oldTheta): bool
    {
        // Check for early stop: no change larger than the threshold
        $diff = array_map(
            function ($w1, $w2) {
                return abs($w1 - $w2) > $this->threshold ? 1 : 0;
            },
            $oldTheta,
            $this->theta
        );

        if (array_sum($diff) == 0) {
            return true;
        }

        // Check if the last two cost values are almost the same
        $costs = array_slice($this->costValues, -2);
        if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
            return true;
        }

        return false;
    }

    /**
     * Clears the optimizer's internal variables after the optimization process.
     */
    protected function clear(): void
    {
        $this->samples = [];
        $this->targets = [];
        $this->gradientCb = null;
    }
}
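
For orientation, here is a minimal usage sketch. The sample data, hyperparameter values and least-squares callback are hypothetical (not taken from the file above); what is fixed by the class is the callback contract, which must return the [error, gradient, penalty] triple consumed by updateTheta().

<?php

// Usage sketch only: assumes the Phpml classes above are autoloadable.
use Phpml\Helper\Optimizer\StochasticGD;

// Hypothetical training data, roughly y = 2x + 1.
$samples = [[1.0], [2.0], [3.0], [4.0]];
$targets = [3.0, 5.0, 7.0, 9.0];

$optimizer = new StochasticGD(1); // one feature; the bias dimension is added internally
$optimizer->setLearningRate(0.01)
    ->setMaxIterations(5000)
    ->setChangeThreshold(1e-6);

$theta = $optimizer->runOptimization(
    $samples,
    $targets,
    function (array $theta, array $sample, $target): array {
        // Prediction = bias + weighted sum of the features.
        $prediction = $theta[0];
        foreach ($sample as $i => $value) {
            $prediction += $theta[$i + 1] * $value;
        }

        $error = ($prediction - $target) ** 2; // per-sample cost contribution
        $gradient = $prediction - $target;     // derivative of the squared error, up to a constant factor
        $penalty = 0;                          // no regularization in this sketch

        return [$error, $gradient, $penalty];
    }
);

// After convergence $theta should be close to [1.0, 2.0] (bias, slope).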