<?php

declare(strict_types=1);

namespace Phpml\Helper\Optimizer;

use Closure;

/**
 * Conjugate Gradient method to minimize a non-linear f(x) with respect to an unknown x.
 * See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
 *
 * The method applied below is explained in a practical manner in:
 *  - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
 *
 * It follows the general Conjugate Gradient method with the Fletcher-Reeves
 * update. Note that f(x) is assumed to be one-dimensional, and a single
 * gradient is used across all dimensions of the given data.
 */
class ConjugateGradient extends GD
{
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;
        $this->sampleCount = count($samples);
        $this->costValues = [];

        // Initial direction is steepest descent: d = −∇f(θ)
        $d = MP::muls($this->gradient($this->theta), -1);

        for ($i = 0; $i < $this->maxIterations; ++$i) {
            // Obtain α that minimizes f(θ + α.d)
            $alpha = $this->getAlpha($d);

            // θ(k+1) = θ(k) + α.d
            $thetaNew = $this->getNewTheta($alpha, $d);

            // β = ||∇f(x(k+1))||²  ∕  ||∇f(x(k))||²
            $beta = $this->getBeta($thetaNew);

            // d(k+1) = −∇f(x(k+1)) + β(k).d(k)
            $d = $this->getNewDirection($thetaNew, $beta, $d);

            // Save values for the next iteration
            $oldTheta = $this->theta;
            $this->costValues[] = $this->cost($thetaNew);

            $this->theta = $thetaNew;
            if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
                break;
            }
        }

        $this->clear();

        return $this->theta;
    }

    /**
     * Executes the callback function for the problem and returns
     * the sum of the gradient over all samples and targets.
     */
    protected function gradient(array $theta): array
    {
        [, $updates, $penalty] = parent::gradient($theta);

        // Calculate the gradient for each dimension (index 0 is the intercept term)
        $gradient = [];
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            if ($i === 0) {
                $gradient[$i] = array_sum($updates);
            } else {
                $col = array_column($this->samples, $i - 1);
                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $gradient[$i] = $error + $penalty * $theta[$i];
            }
        }

        return $gradient;
    }

    /**
     * Returns the value of f(x) for the given solution.
     */
    protected function cost(array $theta): float
    {
        [$cost] = parent::gradient($theta);

        return array_sum($cost) / $this->sampleCount;
    }

    /**
     * Calculates the alpha that minimizes the function f(θ + α.d)
     * by performing a line search that does not rely on derivatives.
     *
     * There are several alternatives for this function. For now, we
     * prefer a method inspired by the bisection method for its simplicity.
     * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01.
     *
     * The algorithm is as follows:
     *  a) Probe a small alpha  (0.0001) and calculate the cost function
     *  b) Probe a larger alpha (0.01) and calculate the cost function
     *     b-1) If the cost function decreases, continue enlarging alpha
     *     b-2) If the cost function increases, take the midpoint and try again
     */
    protected function getAlpha(array $d): float
    {
        $small = MP::muls($d, 0.0001);
        $large = MP::muls($d, 0.01);

        // Obtain θ + α.d for two initial values, x0 and x1
        $x0 = MP::add($this->theta, $small);
        $x1 = MP::add($this->theta, $large);

        $epsilon = 0.0001;
        $iteration = 0;
        do {
            $fx1 = $this->cost($x1);
            $fx0 = $this->cost($x0);

            // If the difference between the two values is small enough,
            // then break the loop
            if (abs($fx1 - $fx0) <= $epsilon) {
                break;
            }

            if ($fx1 < $fx0) {
                // Enlarge the second probe
                $x0 = $x1;
                $x1 = MP::adds($x1, 0.01);
            } else {
                // Pull the second probe back to the midpoint
                $x1 = MP::divs(MP::add($x1, $x0), 2.0);
            }

            // Per-dimension cost; always advance the counter so the
            // loop is guaranteed to terminate
            $error = $fx1 / $this->dimensions;
            ++$iteration;
        } while ($error > $epsilon && $iteration < 10);

        // Return α = (x1 − θ) / d
        // For accuracy, choose the dimension that maximizes |d[i]|
        $imax = 0;
        for ($i = 1; $i <= $this->dimensions; ++$i) {
            if (abs($d[$i]) > abs($d[$imax])) {
                $imax = $i;
            }
        }

        if ($d[$imax] == 0) {
            return $x1[$imax] - $this->theta[$imax];
        }

        return ($x1[$imax] - $this->theta[$imax]) / $d[$imax];
    }
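
    /*
     * Worked example of the probe above (hypothetical numbers, one dimension):
     * with θ = [0.0] and d = [1.0], the probes start at x0 = θ + 0.0001·d and
     * x1 = θ + 0.01·d. If f(x1) < f(x0), the bracket is pushed right
     * (x0 ← x1, x1 ← x1 + 0.01); otherwise x1 is pulled back to the midpoint
     * (x1 ← (x1 + x0) / 2). Once |f(x1) − f(x0)| ≤ ε, the step length is read
     * off the dominant dimension: α = (x1[imax] − θ[imax]) / d[imax].
     */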

    /**
     * Calculates a new set of solutions with the given alpha (for each θ(k)) and
     * gradient direction.
     *
     * θ(k+1) = θ(k) + α.d
     */
    protected function getNewTheta(float $alpha, array $d): array
    {
        return MP::add($this->theta, MP::muls($d, $alpha));
    }

    /**
     * Calculates the new beta (β) for the given set of solutions using the
     * Fletcher–Reeves method.
     *
     * β = ||∇f(x(k+1))||²  ∕  ||∇f(x(k))||²
     *
     * See:
     *  R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
     */
    protected function getBeta(array $newTheta): float
    {
        $gNew = $this->gradient($newTheta);
        $gOld = $this->gradient($this->theta);
        $dNew = 0;
        $dOld = 1e-100; // Tiny constant guards against division by zero
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            $dNew += $gNew[$i] ** 2;
            $dOld += $gOld[$i] ** 2;
        }

        return $dNew / $dOld;
    }
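
    /*
     * Worked example (hypothetical gradients): for ∇f(x(k+1)) = [1, 2] and
     * ∇f(x(k)) = [2, 2], β = (1² + 2²) / (2² + 2²) = 5 / 8 = 0.625. A β below 1
     * means the gradient norm shrank, so the previous direction contributes
     * proportionally less to d(k+1).
     */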

    /**
     * Calculates the new conjugate direction.
     *
     * d(k+1) = −∇f(x(k+1)) + β(k).d(k)
     */
    protected function getNewDirection(array $theta, float $beta, array $d): array
    {
        $grad = $this->gradient($theta);

        return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
    }
}
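
/*
 * Minimal usage sketch (hypothetical; the parent GD class is not shown in this
 * file). Assuming GD's constructor takes the number of dimensions and that the
 * gradient callback returns [error, gradient, penalty] per sample, as the
 * parent::gradient() calls above imply:
 *
 *   $samples = [[1.0], [2.0], [3.0]];
 *   $targets = [2.2, 3.9, 6.1];
 *   $gradientCb = function (array $theta, array $sample, float $target): array {
 *       $prediction = $theta[0] + $theta[1] * $sample[0];
 *       $error = $prediction - $target;    // residual for this sample
 *       return [$error ** 2, $error, 0.0]; // [cost, update, penalty]
 *   };
 *
 *   $optimizer = new ConjugateGradient(1); // one feature dimension
 *   $theta = $optimizer->runOptimization($samples, $targets, $gradientCb);
 */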

/**
 * Handles element-wise operations between two vectors and between
 * a vector and a scalar.
 */
class MP
{
    /**
     * Element-wise <b>multiplication</b> of two vectors of the same size
     */
    public static function mul(array $m1, array $m2): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val * $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>division</b> of two vectors of the same size
     */
    public static function div(array $m1, array $m2): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val / $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>addition</b> of two vectors of the same size
     */
    public static function add(array $m1, array $m2, int $mag = 1): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val + $mag * $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>subtraction</b> of two vectors of the same size
     */
    public static function sub(array $m1, array $m2): array
    {
        return self::add($m1, $m2, -1);
    }

    /**
     * Element-wise <b>multiplication</b> of a vector by a scalar
     */
    public static function muls(array $m1, float $m2): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val * $m2;
        }

        return $res;
    }

    /**
     * Element-wise <b>division</b> of a vector by a scalar
     */
    public static function divs(array $m1, float $m2): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val / ($m2 + 1e-32); // Tiny constant avoids division by zero
        }

        return $res;
    }

    /**
     * Element-wise <b>addition</b> of a scalar to a vector
     */
    public static function adds(array $m1, float $m2, int $mag = 1): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val + $mag * $m2;
        }

        return $res;
    }

    /**
     * Element-wise <b>subtraction</b> of a scalar from a vector
     */
    public static function subs(array $m1, float $m2): array
    {
        return self::adds($m1, $m2, -1);
    }
}
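
/*
 * Example (hypothetical values): composing the helpers above to take the
 * update step θ(k+1) = θ(k) + α.d used in runOptimization():
 *
 *   $theta    = [0.5, -1.0];
 *   $d        = [1.0,  2.0];
 *   $alpha    = 0.01;
 *   $thetaNew = MP::add($theta, MP::muls($d, $alpha)); // [0.51, -0.98]
 */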