<?php

declare(strict_types=1);

namespace Phpml\Helper\Optimizer;

use Closure;

/**
 * Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
 * See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
 *
 * The method applied below is explained in a practical manner in the following document:
 * - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
 *
 * However, it is compliant with the general Conjugate Gradient method with the
 * Fletcher-Reeves update. Note that f(x) is assumed to be one-dimensional and
 * a single gradient is used for all dimensions of the given data.
 */
class ConjugateGradient extends GD
{
    public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
    {
        $this->samples = $samples;
        $this->targets = $targets;
        $this->gradientCb = $gradientCb;
        $this->sampleCount = count($samples);
        $this->costValues = [];

        $d = MP::muls($this->gradient($this->theta), -1);

        for ($i = 0; $i < $this->maxIterations; ++$i) {
            // Obtain α that minimizes f(θ + α.d)
            $alpha = $this->getAlpha($d);

            // θ(k+1) = θ(k) + α.d
            $thetaNew = $this->getNewTheta($alpha, $d);

            // β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||²
            $beta = $this->getBeta($thetaNew);

            // d(k+1) = −∇f(x(k+1)) + β(k).d(k)
            $d = $this->getNewDirection($thetaNew, $beta, $d);

            // Save values for the next iteration
            $oldTheta = $this->theta;
            $this->costValues[] = $this->cost($thetaNew);

            $this->theta = $thetaNew;
            if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
                break;
            }
        }

        $this->clear();

        return $this->theta;
    }

    /**
     * Executes the callback function for the problem and returns
     * the sum of the gradient over all samples & targets.
     */
    protected function gradient(array $theta): array
    {
        [, $updates, $penalty] = parent::gradient($theta);

        // Calculate the gradient for each dimension
        $gradient = [];
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            if ($i === 0) {
                $gradient[$i] = array_sum($updates);
            } else {
                $col = array_column($this->samples, $i - 1);
                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $gradient[$i] = $error + $penalty * $theta[$i];
            }
        }

        return $gradient;
    }

    /**
     * Returns the value of f(x) for the given solution
     */
    protected function cost(array $theta): float
    {
        [$cost] = parent::gradient($theta);

        return array_sum($cost) / (int) $this->sampleCount;
    }
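    /*
     * Illustrative note (not part of the original code): for a single-feature
     * data set, gradient() above produces
     *
     *   gradient[0] = Σ updates[i]                               (bias / intercept term)
     *   gradient[1] = Σ samples[i][0] * updates[i] + penalty * θ[1]
     *
     * For example, with samples [[2], [3]], updates [0.5, -1.0] and penalty 0:
     *   gradient[0] = 0.5 + (-1.0)         = -0.5
     *   gradient[1] = 2 * 0.5 + 3 * (-1.0) = -2.0
     */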
    /**
     * Calculates the alpha that minimizes the function f(θ + α.d)
     * by performing a line search that does not rely on the derivative.
     *
     * There are several alternatives for this function. For now, we
     * prefer a method inspired by the bisection method for its simplicity.
     * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01.
     *
     * The algorithm is as follows:
     * a) Probe a small alpha (0.0001) and calculate the cost function
     * b) Probe a larger alpha (0.01) and calculate the cost function
     *    b-1) If the cost function decreases, continue enlarging alpha
     *    b-2) If the cost function increases, take the midpoint and try again
     */
    protected function getAlpha(array $d): float
    {
        $small = MP::muls($d, 0.0001);
        $large = MP::muls($d, 0.01);

        // Obtain θ + α.d for two initial values, x0 and x1
        $x0 = MP::add($this->theta, $small);
        $x1 = MP::add($this->theta, $large);

        $epsilon = 0.0001;
        $iteration = 0;
        do {
            $fx1 = $this->cost($x1);
            $fx0 = $this->cost($x0);

            // If the difference between the two values is small enough
            // then break the loop
            if (abs($fx1 - $fx0) <= $epsilon) {
                break;
            }

            if ($fx1 < $fx0) {
                $x0 = $x1;
                $x1 = MP::adds($x1, 0.01); // Enlarge second
            } else {
                $x1 = MP::divs(MP::add($x1, $x0), 2.0);
            } // Get to the midpoint

            $error = $fx1 / $this->dimensions;
        } while ($error <= $epsilon || $iteration++ < 10);

        // Return α = (x1 - θ) / d
        // For accuracy, choose the dimension which maximizes |d[i]|
        $imax = 0;
        for ($i = 1; $i <= $this->dimensions; ++$i) {
            if (abs($d[$i]) > abs($d[$imax])) {
                $imax = $i;
            }
        }

        if ($d[$imax] == 0) {
            return $x1[$imax] - $this->theta[$imax];
        }

        return ($x1[$imax] - $this->theta[$imax]) / $d[$imax];
    }
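    /*
     * Illustrative note (not part of the original code): getAlpha() recovers α
     * from the dimension with the largest |d[i]|, since x1 = θ + α.d implies
     * α = (x1[imax] - θ[imax]) / d[imax].
     *
     * For example, if θ[imax] = 1.0, x1[imax] = 1.3 and d[imax] = 2.0,
     * then α = (1.3 - 1.0) / 2.0 = 0.15.
     */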
    /**
     * Calculates the new set of solutions with the given alpha (for each θ(k)) and
     * gradient direction.
     *
     * θ(k+1) = θ(k) + α.d
     */
    protected function getNewTheta(float $alpha, array $d): array
    {
        return MP::add($this->theta, MP::muls($d, $alpha));
    }

    /**
     * Calculates the new beta (β) for the given set of solutions by using
     * the Fletcher–Reeves method.
     *
     * β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||²
     *
     * See:
     * R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
     */
    protected function getBeta(array $newTheta): float
    {
        $gNew = $this->gradient($newTheta);
        $gOld = $this->gradient($this->theta);
        $dNew = 0;
        $dOld = 1e-100;
        for ($i = 0; $i <= $this->dimensions; ++$i) {
            $dNew += $gNew[$i] ** 2;
            $dOld += $gOld[$i] ** 2;
        }

        return $dNew / $dOld;
    }

    /**
     * Calculates the new conjugate direction
     *
     * d(k+1) = −∇f(x(k+1)) + β(k).d(k)
     */
    protected function getNewDirection(array $theta, float $beta, array $d): array
    {
        $grad = $this->gradient($theta);

        return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
    }
}

/**
 * Handles element-wise vector operations for vector–vector
 * and vector–scalar operands
 */
class MP
{
    /**
     * Element-wise <b>multiplication</b> of two vectors of the same size
     */
    public static function mul(array $m1, array $m2): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val * $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>division</b> of two vectors of the same size
     */
    public static function div(array $m1, array $m2): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val / $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>addition</b> of two vectors of the same size
     */
    public static function add(array $m1, array $m2, int $mag = 1): array
    {
        $res = [];
        foreach ($m1 as $i => $val) {
            $res[] = $val + $mag * $m2[$i];
        }

        return $res;
    }

    /**
     * Element-wise <b>subtraction</b> of two vectors of the same size
     */
    public static function sub(array $m1, array $m2): array
    {
        return self::add($m1, $m2, -1);
    }

    /**
     * Element-wise <b>multiplication</b> of a vector by a scalar
     */
    public static function muls(array $m1, float $m2): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val * $m2;
        }

        return $res;
    }

    /**
     * Element-wise <b>division</b> of a vector by a scalar
     */
    public static function divs(array $m1, float $m2): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val / ($m2 + 1e-32);
        }

        return $res;
    }

    /**
     * Element-wise <b>addition</b> of a vector and a scalar
     */
    public static function adds(array $m1, float $m2, int $mag = 1): array
    {
        $res = [];
        foreach ($m1 as $val) {
            $res[] = $val + $mag * $m2;
        }

        return $res;
    }

    /**
     * Element-wise <b>subtraction</b> of a scalar from a vector
     */
    public static function subs(array $m1, float $m2): array
    {
        return self::adds($m1, $m2, -1);
    }
}
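/*
 * Usage sketch (illustrative only, not part of this file): how the optimizer
 * might be driven for a least-squares problem. The exact signature of the
 * $gradientCb closure is defined by the GD base class (not shown here); the
 * assumption below is that it receives the current θ, one sample and its
 * target, and returns [cost, gradient, penalty] as consumed by
 * parent::gradient(). The one-argument constructor is also an assumption
 * inherited from the optimizer base class.
 *
 *   $samples = [[1.0], [2.0], [3.0]];
 *   $targets = [2.0, 4.0, 6.0];
 *
 *   $gradientCb = function (array $theta, array $sample, $target): array {
 *       // Hypothetical linear model: prediction = θ[0] + θ[1] * x
 *       $prediction = $theta[0] + $theta[1] * $sample[0];
 *       $error = $prediction - $target;
 *
 *       return [$error ** 2, $error, 0];
 *   };
 *
 *   $optimizer = new ConjugateGradient(1); // one feature dimension (assumed)
 *   $theta = $optimizer->runOptimization($samples, $targets, $gradientCb);
 */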