Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Prediction model representation.
  19   *
  20   * @package   core_analytics
  21   * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
  22   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace core_analytics;
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Prediction model representation.
  31   *
  32   * @package   core_analytics
  33   * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
  34   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  35   */
  36  class model {
  37  
  /**
   * All as expected.
   *
   * Used as a result status, e.g. by train() when a static model is skipped.
   */
  const OK = 0;

  /**
   * There was a problem.
   */
  const GENERAL_ERROR = 1;

  /**
   * No dataset to analyse.
   *
   * Returned as the result status by evaluate() and train() when the analyser
   * produced no usable dataset.
   */
  const NO_DATASET = 2;

  /**
   * Model with low prediction accuracy.
   *
   * NOTE(review): the result status values (0, 1, 2, 4, 8) look like bit flags - confirm.
   */
  const LOW_SCORE = 4;

  /**
   * Not enough data to evaluate the model properly.
   */
  const NOT_ENOUGH_DATA = 8;

  /**
   * Invalid analysable for the time splitting method.
   *
   * Note that this shares the numeric value 4 with self::LOW_SCORE.
   */
  const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

  /**
   * Invalid analysable for all time splitting methods.
   *
   * Note that this shares the numeric value 8 with self::NOT_ENOUGH_DATA.
   */
  const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

  /**
   * Invalid analysable for the target
   */
  const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

  /**
   * Minimum score to consider a non-static prediction model as good.
   */
  const MIN_SCORE = 0.7;

  /**
   * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
   */
  const PREDICTION_MIN_SCORE = 0.6;

  /**
   * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
   *
   * Passed by evaluate() to the predictions processor.
   */
  const ACCEPTED_DEVIATION = 0.05;

  /**
   * Number of evaluation repetitions.
   *
   * Passed by evaluate() to the predictions processor.
   */
  const EVALUATION_ITERATIONS = 10;

  /**
   * The model record, as provided to or loaded by the constructor.
   *
   * @var \stdClass
   */
  protected $model = null;

  /**
   * The model analyser, created on demand by init_analyser().
   *
   * @var \core_analytics\local\analyser\base
   */
  protected $analyser = null;

  /**
   * The model target, cached by get_target().
   *
   * @var \core_analytics\local\target\base
   */
  protected $target = null;

  /**
   * @var \core_analytics\predictor
   */
  protected $predictionsprocessor = null;

  /**
   * The model indicators, cached by get_indicators() and indexed by full class name.
   *
   * @var \core_analytics\local\indicator\base[]
   */
  protected $indicators = null;

  /**
   * Internal cache of the model contexts; update() resets it when the context ids change.
   *
   * @var \context[]
   */
  protected $contexts = null;

  /**
   * Unique Model id created from site info and last model modification.
   *
   * update() resets it whenever the model version changes.
   *
   * @var string
   */
  protected $uniqueid = null;
 134  
 135      /**
 136       * Constructor.
 137       *
 138       * @param int|\stdClass $model
 139       * @return void
 140       */
 141      public function __construct($model) {
 142          global $DB;
 143  
 144          if (is_scalar($model)) {
 145              $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
 146              if (!$model) {
 147                  throw new \moodle_exception('errorunexistingmodel', 'analytics', '', $model);
 148              }
 149          }
 150          $this->model = $model;
 151      }
 152  
 153      /**
 154       * Quick safety check to discard site models which required components are not available anymore.
 155       *
 156       * @return bool
 157       */
 158      public function is_available() {
 159          $target = $this->get_target();
 160          if (!$target) {
 161              return false;
 162          }
 163  
 164          $classname = $target->get_analyser_class();
 165          if (!class_exists($classname)) {
 166              return false;
 167          }
 168  
 169          return true;
 170      }
 171  
 172      /**
 173       * Returns the model id.
 174       *
 175       * @return int
 176       */
 177      public function get_id() {
 178          return $this->model->id;
 179      }
 180  
 181      /**
 182       * Returns a plain \stdClass with the model data.
 183       *
 184       * @return \stdClass
 185       */
 186      public function get_model_obj() {
 187          return $this->model;
 188      }
 189  
 190      /**
 191       * Returns the model target.
 192       *
 193       * @return \core_analytics\local\target\base
 194       */
 195      public function get_target() {
 196          if ($this->target !== null) {
 197              return $this->target;
 198          }
 199          $instance = \core_analytics\manager::get_target($this->model->target);
 200          $this->target = $instance;
 201  
 202          return $this->target;
 203      }
 204  
 205      /**
 206       * Returns the model indicators.
 207       *
 208       * @return \core_analytics\local\indicator\base[]
 209       */
 210      public function get_indicators() {
 211          if ($this->indicators !== null) {
 212              return $this->indicators;
 213          }
 214  
 215          $fullclassnames = json_decode($this->model->indicators);
 216  
 217          if (!is_array($fullclassnames)) {
 218              throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
 219          }
 220  
 221          $this->indicators = array();
 222          foreach ($fullclassnames as $fullclassname) {
 223              $instance = \core_analytics\manager::get_indicator($fullclassname);
 224              if ($instance) {
 225                  $this->indicators[$fullclassname] = $instance;
 226              } else {
 227                  debugging('Can\'t load ' . $fullclassname . ' indicator', DEBUG_DEVELOPER);
 228              }
 229          }
 230  
 231          return $this->indicators;
 232      }
 233  
 234      /**
 235       * Returns the list of indicators that could potentially be used by the model target.
 236       *
 237       * It includes the indicators that are part of the model.
 238       *
 239       * @return \core_analytics\local\indicator\base[]
 240       */
 241      public function get_potential_indicators() {
 242  
 243          $indicators = \core_analytics\manager::get_all_indicators();
 244  
 245          if (empty($this->analyser)) {
 246              $this->init_analyser(array('notimesplitting' => true));
 247          }
 248  
 249          foreach ($indicators as $classname => $indicator) {
 250              if ($this->analyser->check_indicator_requirements($indicator) !== true) {
 251                  unset($indicators[$classname]);
 252              }
 253          }
 254          return $indicators;
 255      }
 256  
 257      /**
 258       * Returns the model analyser (defined by the model target).
 259       *
 260       * @param array $options Default initialisation with no options.
 261       * @return \core_analytics\local\analyser\base
 262       */
 263      public function get_analyser($options = array()) {
 264          if ($this->analyser !== null) {
 265              return $this->analyser;
 266          }
 267  
 268          $this->init_analyser($options);
 269  
 270          return $this->analyser;
 271      }
 272  
 273      /**
 274       * Initialises the model analyser.
 275       *
 276       * @throws \coding_exception
 277       * @param array $options
 278       * @return void
 279       */
 280      protected function init_analyser($options = array()) {
 281  
 282          $target = $this->get_target();
 283          $indicators = $this->get_indicators();
 284  
 285          if (empty($target)) {
 286              throw new \moodle_exception('errornotarget', 'analytics');
 287          }
 288  
 289          $potentialtimesplittings = $this->get_potential_timesplittings();
 290  
 291          $timesplittings = array();
 292          if (empty($options['notimesplitting'])) {
 293              if (!empty($options['evaluation'])) {
 294                  // The evaluation process will run using all available time splitting methods unless one is specified.
 295                  if (!empty($options['timesplitting'])) {
 296                      $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
 297  
 298                      if (empty($potentialtimesplittings[$timesplitting->get_id()])) {
 299                          throw new \moodle_exception('errorcannotusetimesplitting', 'analytics');
 300                      }
 301                      $timesplittings = array($timesplitting->get_id() => $timesplitting);
 302                  } else {
 303                      $timesplittingsforevaluation = \core_analytics\manager::get_time_splitting_methods_for_evaluation();
 304  
 305                      // They both have the same objects, using $potentialtimesplittings as its items are sorted.
 306                      $timesplittings = array_intersect_key($potentialtimesplittings, $timesplittingsforevaluation);
 307                  }
 308              } else {
 309  
 310                  if (empty($this->model->timesplitting)) {
 311                      throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
 312                  }
 313  
 314                  // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
 315                  $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
 316              }
 317  
 318              if (empty($timesplittings)) {
 319                  throw new \moodle_exception('errornotimesplittings', 'analytics');
 320              }
 321          }
 322  
 323          $classname = $target->get_analyser_class();
 324          if (!class_exists($classname)) {
 325              throw new \coding_exception($classname . ' class does not exists');
 326          }
 327  
 328          // Returns a \core_analytics\local\analyser\base class.
 329          $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
 330      }
 331  
 332      /**
 333       * Returns the model time splitting method.
 334       *
 335       * @return \core_analytics\local\time_splitting\base|false Returns false if no time splitting.
 336       */
 337      public function get_time_splitting() {
 338          if (empty($this->model->timesplitting)) {
 339              return false;
 340          }
 341          return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
 342      }
 343  
 344      /**
 345       * Returns the time-splitting methods that can be used by this model.
 346       *
 347       * @return \core_analytics\local\time_splitting\base[]
 348       */
 349      public function get_potential_timesplittings() {
 350  
 351          $timesplittings = \core_analytics\manager::get_all_time_splittings();
 352          uasort($timesplittings, function($a, $b) {
 353              return strcasecmp($a->get_name(), $b->get_name());
 354          });
 355  
 356          foreach ($timesplittings as $key => $timesplitting) {
 357              if (!$this->get_target()->can_use_timesplitting($timesplitting)) {
 358                  unset($timesplittings[$key]);
 359                  continue;
 360              }
 361          }
 362          return $timesplittings;
 363      }
 364  
 365      /**
 366       * Creates a new model. Enables it if $timesplittingid is specified.
 367       *
 368       * @param \core_analytics\local\target\base $target
 369       * @param \core_analytics\local\indicator\base[] $indicators
 370       * @param string|false $timesplittingid The time splitting method id (its fully qualified class name)
 371       * @param string|null $processor The machine learning backend this model will use.
 372       * @return \core_analytics\model
 373       */
 374      public static function create(\core_analytics\local\target\base $target, array $indicators,
 375                                    $timesplittingid = false, $processor = null) {
 376          global $USER, $DB;
 377  
 378          $indicatorclasses = self::indicator_classes($indicators);
 379  
 380          $now = time();
 381  
 382          $modelobj = new \stdClass();
 383          $modelobj->target = $target->get_id();
 384          $modelobj->indicators = json_encode($indicatorclasses);
 385          $modelobj->version = $now;
 386          $modelobj->timecreated = $now;
 387          $modelobj->timemodified = $now;
 388          $modelobj->usermodified = $USER->id;
 389  
 390          if ($target->based_on_assumptions()) {
 391              $modelobj->trained = 1;
 392          }
 393  
 394          if ($timesplittingid) {
 395              if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base')) {
 396                  throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
 397              }
 398              if (substr($timesplittingid, 0, 1) !== '\\') {
 399                  throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
 400              }
 401              $modelobj->timesplitting = $timesplittingid;
 402          }
 403  
 404          if ($processor &&
 405                  !manager::is_valid($processor, '\core_analytics\classifier') &&
 406                  !manager::is_valid($processor, '\core_analytics\regressor')) {
 407              throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
 408          } else {
 409              $modelobj->predictionsprocessor = $processor;
 410          }
 411  
 412          $id = $DB->insert_record('analytics_models', $modelobj);
 413  
 414          // Get db defaults.
 415          $modelobj = $DB->get_record('analytics_models', array('id' => $id), '*', MUST_EXIST);
 416  
 417          $model = new static($modelobj);
 418  
 419          return $model;
 420      }
 421  
 422      /**
 423       * Does this model exist?
 424       *
 425       * If no indicators are provided it considers any model with the provided
 426       * target a match.
 427       *
 428       * @param \core_analytics\local\target\base $target
 429       * @param \core_analytics\local\indicator\base[]|false $indicators
 430       * @return bool
 431       */
 432      public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
 433          global $DB;
 434  
 435          $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));
 436  
 437          if (!$existingmodels) {
 438              return false;
 439          }
 440  
 441          if (!$indicators && $existingmodels) {
 442              return true;
 443          }
 444  
 445          $indicatorids = array_keys($indicators);
 446          sort($indicatorids);
 447  
 448          foreach ($existingmodels as $modelobj) {
 449              $model = new \core_analytics\model($modelobj);
 450              $modelindicatorids = array_keys($model->get_indicators());
 451              sort($modelindicatorids);
 452  
 453              if ($indicatorids === $modelindicatorids) {
 454                  return true;
 455              }
 456          }
 457          return false;
 458      }
 459  
 460      /**
 461       * Updates the model.
 462       *
 463       * @param int|bool $enabled
 464       * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
 465       * @param string|false $timesplittingid False to respect current time splitting method
 466       * @param string|false $predictionsprocessor False to respect current predictors processor value
 467       * @param int[]|false $contextids List of context ids for this model. False to respect the current list of contexts.
 468       * @return void
 469       */
 470      public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false,
 471              $contextids = false) {
 472          global $USER, $DB;
 473  
 474          \core_analytics\manager::check_can_manage_models();
 475  
 476          $now = time();
 477  
 478          if ($indicators !== false) {
 479              $indicatorclasses = self::indicator_classes($indicators);
 480              $indicatorsstr = json_encode($indicatorclasses);
 481          } else {
 482              // Respect current value.
 483              $indicatorsstr = $this->model->indicators;
 484          }
 485  
 486          if ($timesplittingid === false) {
 487              // Respect current value.
 488              $timesplittingid = $this->model->timesplitting;
 489          }
 490  
 491          if ($predictionsprocessor === false) {
 492              // Respect current value.
 493              $predictionsprocessor = $this->model->predictionsprocessor;
 494          }
 495  
 496          if ($contextids === false) {
 497              $contextsstr = $this->model->contextids;
 498          } else if (!$contextids) {
 499              $contextsstr = null;
 500          } else {
 501              $contextsstr = json_encode($contextids);
 502  
 503              // Reset the internal cache.
 504              $this->contexts = null;
 505          }
 506  
 507          if ($this->model->timesplitting !== $timesplittingid ||
 508                  $this->model->indicators !== $indicatorsstr ||
 509                  $this->model->predictionsprocessor !== $predictionsprocessor) {
 510  
 511              // Delete generated predictions before changing the model version.
 512              $this->clear();
 513  
 514              // It needs to be reset as the version changes.
 515              $this->uniqueid = null;
 516              $this->indicators = null;
 517  
 518              // We update the version of the model so different time splittings are not mixed up.
 519              $this->model->version = $now;
 520  
 521              // Reset trained flag.
 522              if (!$this->is_static()) {
 523                  $this->model->trained = 0;
 524              }
 525  
 526          } else if ($this->model->enabled != $enabled) {
 527              // We purge the cached contexts with insights as some will not be visible anymore.
 528              $this->purge_insights_cache();
 529          }
 530  
 531          $this->model->enabled = intval($enabled);
 532          $this->model->indicators = $indicatorsstr;
 533          $this->model->timesplitting = $timesplittingid;
 534          $this->model->predictionsprocessor = $predictionsprocessor;
 535          $this->model->contextids = $contextsstr;
 536          $this->model->timemodified = $now;
 537          $this->model->usermodified = $USER->id;
 538  
 539          $DB->update_record('analytics_models', $this->model);
 540      }
 541  
 542      /**
 543       * Removes the model.
 544       *
 545       * @return void
 546       */
 547      public function delete() {
 548          global $DB;
 549  
 550          \core_analytics\manager::check_can_manage_models();
 551  
 552          $this->clear();
 553  
 554          // Method self::clear is already clearing the current model version.
 555          $predictor = $this->get_predictions_processor(false);
 556          if ($predictor->is_ready() !== true) {
 557              $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
 558              debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
 559                  $this->model->id . ' could not be deleted.');
 560          } else {
 561              $predictor->delete_output_dir($this->get_output_dir(array(), true), $this->get_unique_id());
 562          }
 563  
 564          $DB->delete_records('analytics_models', array('id' => $this->model->id));
 565          $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id));
 566      }
 567  
  /**
   * Evaluates the model.
   *
   * This method gets the site contents (through the analyser) creates a .csv dataset
   * with them and evaluates the model prediction accuracy multiple times using the
   * machine learning backend. It returns an object where the model score is the average
   * prediction accuracy of all executed evaluations.
   *
   * Options read by this method:
   * - mode: 'configuration' (the default) or 'trainedmodel'. The latter restricts the
   *   evaluation to the time splitting method of the model and provides the trained
   *   model directory to the predictions processor.
   * The options are also forwarded to the analyser with 'evaluation' set to true.
   *
   * Static models are not evaluated, a single result with self::NO_DATASET status is
   * returned for them.
   *
   * @throws \moodle_exception If the mode is unknown or the model has no indicators.
   * @param array $options
   * @return \stdClass[] A single element array when there is nothing to evaluate; otherwise one result
   *                     per time splitting method, indexed by the time splitting method id.
   */
  public function evaluate($options = array()) {

      \core_analytics\manager::check_can_manage_models();

      if ($this->is_static()) {
          $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
          $result = new \stdClass();
          $result->status = self::NO_DATASET;
          return array($result);
      }

      $options['evaluation'] = true;

      if (empty($options['mode'])) {
          $options['mode'] = 'configuration';
      }

      switch ($options['mode']) {
          case 'trainedmodel':

              // We are only interested on the time splitting method used by the trained model.
              $options['timesplitting'] = $this->model->timesplitting;

              // Provide the trained model directory to the ML backend if that is what we want to evaluate.
              $trainedmodeldir = $this->get_output_dir(['execution']);
              break;
          case 'configuration':

              $trainedmodeldir = false;
              break;

          default:
              throw new \moodle_exception('errorunknownaction', 'analytics');
      }

      // The analyser is always initialised again here so it uses the evaluation options.
      $this->init_analyser($options);

      if (empty($this->get_indicators())) {
          throw new \moodle_exception('errornoindicators', 'analytics');
      }

      $this->heavy_duty_mode();

      // Before get_labelled_data call so we get an early exception if it is not ready.
      $predictor = $this->get_predictions_processor();

      $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());

      // No datasets generated.
      if (empty($datasets)) {
          $result = new \stdClass();
          $result->status = self::NO_DATASET;
          $result->info = $this->get_analyser()->get_logs();
          return array($result);
      }

      // Progress output is limited to command line executions outside of unit tests.
      if (!PHPUNIT_TEST && CLI_SCRIPT) {
          echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
      }

      $results = array();
      foreach ($datasets as $timesplittingid => $dataset) {

          $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

          $result = new \stdClass();

          // The backslashes are removed from the class name before using it as part of the output directory.
          $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
          $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

          // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
          if ($this->get_target()->is_linear()) {
              $predictorresult = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                  self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
          } else {
              $predictorresult = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                  self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
          }

          $result->status = $predictorresult->status;
          $result->info = $predictorresult->info;

          if (isset($predictorresult->score)) {
              $result->score = $predictorresult->score;
          } else {
              // Prediction processors may return an error, default to 0 score in that case.
              $result->score = 0;
          }

          // The directory reported by the predictions processor is optional.
          $dir = false;
          if (!empty($predictorresult->dir)) {
              $dir = $predictorresult->dir;
          }

          // Every evaluation is logged, the log id is part of the returned result.
          $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info, $options['mode']);

          $results[$timesplitting->get_id()] = $result;
      }

      return $results;
  }
 680  
 681      /**
 682       * Trains the model using the site contents.
 683       *
 684       * This method prepares a dataset from the site contents (through the analyser)
 685       * and passes it to the machine learning backends. Static models are skipped as
 686       * they do not require training.
 687       *
 688       * @return \stdClass
 689       */
 690      public function train() {
 691  
 692          \core_analytics\manager::check_can_manage_models();
 693  
 694          if ($this->is_static()) {
 695              $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
 696              $result = new \stdClass();
 697              $result->status = self::OK;
 698              return $result;
 699          }
 700  
 701          if (!$this->is_enabled() || empty($this->model->timesplitting)) {
 702              throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
 703          }
 704  
 705          if (empty($this->get_indicators())) {
 706              throw new \moodle_exception('errornoindicators', 'analytics');
 707          }
 708  
 709          $this->heavy_duty_mode();
 710  
 711          // Before get_labelled_data call so we get an early exception if it is not writable.
 712          $outputdir = $this->get_output_dir(array('execution'));
 713  
 714          // Before get_labelled_data call so we get an early exception if it is not ready.
 715          $predictor = $this->get_predictions_processor();
 716  
 717          $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());
 718  
 719          // No training if no files have been provided.
 720          if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
 721  
 722              $result = new \stdClass();
 723              $result->status = self::NO_DATASET;
 724              $result->info = $this->get_analyser()->get_logs();
 725              return $result;
 726          }
 727          $samplesfile = $datasets[$this->model->timesplitting];
 728  
 729          // Train using the dataset.
 730          if ($this->get_target()->is_linear()) {
 731              $predictorresult = $predictor->train_regression($this->get_unique_id(), $samplesfile, $outputdir);
 732          } else {
 733              $predictorresult = $predictor->train_classification($this->get_unique_id(), $samplesfile, $outputdir);
 734          }
 735  
 736          $result = new \stdClass();
 737          $result->status = $predictorresult->status;
 738          $result->info = $predictorresult->info;
 739  
 740          if ($result->status !== self::OK) {
 741              return $result;
 742          }
 743  
 744          $this->flag_file_as_used($samplesfile, 'trained');
 745  
 746          // Mark the model as trained if it wasn't.
 747          if ($this->model->trained == false) {
 748              $this->mark_as_trained();
 749          }
 750  
 751          return $result;
 752      }
 753  
 754      /**
 755       * Get predictions from the site contents.
 756       *
 757       * It analyses the site contents (through analyser classes) looking for samples
 758       * ready to receive predictions. It generates a dataset with all samples ready to
 759       * get predictions and it passes it to the machine learning backends or to the
 760       * targets based on assumptions to get the predictions.
 761       *
 762       * @return \stdClass
 763       */
 764      public function predict() {
 765          global $DB;
 766  
 767          \core_analytics\manager::check_can_manage_models();
 768  
 769          if (!$this->is_enabled() || empty($this->model->timesplitting)) {
 770              throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
 771          }
 772  
 773          if (empty($this->get_indicators())) {
 774              throw new \moodle_exception('errornoindicators', 'analytics');
 775          }
 776  
 777          $this->heavy_duty_mode();
 778  
 779          // Before get_unlabelled_data call so we get an early exception if it is not writable.
 780          $outputdir = $this->get_output_dir(array('execution'));
 781  
 782          if (!$this->is_static()) {
 783              // Predictions using a machine learning backend.
 784  
 785              // Before get_unlabelled_data call so we get an early exception if it is not ready.
 786              $predictor = $this->get_predictions_processor();
 787  
 788              $samplesdata = $this->get_analyser()->get_unlabelled_data($this->get_contexts());
 789  
 790              // Get the prediction samples file.
 791              if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {
 792  
 793                  $result = new \stdClass();
 794                  $result->status = self::NO_DATASET;
 795                  $result->info = $this->get_analyser()->get_logs();
 796                  return $result;
 797              }
 798              $samplesfile = $samplesdata[$this->model->timesplitting];
 799  
 800              // We need to throw an exception if we are trying to predict stuff that was already predicted.
 801              $params = array('modelid' => $this->model->id, 'action' => 'predicted', 'fileid' => $samplesfile->get_id());
 802              if ($predicted = $DB->get_record('analytics_used_files', $params)) {
 803                  throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
 804              }
 805  
 806              $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);
 807  
 808              // Estimation and classification processes run on the machine learning backend side.
 809              if ($this->get_target()->is_linear()) {
 810                  $predictorresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
 811              } else {
 812                  $predictorresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
 813              }
 814  
 815              // Prepare the results object.
 816              $result = new \stdClass();
 817              $result->status = $predictorresult->status;
 818              $result->info = $predictorresult->info;
 819              $result->predictions = $this->format_predictor_predictions($predictorresult);
 820  
 821          } else {
 822              // Predictions based on assumptions.
 823  
 824              $indicatorcalculations = $this->get_analyser()->get_static_data($this->get_contexts());
 825              // Get the prediction samples file.
 826              if (empty($indicatorcalculations) || empty($indicatorcalculations[$this->model->timesplitting])) {
 827  
 828                  $result = new \stdClass();
 829                  $result->status = self::NO_DATASET;
 830                  $result->info = $this->get_analyser()->get_logs();
 831                  return $result;
 832              }
 833  
 834              // Same as reset($indicatorcalculations) as models based on assumptions only analyse 1 single
 835              // time-splitting method.
 836              $indicatorcalculations = $indicatorcalculations[$this->model->timesplitting];
 837  
 838              // Prepare the results object.
 839              $result = new \stdClass();
 840              $result->status = self::OK;
 841              $result->info = [];
 842              $result->predictions = $this->get_static_predictions($indicatorcalculations);
 843          }
 844  
 845          if ($result->status !== self::OK) {
 846              return $result;
 847          }
 848  
 849          if ($result->predictions) {
 850              list($samplecontexts, $predictionrecords) = $this->execute_prediction_callbacks($result->predictions,
 851                  $indicatorcalculations);
 852          }
 853  
 854          if (!empty($samplecontexts) && $this->uses_insights()) {
 855              $this->trigger_insights($samplecontexts, $predictionrecords);
 856          }
 857  
 858          if (!$this->is_static()) {
 859              $this->flag_file_as_used($samplesfile, 'predicted');
 860          }
 861  
 862          return $result;
 863      }
 864  
 865      /**
 866       * Returns the model predictions processor.
 867       *
 868       * @param bool $checkisready
 869       * @return \core_analytics\predictor
 870       */
 871      public function get_predictions_processor($checkisready = true) {
 872          return manager::get_predictions_processor($this->model->predictionsprocessor, $checkisready);
 873      }
 874  
 875      /**
 876       * Formats the predictor results.
 877       *
 878       * @param array $predictorresult
 879       * @return array
 880       */
 881      private function format_predictor_predictions($predictorresult) {
 882  
 883          $predictions = array();
 884          if (!empty($predictorresult->predictions)) {
 885              foreach ($predictorresult->predictions as $sampleinfo) {
 886  
 887                  // We parse each prediction.
 888                  switch (count($sampleinfo)) {
 889                      case 1:
 890                          // For whatever reason the predictions processor could not process this sample, we
 891                          // skip it and do nothing with it.
 892                          debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
 893                              $sampleinfo[0], DEBUG_DEVELOPER);
 894                          continue 2;
 895                      case 2:
 896                          // Prediction processors that do not return a prediction score will have the maximum prediction
 897                          // score.
 898                          list($uniquesampleid, $prediction) = $sampleinfo;
 899                          $predictionscore = 1;
 900                          break;
 901                      case 3:
 902                          list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
 903                          break;
 904                      default:
 905                          break;
 906                  }
 907                  $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
 908                  $predictions[$uniquesampleid] = $predictiondata;
 909              }
 910          }
 911          return $predictions;
 912      }
 913  
 914      /**
 915       * Execute the prediction callbacks defined by the target.
 916       *
 917       * @param \stdClass[] $predictions
 918       * @param array $indicatorcalculations
 919       * @return array
 920       */
 921      protected function execute_prediction_callbacks(&$predictions, $indicatorcalculations) {
 922  
 923          // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
 924          $samplecontexts = array();
 925          $records = array();
 926  
 927          foreach ($predictions as $uniquesampleid => $prediction) {
 928  
 929              // The unique sample id contains both the sampleid and the rangeindex.
 930              list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
 931              if ($this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
 932  
 933                  // Prepare the record to store the predicted values.
 934                  list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
 935                      $prediction->predictionscore, json_encode($indicatorcalculations[$uniquesampleid]));
 936  
 937                  // We will later bulk-insert them all.
 938                  $records[$uniquesampleid] = $record;
 939  
 940                  // Also store all samples context to later generate insights or whatever action the target wants to perform.
 941                  $samplecontexts[$samplecontext->id] = $samplecontext;
 942  
 943                  $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
 944                      $prediction->prediction, $prediction->predictionscore);
 945              }
 946          }
 947  
 948          if (!empty($records)) {
 949              $this->save_predictions($records);
 950          }
 951  
 952          return [$samplecontexts, $records];
 953      }
 954  
 955      /**
 956       * Generates insights and updates the cache.
 957       *
 958       * @param \context[] $samplecontexts
 959       * @param  \stdClass[] $predictionrecords
 960       * @return void
 961       */
 962      protected function trigger_insights($samplecontexts, $predictionrecords) {
 963  
 964          // Notify the target that all predictions have been processed.
 965          if ($this->get_analyser()::one_sample_per_analysable()) {
 966  
 967              // We need to do something unusual here. self::save_predictions uses the bulk-insert function (insert_records()) for
 968              // performance reasons and that function does not return us the inserted ids. We need to retrieve them from
 969              // the database, and we need to do it using one single database query (for performance reasons as well).
 970              $predictionrecords = $this->add_prediction_ids($predictionrecords);
 971  
 972              $samplesdata = $this->predictions_sample_data($predictionrecords);
 973              $samplesdata = $this->append_calculations_info($predictionrecords, $samplesdata);
 974  
 975              $predictions = array_map(function($predictionobj) use ($samplesdata) {
 976                  $prediction = new \core_analytics\prediction($predictionobj, $samplesdata[$predictionobj->sampleid]);
 977                  return $prediction;
 978              }, $predictionrecords);
 979          } else {
 980              $predictions = [];
 981          }
 982  
 983          $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts, $predictions);
 984  
 985          if ($this->get_target()->link_insights_report()) {
 986  
 987              // Update cache.
 988              foreach ($samplecontexts as $context) {
 989                  \core_analytics\manager::cached_models_with_insights($context, $this->get_id());
 990              }
 991          }
 992      }
 993  
    /**
     * Get predictions from a static model.
     *
     * The provided calculations are modified in place: the headers row is removed, each remaining
     * row is re-keyed using the headers (without the sampleid column) and the rows of the samples
     * that the target could not calculate are removed.
     *
     * @param array $indicatorcalculations Passed by reference. The first element is the headers row, the rest
     *                                     are rows indexed by unique sample id.
     * @return \stdClass[] Objects with prediction and predictionscore (always 1) attributes, indexed by
     *                     unique sample id.
     */
    protected function get_static_predictions(&$indicatorcalculations) {

        // The first row contains the headers, it is removed from the calculations.
        $headers = array_shift($indicatorcalculations);

        // Get rid of the sampleid header.
        array_shift($headers);

        // Group samples by analysable for \core_analytics\local\target::calculate.
        $analysables = array();
        // List all sampleids together.
        $sampleids = array();

        foreach ($indicatorcalculations as $uniquesampleid => $indicators) {

            // Get rid of the sampleid column.
            unset($indicators[0]);
            // Use the header names as keys and store the row back in the by-reference array.
            $indicators = array_combine($headers, $indicators);
            $indicatorcalculations[$uniquesampleid] = $indicators;

            list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

            // Groups are indexed by the analysable class name and the range index; the analysable object
            // stored in a group is the one of the first sample that was added to it.
            $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
            $analysableclass = get_class($analysable);
            if (empty($analysables[$analysableclass])) {
                $analysables[$analysableclass] = array();
            }
            if (empty($analysables[$analysableclass][$rangeindex])) {
                $analysables[$analysableclass][$rangeindex] = (object)[
                    'analysable' => $analysable,
                    'indicatorsdata' => array(),
                    'sampleids' => array()
                ];
            }

            // Using the sampleid as a key so we can easily merge indicators data later.
            $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
            // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
            $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

            // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
            $sampleids[$sampleid] = $sampleid;
        }

        // Get all samples data.
        list($sampleids, $samplesdata) = $this->get_samples($sampleids);

        // Calculate the targets.
        $predictions = array();
        foreach ($analysables as $analysableclass => $rangedata) {
            foreach ($rangedata as $rangeindex => $data) {

                // Attach samples data and calculated indicators data.
                $this->get_target()->clear_sample_data();
                $this->get_target()->add_sample_data($samplesdata);
                $this->get_target()->add_sample_data($data->indicatorsdata);

                // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
                $timesplitting = $this->get_time_splitting();
                $timesplitting->set_modelid($this->get_id());
                $timesplitting->set_analysable($data->analysable);
                $range = $timesplitting->get_range_by_index($rangeindex);

                // NOTE(review): $data->sampleids is presumably received by reference and reduced here - confirm
                // against the target's filter_out_invalid_samples signature.
                $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
                $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

                // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
                // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
                // by self::save_prediction.
                $indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid)
                        use ($calculations, $rangeindex) {
                    list($sampleid, $indicatorsrangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    // Only the rows of the range being processed are candidates for removal.
                    if ($rangeindex == $indicatorsrangeindex && !isset($calculations[$sampleid])) {
                        return false;
                    }
                    return true;
                }, ARRAY_FILTER_USE_BOTH);

                foreach ($calculations as $sampleid => $value) {

                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                    // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                    if (is_null($calculations[$sampleid])) {
                        unset($indicatorcalculations[$uniquesampleid]);
                        continue;
                    }

                    // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                    // true according to what the developer defined.
                    $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
                }
            }
        }
        return $predictions;
    }
1095  
1096      /**
1097       * Stores the prediction in the database.
1098       *
1099       * @param int $sampleid
1100       * @param int $rangeindex
1101       * @param int $prediction
1102       * @param float $predictionscore
1103       * @param string $calculations
1104       * @return \context
1105       */
1106      protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
1107          $context = $this->get_analyser()->sample_access_context($sampleid);
1108  
1109          $record = new \stdClass();
1110          $record->modelid = $this->model->id;
1111          $record->contextid = $context->id;
1112          $record->sampleid = $sampleid;
1113          $record->rangeindex = $rangeindex;
1114          $record->prediction = $prediction;
1115          $record->predictionscore = $predictionscore;
1116          $record->calculations = $calculations;
1117          $record->timecreated = time();
1118  
1119          $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
1120          $timesplitting = $this->get_time_splitting();
1121          $timesplitting->set_modelid($this->get_id());
1122          $timesplitting->set_analysable($analysable);
1123          $range = $timesplitting->get_range_by_index($rangeindex);
1124          if ($range) {
1125              $record->timestart = $range['start'];
1126              $record->timeend = $range['end'];
1127          }
1128  
1129          return array($record, $context);
1130      }
1131  
1132      /**
1133       * Save the prediction objects.
1134       *
1135       * @param \stdClass[] $records
1136       */
1137      protected function save_predictions($records) {
1138          global $DB;
1139          $DB->insert_records('analytics_predictions', $records);
1140      }
1141  
1142      /**
1143       * Enabled the model using the provided time splitting method.
1144       *
1145       * @param string|false $timesplittingid False to respect the current time splitting method.
1146       * @return void
1147       */
1148      public function enable($timesplittingid = false) {
1149          global $DB, $USER;
1150  
1151          $now = time();
1152  
1153          if ($timesplittingid && $timesplittingid !== $this->model->timesplitting) {
1154  
1155              if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base')) {
1156                  throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
1157              }
1158  
1159              if (substr($timesplittingid, 0, 1) !== '\\') {
1160                  throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
1161              }
1162  
1163              // Delete generated predictions before changing the model version.
1164              $this->clear();
1165  
1166              // It needs to be reset as the version changes.
1167              $this->uniqueid = null;
1168  
1169              $this->model->timesplitting = $timesplittingid;
1170              $this->model->version = $now;
1171  
1172              // Reset trained flag.
1173              if (!$this->is_static()) {
1174                  $this->model->trained = 0;
1175              }
1176          } else if (empty($this->model->timesplitting)) {
1177              // A valid timesplitting method needs to be supplied before a model can be enabled.
1178              throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
1179  
1180          }
1181  
1182          // Purge pages with insights as this may change things.
1183          if ($this->model->enabled != 1) {
1184              $this->purge_insights_cache();
1185          }
1186  
1187          $this->model->enabled = 1;
1188          $this->model->timemodified = $now;
1189          $this->model->usermodified = $USER->id;
1190  
1191          // We don't always update timemodified intentionally as we reserve it for target, indicators or timesplitting updates.
1192          $DB->update_record('analytics_models', $this->model);
1193      }
1194  
1195      /**
1196       * Is this a static model (as defined by the target)?.
1197       *
1198       * Static models are based on assumptions instead of in machine learning
1199       * backends results.
1200       *
1201       * @return bool
1202       */
1203      public function is_static() {
1204          return (bool)$this->get_target()->based_on_assumptions();
1205      }
1206  
1207      /**
1208       * Is this model enabled?
1209       *
1210       * @return bool
1211       */
1212      public function is_enabled() {
1213          return (bool)$this->model->enabled;
1214      }
1215  
1216      /**
1217       * Is this model already trained?
1218       *
1219       * @return bool
1220       */
1221      public function is_trained() {
1222          // Models which targets are based on assumptions do not need training.
1223          return (bool)$this->model->trained || $this->is_static();
1224      }
1225  
1226      /**
1227       * Marks the model as trained
1228       *
1229       * @return void
1230       */
1231      public function mark_as_trained() {
1232          global $DB;
1233  
1234          \core_analytics\manager::check_can_manage_models();
1235  
1236          $this->model->trained = 1;
1237          $DB->update_record('analytics_models', $this->model);
1238      }
1239  
1240      /**
1241       * Get the contexts with predictions.
1242       *
1243       * @param bool $skiphidden Skip hidden predictions
1244       * @return \stdClass[]
1245       */
1246      public function get_predictions_contexts($skiphidden = true) {
1247          global $DB, $USER;
1248  
1249          $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
1250                    JOIN {context} ctx ON ctx.id = ap.contextid
1251                   WHERE ap.modelid = :modelid";
1252          $params = array('modelid' => $this->model->id);
1253  
1254          if ($skiphidden) {
1255              $sql .= " AND NOT EXISTS (
1256                SELECT 1
1257                  FROM {analytics_prediction_actions} apa
1258                 WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
1259                       (apa.actionname = :fixed OR apa.actionname = :notuseful OR
1260                       apa.actionname = :useful OR apa.actionname = :notapplicable OR
1261                       apa.actionname = :incorrectlyflagged)
1262              )";
1263              $params['userid'] = $USER->id;
1264              $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
1265              $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
1266              $params['useful'] = \core_analytics\prediction::ACTION_USEFUL;
1267              $params['notapplicable'] = \core_analytics\prediction::ACTION_NOT_APPLICABLE;
1268              $params['incorrectlyflagged'] = \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED;
1269          }
1270  
1271          return $DB->get_records_sql($sql, $params);
1272      }
1273  
1274      /**
1275       * Has this model generated predictions?
1276       *
1277       * We don't check analytics_predictions table because targets have the ability to
1278       * ignore some predicted values, if that is the case predictions are not even stored
1279       * in db.
1280       *
1281       * @return bool
1282       */
1283      public function any_prediction_obtained() {
1284          global $DB;
1285          return $DB->record_exists('analytics_predict_samples',
1286              array('modelid' => $this->model->id, 'timesplitting' => $this->model->timesplitting));
1287      }
1288  
1289      /**
1290       * Whether this model generates insights or not (defined by the model's target).
1291       *
1292       * @return bool
1293       */
1294      public function uses_insights() {
1295          $target = $this->get_target();
1296          return $target::uses_insights();
1297      }
1298  
1299      /**
1300       * Whether predictions exist for this context.
1301       *
1302       * @param \context $context
1303       * @return bool
1304       */
1305      public function predictions_exist(\context $context) {
1306          global $DB;
1307  
1308          // Filters out previous predictions keeping only the last time range one.
1309          $select = "modelid = :modelid AND contextid = :contextid";
1310          $params = array('modelid' => $this->model->id, 'contextid' => $context->id);
1311          return $DB->record_exists_select('analytics_predictions', $select, $params);
1312      }
1313  
1314      /**
1315       * Gets the predictions for this context.
1316       *
1317       * @param \context $context
1318       * @param bool $skiphidden Skip hidden predictions
1319       * @param int $page The page of results to fetch. False for all results.
1320       * @param int $perpage The max number of results to fetch. Ignored if $page is false.
1321       * @return array($total, \core_analytics\prediction[])
1322       */
1323      public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
1324          global $DB, $USER;
1325  
1326          \core_analytics\manager::check_can_list_insights($context);
1327  
1328          // Filters out previous predictions keeping only the last time range one.
1329          $sql = "SELECT ap.*
1330                    FROM {analytics_predictions} ap
1331                    JOIN (
1332                      SELECT sampleid, max(rangeindex) AS rangeindex
1333                        FROM {analytics_predictions}
1334                       WHERE modelid = :modelidsubap and contextid = :contextidsubap
1335                      GROUP BY sampleid
1336                    ) apsub
1337                    ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
1338                  WHERE ap.modelid = :modelid and ap.contextid = :contextid";
1339  
1340          $params = array('modelid' => $this->model->id, 'contextid' => $context->id,
1341              'modelidsubap' => $this->model->id, 'contextidsubap' => $context->id);
1342  
1343          if ($skiphidden) {
1344              $sql .= " AND NOT EXISTS (
1345                SELECT 1
1346                  FROM {analytics_prediction_actions} apa
1347                 WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
1348                       (apa.actionname = :fixed OR apa.actionname = :notuseful OR
1349                       apa.actionname = :useful OR apa.actionname = :notapplicable OR
1350                       apa.actionname = :incorrectlyflagged)
1351              )";
1352              $params['userid'] = $USER->id;
1353              $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
1354              $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
1355              $params['useful'] = \core_analytics\prediction::ACTION_USEFUL;
1356              $params['notapplicable'] = \core_analytics\prediction::ACTION_NOT_APPLICABLE;
1357              $params['incorrectlyflagged'] = \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED;
1358          }
1359  
1360          $sql .= " ORDER BY ap.timecreated DESC";
1361          if (!$predictions = $DB->get_records_sql($sql, $params)) {
1362              return array();
1363          }
1364  
1365          // Get predicted samples' ids.
1366          $sampleids = array_map(function($prediction) {
1367              return $prediction->sampleid;
1368          }, $predictions);
1369  
1370          list($unused, $samplesdata) = $this->get_samples($sampleids);
1371  
1372          $current = 0;
1373  
1374          if ($page !== false) {
1375              $offset = $page * $perpage;
1376              $limit = $offset + $perpage;
1377          }
1378  
1379          foreach ($predictions as $predictionid => $predictiondata) {
1380  
1381              $sampleid = $predictiondata->sampleid;
1382  
1383              // Filter out predictions which samples are not available anymore.
1384              if (empty($samplesdata[$sampleid])) {
1385                  unset($predictions[$predictionid]);
1386                  continue;
1387              }
1388  
1389              // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
1390              if ($page === false || ($current >= $offset && $current < $limit)) {
1391                  // Replace \stdClass object by \core_analytics\prediction objects.
1392                  $prediction = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
1393                  $predictions[$predictionid] = $prediction;
1394              } else {
1395                  unset($predictions[$predictionid]);
1396              }
1397  
1398              $current++;
1399          }
1400  
1401          if (empty($predictions)) {
1402              return array();
1403          }
1404  
1405          return [$current, $predictions];
1406      }
1407  
1408      /**
1409       * Returns the actions executed by users on the predictions.
1410       *
1411       * @param  \context|null $context
1412       * @return \moodle_recordset
1413       */
1414      public function get_prediction_actions(?\context $context): \moodle_recordset {
1415          global $DB;
1416  
1417          $sql = "SELECT apa.id, apa.predictionid, apa.userid, apa.actionname, apa.timecreated,
1418                         ap.contextid, ap.sampleid, ap.rangeindex, ap.prediction, ap.predictionscore
1419                    FROM {analytics_prediction_actions} apa
1420                    JOIN {analytics_predictions} ap ON ap.id = apa.predictionid
1421                   WHERE ap.modelid = :modelid";
1422          $params = ['modelid' => $this->model->id];
1423  
1424          if ($context) {
1425              $sql .= " AND ap.contextid = :contextid";
1426              $params['contextid'] = $context->id;
1427          }
1428  
1429          return $DB->get_recordset_sql($sql, $params);
1430      }
1431  
1432      /**
1433       * Returns the sample data of a prediction.
1434       *
1435       * @param \stdClass $predictionobj
1436       * @return array
1437       */
1438      public function prediction_sample_data($predictionobj) {
1439  
1440          list($unused, $samplesdata) = $this->get_samples(array($predictionobj->sampleid));
1441  
1442          if (empty($samplesdata[$predictionobj->sampleid])) {
1443              throw new \moodle_exception('errorsamplenotavailable', 'analytics');
1444          }
1445  
1446          return $samplesdata[$predictionobj->sampleid];
1447      }
1448  
1449      /**
1450       * Returns the samples data of the provided predictions.
1451       *
1452       * @param \stdClass[] $predictionrecords
1453       * @return array
1454       */
1455      public function predictions_sample_data(array $predictionrecords): array {
1456  
1457          $sampleids = [];
1458          foreach ($predictionrecords as $predictionobj) {
1459              $sampleids[] = $predictionobj->sampleid;
1460          }
1461          list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
1462  
1463          return $samplesdata;
1464      }
1465  
1466      /**
1467       * Appends the calculation info to the samples data.
1468       *
1469       * @param   \stdClass[] $predictionrecords
1470       * @param   array $samplesdata
1471       * @return  array
1472       */
1473      public function append_calculations_info(array $predictionrecords, array $samplesdata): array {
1474  
1475          if ($extrainfo = calculation_info::pull_info($predictionrecords)) {
1476              foreach ($samplesdata as $sampleid => $data) {
1477                  // The extra info come prefixed by extra: so we will not have overwrites here.
1478                  $samplesdata[$sampleid] = $samplesdata[$sampleid] + $extrainfo[$sampleid];
1479              }
1480          }
1481          return $samplesdata;
1482      }
1483  
1484      /**
1485       * Returns the description of a sample
1486       *
1487       * @param \core_analytics\prediction $prediction
1488       * @return array 2 elements: list(string, \renderable)
1489       */
1490      public function prediction_sample_description(\core_analytics\prediction $prediction) {
1491          return $this->get_analyser()->sample_description($prediction->get_prediction_data()->sampleid,
1492              $prediction->get_prediction_data()->contextid, $prediction->get_sample_data());
1493      }
1494  
1495      /**
1496       * Returns the default output directory for prediction processors
1497       *
1498       * @return string
1499       */
1500      public static function default_output_dir(): string {
1501          global $CFG;
1502  
1503          return $CFG->dataroot . DIRECTORY_SEPARATOR . 'models';
1504      }
1505  
1506      /**
1507       * Returns the output directory for prediction processors.
1508       *
1509       * Directory structure as follows:
1510       * - Evaluation runs:
1511       *   models/$model->id/$model->version/evaluation/$model->timesplitting
1512       * - Training  & prediction runs:
1513       *   models/$model->id/$model->version/execution
1514       *
1515       * @param array $subdirs
1516       * @param bool $onlymodelid Preference over $subdirs
1517       * @return string
1518       */
1519      public function get_output_dir($subdirs = array(), $onlymodelid = false) {
1520          $subdirstr = '';
1521          foreach ($subdirs as $subdir) {
1522              $subdirstr .= DIRECTORY_SEPARATOR . $subdir;
1523          }
1524  
1525          $outputdir = get_config('analytics', 'modeloutputdir');
1526          if (empty($outputdir)) {
1527              // Apply default value.
1528              $outputdir = self::default_output_dir();
1529          }
1530  
1531          // Append model id.
1532          $outputdir .= DIRECTORY_SEPARATOR . $this->model->id;
1533          if (!$onlymodelid) {
1534              // Append version + subdirs.
1535              $outputdir .= DIRECTORY_SEPARATOR . $this->model->version . $subdirstr;
1536          }
1537  
1538          make_writable_directory($outputdir);
1539  
1540          return $outputdir;
1541      }
1542  
1543      /**
1544       * Returns a unique id for this model.
1545       *
1546       * This id should be unique for this site.
1547       *
1548       * @return string
1549       */
1550      public function get_unique_id() {
1551          global $CFG;
1552  
1553          if (!is_null($this->uniqueid)) {
1554              return $this->uniqueid;
1555          }
1556  
1557          // Generate a unique id for this site, this model and this time splitting method, considering the last time
1558          // that the model target and indicators were updated.
1559          $ids = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
1560          $this->uniqueid = sha1(implode('$$', $ids));
1561  
1562          return $this->uniqueid;
1563      }
1564  
1565      /**
1566       * Exports the model data for displaying it in a template.
1567       *
1568       * @param \renderer_base $output The renderer to use for exporting
1569       * @return \stdClass
1570       */
1571      public function export(\renderer_base $output) {
1572  
1573          \core_analytics\manager::check_can_manage_models();
1574  
1575          $data = clone $this->model;
1576  
1577          $data->modelname = format_string($this->get_name());
1578          $data->name = $this->inplace_editable_name()->export_for_template($output);
1579          $data->target = $this->get_target()->get_name();
1580          $data->targetclass = $this->get_target()->get_id();
1581  
1582          if ($timesplitting = $this->get_time_splitting()) {
1583              $data->timesplitting = $timesplitting->get_name();
1584          }
1585  
1586          $data->indicators = array();
1587          foreach ($this->get_indicators() as $indicator) {
1588              $data->indicators[] = $indicator->get_name();
1589          }
1590          return $data;
1591      }
1592  
1593      /**
1594       * Exports the model data to a zip file.
1595       *
1596       * @param string $zipfilename
1597       * @param bool $includeweights Include the model weights if available
1598       * @return string Zip file path
1599       */
1600      public function export_model(string $zipfilename, bool $includeweights = true) : string {
1601  
1602          \core_analytics\manager::check_can_manage_models();
1603  
1604          $modelconfig = new model_config($this);
1605          return $modelconfig->export($zipfilename, $includeweights);
1606      }
1607  
1608      /**
1609       * Imports the provided model.
1610       *
1611       * Note that this method assumes that model_config::check_dependencies has already been called.
1612       *
1613       * @throws \moodle_exception
1614       * @param  string $zipfilepath Zip file path
1615       * @return \core_analytics\model
1616       */
1617      public static function import_model(string $zipfilepath) : \core_analytics\model {
1618  
1619          \core_analytics\manager::check_can_manage_models();
1620  
1621          $modelconfig = new \core_analytics\model_config();
1622          return $modelconfig->import($zipfilepath);
1623      }
1624  
1625      /**
1626       * Can this model be exported?
1627       *
1628       * @return bool
1629       */
1630      public function can_export_configuration() : bool {
1631  
1632          if (empty($this->model->timesplitting)) {
1633              return false;
1634          }
1635          if (!$this->get_indicators()) {
1636              return false;
1637          }
1638  
1639          if ($this->is_static()) {
1640              return false;
1641          }
1642  
1643          return true;
1644      }
1645  
1646      /**
1647       * Returns the model logs data.
1648       *
1649       * @param int $limitfrom
1650       * @param int $limitnum
1651       * @return \stdClass[]
1652       */
1653      public function get_logs($limitfrom = 0, $limitnum = 0) {
1654          global $DB;
1655  
1656          \core_analytics\manager::check_can_manage_models();
1657  
1658          return $DB->get_records('analytics_models_log', array('modelid' => $this->get_id()), 'timecreated DESC', '*',
1659              $limitfrom, $limitnum);
1660      }
1661  
1662      /**
1663       * Merges all training data files into one and returns it.
1664       *
1665       * @return \stored_file|false
1666       */
1667      public function get_training_data() {
1668  
1669          \core_analytics\manager::check_can_manage_models();
1670  
1671          $timesplittingid = $this->get_time_splitting()->get_id();
1672          return \core_analytics\dataset_manager::export_training_data($this->get_id(), $timesplittingid);
1673      }
1674  
1675      /**
1676       * Has the model been trained using data from this site?
1677       *
1678       * This method is useful to determine if a trained model can be evaluated as
1679       * we can not use the same data for training and for evaluation.
1680       *
1681       * @return bool
1682       */
1683      public function trained_locally() : bool {
1684          global $DB;
1685  
1686          if (!$this->is_trained() || $this->is_static()) {
1687              // Early exit.
1688              return false;
1689          }
1690  
1691          if ($DB->record_exists('analytics_train_samples', ['modelid' => $this->model->id])) {
1692              return true;
1693          }
1694  
1695          return false;
1696      }
1697  
1698      /**
1699       * Flag the provided file as used for training or prediction.
1700       *
1701       * @param \stored_file $file
1702       * @param string $action
1703       * @return void
1704       */
1705      protected function flag_file_as_used(\stored_file $file, $action) {
1706          global $DB;
1707  
1708          $usedfile = new \stdClass();
1709          $usedfile->modelid = $this->model->id;
1710          $usedfile->fileid = $file->get_id();
1711          $usedfile->action = $action;
1712          $usedfile->time = time();
1713          $DB->insert_record('analytics_used_files', $usedfile);
1714      }
1715  
1716      /**
1717       * Log the evaluation results in the database.
1718       *
1719       * @param string $timesplittingid
1720       * @param float $score
1721       * @param string $dir
1722       * @param array $info
1723       * @param string $evaluationmode
1724       * @return int The inserted log id
1725       */
1726      protected function log_result($timesplittingid, $score, $dir = false, $info = false, $evaluationmode = 'configuration') {
1727          global $DB, $USER;
1728  
1729          $log = new \stdClass();
1730          $log->modelid = $this->get_id();
1731          $log->version = $this->model->version;
1732          $log->evaluationmode = $evaluationmode;
1733          $log->target = $this->model->target;
1734          $log->indicators = $this->model->indicators;
1735          $log->timesplitting = $timesplittingid;
1736          $log->dir = $dir;
1737          if ($info) {
1738              // Ensure it is not an associative array.
1739              $log->info = json_encode(array_values($info));
1740          }
1741          $log->score = $score;
1742          $log->timecreated = time();
1743          $log->usermodified = $USER->id;
1744  
1745          return $DB->insert_record('analytics_models_log', $log);
1746      }
1747  
1748      /**
1749       * Utility method to return indicator class names from a list of indicator objects
1750       *
1751       * @param \core_analytics\local\indicator\base[] $indicators
1752       * @return string[]
1753       */
1754      private static function indicator_classes($indicators) {
1755  
1756          // What we want to check and store are the indicator classes not the keys.
1757          $indicatorclasses = array();
1758          foreach ($indicators as $indicator) {
1759              if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
1760                  if (!is_object($indicator) && !is_scalar($indicator)) {
1761                      $indicator = strval($indicator);
1762                  } else if (is_object($indicator)) {
1763                      $indicator = '\\' . get_class($indicator);
1764                  }
1765                  throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
1766              }
1767              $indicatorclasses[] = $indicator->get_id();
1768          }
1769  
1770          return $indicatorclasses;
1771      }
1772  
1773      /**
1774       * Clears the model training and prediction data.
1775       *
1776       * Executed after updating model critical elements like the time splitting method
1777       * or the indicators.
1778       *
1779       * @return void
1780       */
1781      public function clear() {
1782          global $DB, $USER;
1783  
1784          \core_analytics\manager::check_can_manage_models();
1785  
1786          // Delete current model version stored stuff.
1787          $predictor = $this->get_predictions_processor(false);
1788          if ($predictor->is_ready() !== true) {
1789              $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
1790              debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
1791                  $this->model->id . ' could not be cleared.');
1792          } else {
1793              $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
1794          }
1795  
1796          $DB->delete_records_select('analytics_prediction_actions', "predictionid IN
1797              (SELECT id FROM {analytics_predictions} WHERE modelid = :modelid)", ['modelid' => $this->get_id()]);
1798  
1799          $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
1800          $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
1801          $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
1802          $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
1803          $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));
1804  
1805          // Purge all generated files.
1806          \core_analytics\dataset_manager::clear_model_files($this->model->id);
1807  
1808          // We don't expect people to clear models regularly and the cost of filling the cache is
1809          // 1 db read per context.
1810          $this->purge_insights_cache();
1811  
1812          if (!$this->is_static()) {
1813              $this->model->trained = 0;
1814          }
1815  
1816          $this->model->timemodified = time();
1817          $this->model->usermodified = $USER->id;
1818          $DB->update_record('analytics_models', $this->model);
1819      }
1820  
1821      /**
1822       * Returns the name of the model.
1823       *
1824       * By default, models use their target's name as their own name. They can have their explicit name, too. In which
1825       * case, the explicit name is used instead of the default one.
1826       *
1827       * @return string|lang_string
1828       */
1829      public function get_name() {
1830  
1831          if (trim($this->model->name) === '') {
1832              return $this->get_target()->get_name();
1833  
1834          } else {
1835              return $this->model->name;
1836          }
1837      }
1838  
1839      /**
1840       * Renames the model to the given name.
1841       *
1842       * When given an empty string, the model falls back to using the associated target's name as its name.
1843       *
1844       * @param string $name The new name for the model, empty string for using the default name.
1845       */
1846      public function rename(string $name) {
1847          global $DB, $USER;
1848  
1849          $this->model->name = $name;
1850          $this->model->timemodified = time();
1851          $this->model->usermodified = $USER->id;
1852  
1853          $DB->update_record('analytics_models', $this->model);
1854      }
1855  
1856      /**
1857       * Returns an inplace editable element with the model's name.
1858       *
1859       * @return \core\output\inplace_editable
1860       */
1861      public function inplace_editable_name() {
1862  
1863          $displayname = format_string($this->get_name());
1864  
1865          return new \core\output\inplace_editable('core_analytics', 'modelname', $this->model->id,
1866              has_capability('moodle/analytics:managemodels', \context_system::instance()), $displayname, $this->model->name);
1867      }
1868  
1869      /**
1870       * Returns true if the time-splitting method used by this model is invalid for this model.
1871       * @return  bool
1872       */
1873      public function invalid_timesplitting_selected(): bool {
1874          $currenttimesplitting = $this->model->timesplitting;
1875          if (empty($currenttimesplitting)) {
1876              // Not set is different from invalid. This function is used to identify invalid
1877              // time-splittings.
1878              return false;
1879          }
1880  
1881          $potentialtimesplittings = $this->get_potential_timesplittings();
1882          if ($currenttimesplitting && empty($potentialtimesplittings[$currenttimesplitting])) {
1883              return true;
1884          }
1885  
1886          return false;
1887      }
1888  
1889      /**
1890       * Adds the id from {analytics_predictions} db table to the prediction \stdClass objects.
1891       *
1892       * @param  \stdClass[] $predictionrecords
1893       * @return \stdClass[] The prediction records including their ids in {analytics_predictions} db table.
1894       */
1895      private function add_prediction_ids($predictionrecords) {
1896          global $DB;
1897  
1898          $firstprediction = reset($predictionrecords);
1899  
1900          $contextids = array_map(function($predictionobj) {
1901              return $predictionobj->contextid;
1902          }, $predictionrecords);
1903  
1904          // Limited to 30000 records as a middle point between the ~65000 params limit in pgsql and the size limit for mysql which
1905          // can be increased if required up to a reasonable point.
1906          $chunks = array_chunk($contextids, 30000);
1907          foreach ($chunks as $contextidschunk) {
1908              list($contextsql, $contextparams) = $DB->get_in_or_equal($contextidschunk, SQL_PARAMS_NAMED);
1909  
1910              // We select the fields that will allow us to map ids to $predictionrecords. Given that we already filter by modelid
1911              // we have enough with sampleid and rangeindex. The reason is that the sampleid relation to a site is N - 1.
1912              $fields = 'id, sampleid, rangeindex';
1913  
1914              // We include the contextid and the timecreated filter to reduce the number of records in $dbpredictions. We can not
1915              // add as many OR conditions as records in $predictionrecords.
1916              $sql = "SELECT $fields
1917                        FROM {analytics_predictions}
1918                       WHERE modelid = :modelid
1919                             AND contextid $contextsql
1920                             AND timecreated >= :firsttimecreated";
1921              $params = $contextparams + ['modelid' => $this->model->id, 'firsttimecreated' => $firstprediction->timecreated];
1922              $dbpredictions = $DB->get_recordset_sql($sql, $params);
1923              foreach ($dbpredictions as $id => $dbprediction) {
1924                  // The append_rangeindex implementation is the same regardless of the time splitting method in use.
1925                  $uniqueid = $this->get_time_splitting()->append_rangeindex($dbprediction->sampleid, $dbprediction->rangeindex);
1926                  $predictionrecords[$uniqueid]->id = $dbprediction->id;
1927              }
1928          }
1929  
1930          return $predictionrecords;
1931      }
1932  
1933      /**
1934       * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
1935       *
1936       * @param  array  $sampleids
1937       * @return array
1938       */
1939      public function get_samples(array $sampleids): array {
1940  
1941          if (empty($sampleids)) {
1942              throw new \coding_exception('No sample ids provided');
1943          }
1944  
1945          $chunksize = count($sampleids);
1946  
1947          // We start with just 1 chunk, if it is too large for the db we split the list of sampleids in 2 and we
1948          // try again. We repeat this process until the chunk is small enough for the db engine to process. The
1949          // >= has been added in case there are other \dml_read_exceptions unrelated to the max number of params.
1950          while (empty($done) && $chunksize >= 1) {
1951  
1952              $chunks = array_chunk($sampleids, $chunksize);
1953              $allsampleids = [];
1954              $allsamplesdata = [];
1955  
1956              foreach ($chunks as $index => $chunk) {
1957  
1958                  try {
1959                      list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
1960                  } catch (\dml_read_exception $e) {
1961  
1962                      // Reduce the chunksize, we use floor() so the $chunksize is always less than the previous $chunksize value.
1963                      $chunksize = floor($chunksize / 2);
1964                      break;
1965                  }
1966  
1967                  // We can sum as these two arrays are indexed by sampleid and there are no collisions.
1968                  $allsampleids = $allsampleids + $chunksampleids;
1969                  $allsamplesdata = $allsamplesdata + $chunksamplesdata;
1970  
1971                  if ($index === count($chunks) - 1) {
1972                      // We successfully processed all the samples in all chunks, we are done.
1973                      $done = true;
1974                  }
1975              }
1976          }
1977  
1978          if (empty($done)) {
1979              if (!empty($e)) {
1980                  // Throw the last exception we caught, the \dml_read_exception we have been catching is unrelated to the max number
1981                  // of param's exception.
1982                  throw new \dml_read_exception($e);
1983              } else {
1984                  throw new \coding_exception('We should never reach this point, there is a bug in ' .
1985                      'core_analytics\\model::get_samples\'s code');
1986              }
1987          }
1988          return [$allsampleids, $allsamplesdata];
1989      }
1990  
1991      /**
1992       * Contexts where this model should be active.
1993       *
1994       * @return \context[] Empty array if there are no context restrictions.
1995       */
1996      public function get_contexts() {
1997          if ($this->contexts !== null) {
1998              return $this->contexts;
1999          }
2000  
2001          if (!$this->model->contextids) {
2002              $this->contexts = [];
2003              return $this->contexts;
2004          }
2005          $contextids = json_decode($this->model->contextids);
2006  
2007          // We don't expect this list to be massive as contexts need to be selected manually using the edit model form.
2008          $this->contexts = array_map(function($contextid) {
2009              return \context::instance_by_id($contextid, IGNORE_MISSING);
2010          }, $contextids);
2011  
2012          return $this->contexts;
2013      }
2014  
2015      /**
2016       * Purges the insights cache.
2017       */
2018      private function purge_insights_cache() {
2019          $cache = \cache::make('core', 'contextwithinsights');
2020          $cache->purge();
2021      }
2022  
2023      /**
2024       * Increases system memory and time limits.
2025       *
2026       * @return void
2027       */
2028      private function heavy_duty_mode() {
2029          if (ini_get('memory_limit') != -1) {
2030              raise_memory_limit(MEMORY_HUGE);
2031          }
2032          \core_php_time_limit::raise();
2033      }
2034  }