Differences Between: [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
 * Prediction model representation.
 *
 * @package   core_analytics
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace core_analytics;

defined('MOODLE_INTERNAL') || die();

/**
 * Prediction model representation.
 *
 * Wraps a record of the analytics_models table and lazily loads the target,
 * indicators and analyser that the record refers to.
 *
 * @package   core_analytics
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class model {

    /**
     * All as expected.
     */
    const OK = 0;

    /**
     * There was a problem.
     */
    const GENERAL_ERROR = 1;

    /**
     * No dataset to analyse.
     */
    const NO_DATASET = 2;

    /**
     * Model with low prediction accuracy.
     */
    const LOW_SCORE = 4;

    /**
     * Not enough data to evaluate the model properly.
     */
    const NOT_ENOUGH_DATA = 8;

    /**
     * Invalid analysable for the time splitting method.
     *
     * NOTE(review): shares the value 4 with LOW_SCORE; presumably the ANALYSABLE_* constants are
     * used in a different status context than the model result codes - confirm before comparing them.
     */
    const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

    /**
     * Invalid analysable for all time splitting methods.
     *
     * NOTE(review): shares the value 8 with NOT_ENOUGH_DATA (see the note on
     * ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD).
     */
    const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

    /**
     * Invalid analysable for the target
     */
    const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

    /**
     * Minimum score to consider a non-static prediction model as good.
     */
    const MIN_SCORE = 0.7;

    /**
     * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
     */
    const PREDICTION_MIN_SCORE = 0.6;

    /**
     * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
     */
    const ACCEPTED_DEVIATION = 0.05;

    /**
     * Number of evaluation repetitions.
     */
    const EVALUATION_ITERATIONS = 10;

    /**
     * The analytics_models record this object represents.
     *
     * @var \stdClass
     */
    protected $model = null;

    /**
     * Analyser instance, created on demand by init_analyser().
     *
     * @var \core_analytics\local\analyser\base
     */
    protected $analyser = null;

    /**
     * Target instance, loaded on demand by get_target().
     *
     * @var \core_analytics\local\target\base
     */
    protected $target = null;

    /**
     * @var \core_analytics\predictor
     */
    protected $predictionsprocessor = null;

    /**
     * Indicator instances indexed by their full class name, loaded on demand by get_indicators().
     *
     * @var \core_analytics\local\indicator\base[]
     */
    protected $indicators = null;

    /**
     * @var \context[]
     */
    protected $contexts = null;

    /**
     * Unique Model id created from site info and last model modification.
     *
     * @var string
     */
    protected $uniqueid = null;

    /**
     * Constructor.
     *
     * @throws \dml_exception If a model id is provided and no record with that id exists.
     * @param int|\stdClass $model The model id or a full analytics_models record.
     * @return void
     */
    public function __construct($model) {
        global $DB;

        if (is_scalar($model)) {
            // MUST_EXIST makes get_record() throw when the id is unknown, so the result never needs
            // to be checked for a falsy value here.
            $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
        }
        $this->model = $model;
    }

    /**
     * Quick safety check to discard site models which required components are not available anymore.
*
     * @return bool True when the target can be loaded and its analyser class exists.
     */
    public function is_available() {
        $target = $this->get_target();

        // Both conditions are required: a loadable target and an existing analyser class.
        return $target && class_exists($target->get_analyser_class());
    }

    /**
     * Returns the model id.
     *
     * @return int
     */
    public function get_id() {
        return $this->model->id;
    }

    /**
     * Returns a plain \stdClass with the model data.
     *
     * @return \stdClass
     */
    public function get_model_obj() {
        return $this->model;
    }

    /**
     * Returns the model target.
     *
     * The instance is resolved through the analytics manager on first use and reused afterwards.
     *
     * @return \core_analytics\local\target\base
     */
    public function get_target() {
        if ($this->target === null) {
            $this->target = \core_analytics\manager::get_target($this->model->target);
        }

        return $this->target;
    }

    /**
     * Returns the model indicators.
     *
     * Indicators are decoded from the JSON list stored in the model record and indexed by
     * their full class name. Indicators that can not be loaded are skipped with a developer
     * debugging message.
     *
     * @throws \coding_exception If the stored list of indicators is not a JSON array.
     * @return \core_analytics\local\indicator\base[]
     */
    public function get_indicators() {
        if ($this->indicators !== null) {
            return $this->indicators;
        }

        $classnames = json_decode($this->model->indicators);
        if (!is_array($classnames)) {
            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
        }

        $this->indicators = array();
        foreach ($classnames as $classname) {
            $indicator = \core_analytics\manager::get_indicator($classname);
            if (!$indicator) {
                debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
                continue;
            }
            $this->indicators[$classname] = $indicator;
        }

        return $this->indicators;
    }

    /**
     * Returns the list of indicators that could potentially be used by the model target.
     *
     * It includes the indicators that are part of the model.
*
     * @return \core_analytics\local\indicator\base[]
     */
    public function get_potential_indicators() {

        $candidates = \core_analytics\manager::get_all_indicators();

        // The analyser is what knows the indicator requirements; no time splitting method is needed for that.
        if (empty($this->analyser)) {
            $this->init_analyser(array('notimesplitting' => true));
        }

        // Keep (preserving the keys) only the indicators whose requirements are satisfied.
        return array_filter($candidates, function($indicator) {
            return $this->analyser->check_indicator_requirements($indicator) === true;
        });
    }

    /**
     * Returns the model analyser (defined by the model target).
     *
     * The options are only used the first time, when the analyser needs to be initialised.
     *
     * @param array $options Default initialisation with no options.
     * @return \core_analytics\local\analyser\base
     */
    public function get_analyser($options = array()) {
        if ($this->analyser === null) {
            $this->init_analyser($options);
        }

        return $this->analyser;
    }

    /**
     * Initialises the model analyser.
     *
     * Options read here: 'notimesplitting' (skip time splitting methods altogether), 'evaluation'
     * (use the time splitting methods available for evaluation) and 'timesplitting' (during evaluation,
     * restrict the analysis to this time splitting method). All options are forwarded to the analyser.
     *
     * @throws \coding_exception
     * @throws \moodle_exception
     * @param array $options
     * @return void
     */
    protected function init_analyser($options = array()) {

        $target = $this->get_target();
        $indicators = $this->get_indicators();

        if (empty($target)) {
            throw new \moodle_exception('errornotarget', 'analytics');
        }

        $potentialtimesplittings = $this->get_potential_timesplittings();

        $timesplittings = array();
        $needstimesplitting = empty($options['notimesplitting']);

        if ($needstimesplitting && empty($options['evaluation'])) {

            if (empty($this->model->timesplitting)) {
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }

            // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
            $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());

        } else if ($needstimesplitting && !empty($options['timesplitting'])) {
            // Evaluation restricted to the specified time splitting method.
            $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $requestedid = $requested->get_id();

            if (empty($potentialtimesplittings[$requestedid])) {
                throw new \moodle_exception('errorcannotusetimesplitting', 'analytics');
            }
            $timesplittings = array($requestedid => $requested);

        } else if ($needstimesplitting) {
            // The evaluation process will run using all available time splitting methods unless one is specified.
            $timesplittingsforevaluation = \core_analytics\manager::get_time_splitting_methods_for_evaluation();

            // They both have the same objects, using $potentialtimesplittings as its items are sorted.
            $timesplittings = array_intersect_key($potentialtimesplittings, $timesplittingsforevaluation);
        }

        if ($needstimesplitting && empty($timesplittings)) {
            throw new \moodle_exception('errornotimesplittings', 'analytics');
        }

        $analyserclass = $target->get_analyser_class();
        if (!class_exists($analyserclass)) {
            throw new \coding_exception($analyserclass . ' class does not exists');
        }

        // Returns a \core_analytics\local\analyser\base class.
        $this->analyser = new $analyserclass($this->model->id, $target, $indicators, $timesplittings, $options);
    }

    /**
     * Returns the model time splitting method.
     *
     * @return \core_analytics\local\time_splitting\base|false Returns false if no time splitting.
     */
    public function get_time_splitting() {
        return empty($this->model->timesplitting)
            ? false
            : \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    /**
     * Returns the time-splitting methods that can be used by this model.
*
     * The returned list is sorted by name (case-insensitive) and keeps the keys provided by the manager.
     *
     * @return \core_analytics\local\time_splitting\base[]
     */
    public function get_potential_timesplittings() {

        $candidates = \core_analytics\manager::get_all_time_splittings();
        uasort($candidates, function($first, $second) {
            return strcasecmp($first->get_name(), $second->get_name());
        });

        // Discard the methods that the target can not work with.
        return array_filter($candidates, function($timesplitting) {
            return $this->get_target()->can_use_timesplitting($timesplitting);
        });
    }

    /**
     * Creates a new model. Enables it if $timesplittingid is specified.
     *
     * @throws \moodle_exception If the time splitting method id is not valid.
     * @throws \coding_exception If the predictions processor is not valid.
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\indicator\base[] $indicators
     * @param string|false $timesplittingid The time splitting method id (its fully qualified class name)
     * @param string|null $processor The machine learning backend this model will use.
     * @return \core_analytics\model
     */
    public static function create(\core_analytics\local\target\base $target, array $indicators,
                                  $timesplittingid = false, $processor = null) {
        global $USER, $DB;

        $indicatorclasses = self::indicator_classes($indicators);

        $now = time();

        $record = (object)[
            'target' => $target->get_id(),
            'indicators' => json_encode($indicatorclasses),
            'version' => $now,
            'timecreated' => $now,
            'timemodified' => $now,
            'usermodified' => $USER->id,
        ];

        // Models based on assumptions do not need to be trained.
        if ($target->based_on_assumptions()) {
            $record->trained = 1;
        }

        if ($timesplittingid) {
            // It must be a valid time splitting class, provided as a fully qualified class name.
            $validclass = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
            if (!$validclass || substr($timesplittingid, 0, 1) !== '\\') {
                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
            }
            $record->timesplitting = $timesplittingid;
        }

        // A processor, when provided, must be either a classifier or a regressor.
        $invalidprocessor = $processor &&
            !manager::is_valid($processor, '\core_analytics\classifier') &&
            !manager::is_valid($processor, '\core_analytics\regressor');
        if ($invalidprocessor) {
            throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
        }
        $record->predictionsprocessor = $processor;

        $id = $DB->insert_record('analytics_models', $record);

        // Get db defaults.
        $record = $DB->get_record('analytics_models', array('id' => $id), '*', MUST_EXIST);

        return new static($record);
    }

    /**
     * Does this model exist?
     *
     * If no indicators are provided it considers any model with the provided
     * target a match.
     *
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\indicator\base[]|false $indicators Indexed by the indicator id, as the keys are compared.
     * @return bool
     */
    public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
        global $DB;

        $candidates = $DB->get_records('analytics_models', array('target' => $target->get_id()));

        if (!$candidates) {
            return false;
        }

        // At this point there is at least one model using the target.
        if (!$indicators) {
            return true;
        }

        $wantedids = array_keys($indicators);
        sort($wantedids);

        foreach ($candidates as $candidate) {
            $candidateids = array_keys((new \core_analytics\model($candidate))->get_indicators());
            sort($candidateids);

            if ($wantedids === $candidateids) {
                return true;
            }
        }

        return false;
    }

    /**
     * Updates the model.
*
     * A change in the indicators, the time splitting method or the predictions processor deletes the
     * generated predictions, sets a new model version and, for non-static models, resets the trained flag.
     *
     * @param int|bool $enabled
     * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
     * @param string|false $timesplittingid False to respect current time splitting method. Note that the default
     *                                      value is an empty string, not false: when this argument is omitted the
     *                                      empty string is what gets stored.
     * @param string|false $predictionsprocessor False to respect current predictors processor value
     * @param int[]|false $contextids List of context ids for this model. False to respect the current list of contexts.
     *                                An empty list is stored as null.
     * @return void
     */
    public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false,
            $contextids = false) {
        global $USER, $DB;

        \core_analytics\manager::check_can_manage_models();

        $now = time();

        if ($indicators !== false) {
            $indicatorclasses = self::indicator_classes($indicators);
            $indicatorsstr = json_encode($indicatorclasses);
        } else {
            // Respect current value.
            $indicatorsstr = $this->model->indicators;
        }

        if ($timesplittingid === false) {
            // Respect current value.
            $timesplittingid = $this->model->timesplitting;
        }

        if ($predictionsprocessor === false) {
            // Respect current value.
            $predictionsprocessor = $this->model->predictionsprocessor;
        }

        if ($contextids === false) {
            // Respect current value.
            $contextsstr = $this->model->contextids;
        } else {
            $contextsstr = $contextids ? json_encode($contextids) : null;

            // Reset the internal cache whenever a new list is provided, also when the list is emptied,
            // otherwise the previously cached contexts would survive the update.
            $this->contexts = null;
        }

        if ($this->model->timesplitting !== $timesplittingid ||
                $this->model->indicators !== $indicatorsstr ||
                $this->model->predictionsprocessor !== $predictionsprocessor) {

            // Delete generated predictions before changing the model version.
            $this->clear();

            // It needs to be reset as the version changes.
            $this->uniqueid = null;
            $this->indicators = null;

            // We update the version of the model so different time splittings are not mixed up.
            $this->model->version = $now;

            // Reset trained flag.
            if (!$this->is_static()) {
                $this->model->trained = 0;
            }

        } else if ($this->model->enabled != $enabled) {
            // We purge the cached contexts with insights as some will not be visible anymore.
            $this->purge_insights_cache();
        }

        $this->model->enabled = intval($enabled);
        $this->model->indicators = $indicatorsstr;
        $this->model->timesplitting = $timesplittingid;
        $this->model->predictionsprocessor = $predictionsprocessor;
        $this->model->contextids = $contextsstr;
        $this->model->timemodified = $now;
        $this->model->usermodified = $USER->id;

        $DB->update_record('analytics_models', $this->model);
    }

    /**
     * Removes the model.
     *
     * The model and its log records are always deleted from the database. The predictions processor
     * output directory is only deleted when the processor is ready to be used; a debugging message
     * is displayed otherwise.
     *
     * @return void
     */
    public function delete() {
        global $DB;

        \core_analytics\manager::check_can_manage_models();

        $this->clear();

        // Method self::clear is already clearing the current model version.
        // The processor is requested without the readiness check so we can report the problem ourselves.
        $predictor = $this->get_predictions_processor(false);
        if ($predictor->is_ready() !== true) {
            $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
            debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
                $this->model->id . ' could not be deleted.');
        } else {
            $predictor->delete_output_dir($this->get_output_dir(array(), true), $this->get_unique_id());
        }

        $DB->delete_records('analytics_models', array('id' => $this->model->id));
        $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id));
    }

    /**
     * Evaluates the model.
*
     * This method gets the site contents (through the analyser) creates a .csv dataset
     * with them and evaluates the model prediction accuracy multiple times using the
     * machine learning backend. It returns an object where the model score is the average
     * prediction accuracy of all executed evaluations.
     *
     * The 'mode' option selects what is evaluated: 'configuration' (the default) or 'trainedmodel',
     * which only uses the time splitting method of the trained model and provides the trained model
     * directory to the machine learning backend.
     *
     * @throws \moodle_exception If the mode is unknown or the model has no indicators.
     * @param array $options
     * @return \stdClass[] One result for each evaluated time splitting method, indexed by its id.
     */
    public function evaluate($options = array()) {

        \core_analytics\manager::check_can_manage_models();

        if ($this->is_static()) {
            // Models based on assumptions can not be evaluated.
            $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
            return array((object)['status' => self::NO_DATASET]);
        }

        $options['evaluation'] = true;

        if (empty($options['mode'])) {
            $options['mode'] = 'configuration';
        }

        switch ($options['mode']) {
            case 'configuration':
                $trainedmodeldir = false;
                break;

            case 'trainedmodel':
                // We are only interested on the time splitting method used by the trained model.
                $options['timesplitting'] = $this->model->timesplitting;

                // Provide the trained model directory to the ML backend if that is what we want to evaluate.
                $trainedmodeldir = $this->get_output_dir(['execution']);
                break;

            default:
                throw new \moodle_exception('errorunknownaction', 'analytics');
        }

        $this->init_analyser($options);

        if (empty($this->get_indicators())) {
            throw new \moodle_exception('errornoindicators', 'analytics');
        }

        $this->heavy_duty_mode();

        // Before get_labelled_data call so we get an early exception if it is not ready.
        $predictor = $this->get_predictions_processor();

        $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());

        // No datasets generated.
        if (empty($datasets)) {
            return array((object)[
                'status' => self::NO_DATASET,
                'info' => $this->get_analyser()->get_logs(),
            ]);
        }

        if (!PHPUNIT_TEST && CLI_SCRIPT) {
            echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
        }

        $islinear = null;
        $results = array();
        foreach ($datasets as $timesplittingid => $dataset) {

            $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

            // One output directory for each time splitting method, named after its id without backslashes.
            $outputdir = $this->get_output_dir(array('evaluation', str_replace('\\', '', $timesplittingid)));

            // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
            if ($this->get_target()->is_linear()) {
                $evaluation = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                    self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
            } else {
                $evaluation = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                    self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
            }

            $result = new \stdClass();
            $result->status = $evaluation->status;
            $result->info = $evaluation->info;

            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = $evaluation->score ?? 0;

            $dir = !empty($evaluation->dir) ? $evaluation->dir : false;

            $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info, $options['mode']);

            $results[$timesplitting->get_id()] = $result;
        }

        return $results;
    }

    /**
     * Trains the model using the site contents.
*
     * This method prepares a dataset from the site contents (through the analyser)
     * and passes it to the machine learning backends. Static models are skipped as
     * they do not require training.
     *
     * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
     * @return \stdClass Object with a status attribute and, except for static models, an info attribute.
     */
    public function train() {

        \core_analytics\manager::check_can_manage_models();

        if ($this->is_static()) {
            $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
            $result = new \stdClass();
            $result->status = self::OK;
            return $result;
        }

        if (!$this->is_enabled() || empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        if (empty($this->get_indicators())) {
            throw new \moodle_exception('errornoindicators', 'analytics');
        }

        $this->heavy_duty_mode();

        // Before get_labelled_data call so we get an early exception if it is not writable.
        $outputdir = $this->get_output_dir(array('execution'));

        // Before get_labelled_data call so we get an early exception if it is not ready.
        $predictor = $this->get_predictions_processor();

        $datasets = $this->get_analyser()->get_labelled_data($this->get_contexts());

        // No training if no files have been provided.
        if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {

            $result = new \stdClass();
            $result->status = self::NO_DATASET;
            $result->info = $this->get_analyser()->get_logs();
            return $result;
        }
        $samplesfile = $datasets[$this->model->timesplitting];

        // Train using the dataset.
        if ($this->get_target()->is_linear()) {
            $predictorresult = $predictor->train_regression($this->get_unique_id(), $samplesfile, $outputdir);
        } else {
            $predictorresult = $predictor->train_classification($this->get_unique_id(), $samplesfile, $outputdir);
        }

        $result = new \stdClass();
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if ($result->status !== self::OK) {
            return $result;
        }

        $this->flag_file_as_used($samplesfile, 'trained');

        // Mark the model as trained if it wasn't.
        if ($this->model->trained == false) {
            $this->mark_as_trained();
        }

        return $result;
    }

    /**
     * Get predictions from the site contents.
     *
     * It analyses the site contents (through analyser classes) looking for samples
     * ready to receive predictions. It generates a dataset with all samples ready to
     * get predictions and it passes it to the machine learning backends or to the
     * targets based on assumptions to get the predictions.
     *
     * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no indicators
     *                           or the samples file was already used to get predictions.
     * @return \stdClass Object with status and info attributes and, when predictions could be calculated,
     *                   a predictions attribute.
     */
    public function predict() {
        global $DB;

        \core_analytics\manager::check_can_manage_models();

        if (!$this->is_enabled() || empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        if (empty($this->get_indicators())) {
            throw new \moodle_exception('errornoindicators', 'analytics');
        }

        $this->heavy_duty_mode();

        // Before get_unlabelled_data call so we get an early exception if it is not writable.
        $outputdir = $this->get_output_dir(array('execution'));

        if (!$this->is_static()) {
            // Predictions using a machine learning backend.

            // Before get_unlabelled_data call so we get an early exception if it is not ready.
            $predictor = $this->get_predictions_processor();

            $samplesdata = $this->get_analyser()->get_unlabelled_data($this->get_contexts());

            // Get the prediction samples file.
            if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {

                $result = new \stdClass();
                $result->status = self::NO_DATASET;
                $result->info = $this->get_analyser()->get_logs();
                return $result;
            }
            $samplesfile = $samplesdata[$this->model->timesplitting];

            // We need to throw an exception if we are trying to predict stuff that was already predicted.
            $params = array('modelid' => $this->model->id, 'action' => 'predicted', 'fileid' => $samplesfile->get_id());
            if ($DB->get_record('analytics_used_files', $params)) {
                throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
            }

            $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

            // Estimation and classification processes run on the machine learning backend side.
            if ($this->get_target()->is_linear()) {
                $predictorresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
            } else {
                $predictorresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
            }

            // Prepare the results object.
            $result = new \stdClass();
            $result->status = $predictorresult->status;
            $result->info = $predictorresult->info;
            $result->predictions = $this->format_predictor_predictions($predictorresult);

        } else {
            // Predictions based on assumptions.

            $indicatorcalculations = $this->get_analyser()->get_static_data($this->get_contexts());
            // Get the prediction samples file.
            if (empty($indicatorcalculations) || empty($indicatorcalculations[$this->model->timesplitting])) {

                $result = new \stdClass();
                $result->status = self::NO_DATASET;
                $result->info = $this->get_analyser()->get_logs();
                return $result;
            }

            // Same as reset($indicatorcalculations) as models based on assumptions only analyse 1 single
            // time-splitting method.
            $indicatorcalculations = $indicatorcalculations[$this->model->timesplitting];

            // Prepare the results object.
            $result = new \stdClass();
            $result->status = self::OK;
            $result->info = [];
            $result->predictions = $this->get_static_predictions($indicatorcalculations);
        }

        if ($result->status !== self::OK) {
            return $result;
        }

        // They stay empty when there are no predictions to process.
        $samplecontexts = array();
        $predictionrecords = array();
        if ($result->predictions) {
            list($samplecontexts, $predictionrecords) = $this->execute_prediction_callbacks($result->predictions,
                $indicatorcalculations);
        }

        if (!empty($samplecontexts) && $this->uses_insights()) {
            $this->trigger_insights($samplecontexts, $predictionrecords);
        }

        if (!$this->is_static()) {
            $this->flag_file_as_used($samplesfile, 'predicted');
        }

        return $result;
    }

    /**
     * Returns the model predictions processor.
     *
     * @param bool $checkisready Forwarded to the analytics manager together with the model processor.
     * @return \core_analytics\predictor
     */
    public function get_predictions_processor($checkisready = true) {
        return manager::get_predictions_processor($this->model->predictionsprocessor, $checkisready);
    }

    /**
     * Formats the predictor results.
     *
     * Each item returned by the predictions processor is expected to contain the unique sample id,
     * the prediction and, optionally, the prediction score. Items with any other number of values
     * are skipped with a developer debugging message.
     *
     * @param \stdClass $predictorresult The predictions processor result, its predictions attribute is read.
     * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
     */
    private function format_predictor_predictions($predictorresult) {

        $predictions = array();
        if (empty($predictorresult->predictions)) {
            return $predictions;
        }

        foreach ($predictorresult->predictions as $sampleinfo) {

            // We parse each prediction.
            switch (count($sampleinfo)) {
                case 1:
                    // For whatever reason the predictions processor could not process this sample, we
                    // skip it and do nothing with it.
                    debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                        $sampleinfo[0], DEBUG_DEVELOPER);
                    continue 2;
                case 2:
                    // Prediction processors that do not return a prediction score will have the maximum prediction
                    // score.
                    list($uniquesampleid, $prediction) = $sampleinfo;
                    $predictionscore = 1;
                    break;
                case 3:
                    list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                    break;
                default:
                    // Unexpected number of values. The item is skipped, otherwise the values of the previous
                    // sample (or undefined variables if this is the first one) would be stored as a prediction.
                    debugging($this->model->id . ' model predictions processor returned a prediction with an ' .
                        'unexpected number of values', DEBUG_DEVELOPER);
                    continue 2;
            }
            $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
        }

        return $predictions;
    }

    /**
     * Execute the prediction callbacks defined by the target.
     *
     * Only the predictions that trigger the target callback are stored and returned.
     *
     * @param \stdClass[] $predictions Objects with prediction and predictionscore attributes, indexed by unique sample id.
     * @param array $indicatorcalculations The indicator calculations, indexed by unique sample id.
     * @return array Two elements: the contexts of the samples (indexed by context id) and the stored
     *               prediction records (indexed by unique sample id).
     */
    protected function execute_prediction_callbacks(&$predictions, $indicatorcalculations) {

        // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
        $samplecontexts = array();
        $records = array();

        foreach ($predictions as $uniquesampleid => $prediction) {

            // The unique sample id contains both the sampleid and the rangeindex.
            list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
            if ($this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {

                // Prepare the record to store the predicted values.
                list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
                    $prediction->predictionscore, json_encode($indicatorcalculations[$uniquesampleid]));

                // We will later bulk-insert them all.
                $records[$uniquesampleid] = $record;

                // Also store all samples context to later generate insights or whatever action the target wants to perform.
                $samplecontexts[$samplecontext->id] = $samplecontext;

                $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
                    $prediction->prediction, $prediction->predictionscore);
            }
        }

        if (!empty($records)) {
            $this->save_predictions($records);
        }

        return [$samplecontexts, $records];
    }

    /**
     * Generates insights and updates the cache.
     *
     * @param \context[] $samplecontexts
     * @param \stdClass[] $predictionrecords
     * @return void
     */
    protected function trigger_insights($samplecontexts, $predictionrecords) {

        // Notify the target that all predictions have been processed.
        if ($this->get_analyser()::one_sample_per_analysable()) {

            // We need to do something unusual here. self::save_predictions uses the bulk-insert function (insert_records()) for
            // performance reasons and that function does not return us the inserted ids. We need to retrieve them from
            // the database, and we need to do it using one single database query (for performance reasons as well).
            $predictionrecords = $this->add_prediction_ids($predictionrecords);

            $samplesdata = $this->predictions_sample_data($predictionrecords);
            $samplesdata = $this->append_calculations_info($predictionrecords, $samplesdata);

            $predictions = array_map(function($predictionobj) use ($samplesdata) {
                $prediction = new \core_analytics\prediction($predictionobj, $samplesdata[$predictionobj->sampleid]);
                return $prediction;
            }, $predictionrecords);
        } else {
            // The target does not receive the list of predictions in this case.
            $predictions = [];
        }

        $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts, $predictions);

        if ($this->get_target()->link_insights_report()) {

            // Update cache.
            foreach ($samplecontexts as $context) {
                \core_analytics\manager::cached_models_with_insights($context, $this->get_id());
            }
        }
    }

    /**
     * Get predictions from a static model.
     *
     * The provided calculations are modified: the headers row is removed, each row is converted to
     * an array indexed by the headers and the samples that the target can not calculate are discarded.
     *
     * @param array $indicatorcalculations The headers row followed by the samples rows indexed by unique sample id.
     * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
     */
    protected function get_static_predictions(&$indicatorcalculations) {

        $headers = array_shift($indicatorcalculations);

        // Get rid of the sampleid header.
        array_shift($headers);

        // Group samples by analysable for \core_analytics\local\target::calculate.
        $analysables = array();
        // List all sampleids together.
        $sampleids = array();

        foreach ($indicatorcalculations as $uniquesampleid => $indicators) {

            // Get rid of the sampleid column.
            unset($indicators[0]);
            $indicators = array_combine($headers, $indicators);
            $indicatorcalculations[$uniquesampleid] = $indicators;

            list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

            $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
            $analysableclass = get_class($analysable);
            if (empty($analysables[$analysableclass])) {
                $analysables[$analysableclass] = array();
            }
            if (empty($analysables[$analysableclass][$rangeindex])) {
                $analysables[$analysableclass][$rangeindex] = (object)[
                    'analysable' => $analysable,
                    'indicatorsdata' => array(),
                    'sampleids' => array()
                ];
            }

            // Using the sampleid as a key so we can easily merge indicators data later.
            $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
            // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
            $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

            // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
            $sampleids[$sampleid] = $sampleid;
        }

        // Get all samples data.
        list($sampleids, $samplesdata) = $this->get_samples($sampleids);

        // Calculate the targets.
        $predictions = array();
        foreach ($analysables as $analysableclass => $rangedata) {
            foreach ($rangedata as $rangeindex => $data) {

                // Attach samples data and calculated indicators data.
                $this->get_target()->clear_sample_data();
                $this->get_target()->add_sample_data($samplesdata);
                $this->get_target()->add_sample_data($data->indicatorsdata);

                // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
                $timesplitting = $this->get_time_splitting();
                $timesplitting->set_modelid($this->get_id());
                $timesplitting->set_analysable($data->analysable);
                $range = $timesplitting->get_range_by_index($rangeindex);

                $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
                $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

                // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
                // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
                // by self::save_prediction.
                $indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid)
                        use ($calculations, $rangeindex) {
                    list($sampleid, $indicatorsrangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    if ($rangeindex == $indicatorsrangeindex && !isset($calculations[$sampleid])) {
                        return false;
                    }
                    return true;
                }, ARRAY_FILTER_USE_BOTH);

                foreach ($calculations as $sampleid => $value) {

                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                    // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                    if (is_null($value)) {
                        unset($indicatorcalculations[$uniquesampleid]);
                        continue;
                    }

                    // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                    // true according to what the developer defined.
                    $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
                }
            }
        }
        return $predictions;
    }

    /**
     * Stores the prediction in the database.
*
     * @param int $sampleid
     * @param int $rangeindex
     * @param int $prediction
     * @param float $predictionscore
     * @param string $calculations
     * @return array Two elements: the record (\stdClass) and the sample access context (\context).
     */
    protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
        $context = $this->get_analyser()->sample_access_context($sampleid);

        $record = (object)[
            'modelid' => $this->model->id,
            'contextid' => $context->id,
            'sampleid' => $sampleid,
            'rangeindex' => $rangeindex,
            'prediction' => $prediction,
            'predictionscore' => $predictionscore,
            'calculations' => $calculations,
            'timecreated' => time(),
        ];

        // The time range is resolved through the time splitting method, once it knows the model and the analysable.
        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $timesplitting = $this->get_time_splitting();
        $timesplitting->set_modelid($this->get_id());
        $timesplitting->set_analysable($analysable);

        // The start and end times are only set when a range is returned for this index.
        if ($range = $timesplitting->get_range_by_index($rangeindex)) {
            $record->timestart = $range['start'];
            $record->timeend = $range['end'];
        }

        return [$record, $context];
    }

    /**
     * Stores the provided prediction records using one bulk insert.
     *
     * @param \stdClass[] $records
     */
    protected function save_predictions($records) {
        global $DB;

        $DB->insert_records('analytics_predictions', $records);
    }

    /**
     * Enables the model using the provided time splitting method.
     *
     * @param string|false $timesplittingid False to respect the current time splitting method.
* @throws \moodle_exception If the time splitting method is invalid or the model has none.
     * @return void
     */
    public function enable($timesplittingid = false) {
        global $DB, $USER;

        $now = time();
        $switching = $timesplittingid && $timesplittingid !== $this->model->timesplitting;

        if (!$switching) {
            if (empty($this->model->timesplitting)) {
                // A valid timesplitting method needs to be supplied before a model can be enabled.
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }
        } else {
            // The id must be a valid time splitting class name, including the leading backslash.
            $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
            if (!$isvalid || substr($timesplittingid, 0, 1) !== '\\') {
                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
            }

            // Delete generated predictions before changing the model version.
            $this->clear();

            // The unique id depends on the version, so it needs to be regenerated.
            $this->uniqueid = null;

            $this->model->timesplitting = $timesplittingid;
            $this->model->version = $now;

            // Reset trained flag.
            if (!$this->is_static()) {
                $this->model->trained = 0;
            }
        }

        // Purge pages with insights as this may change things.
        if ($this->model->enabled != 1) {
            $this->purge_insights_cache();
        }

        // Note that timemodified and usermodified are refreshed on every call, even if nothing else changed.
        $this->model->enabled = 1;
        $this->model->timemodified = $now;
        $this->model->usermodified = $USER->id;

        $DB->update_record('analytics_models', $this->model);
    }

    /**
     * Is this a static model (as defined by the target)?
     *
     * Static models are based on assumptions instead of on machine learning
     * backend results.
*
     * @return bool
     */
    public function is_static() {
        $assumptionsbased = $this->get_target()->based_on_assumptions();
        return (bool)$assumptionsbased;
    }

    /**
     * Is this model enabled?
     *
     * @return bool
     */
    public function is_enabled() {
        return (bool)$this->model->enabled;
    }

    /**
     * Is this model already trained?
     *
     * @return bool
     */
    public function is_trained() {
        if ((bool)$this->model->trained) {
            return true;
        }
        // Models which targets are based on assumptions do not need training.
        return $this->is_static();
    }

    /**
     * Marks the model as trained.
     *
     * Requires permission to manage models.
     *
     * @return void
     */
    public function mark_as_trained() {
        global $DB;

        \core_analytics\manager::check_can_manage_models();

        $this->model->trained = 1;
        $DB->update_record('analytics_models', $this->model);
    }

    /**
     * Get the contexts with predictions.
     *
     * @param bool $skiphidden Skip the predictions the current user already acted on.
     * @return \stdClass[] Records containing the contextid field.
     */
    public function get_predictions_contexts($skiphidden = true) {
        global $DB, $USER;

        $params = ['modelid' => $this->model->id];
        $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
                  JOIN {context} ctx ON ctx.id = ap.contextid
                 WHERE ap.modelid = :modelid";

        if ($skiphidden) {
            // Exclude the predictions that the current user flagged with any of these actions.
            $sql .= " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
                     (apa.actionname = :fixed OR apa.actionname = :notuseful OR
                     apa.actionname = :useful OR apa.actionname = :notapplicable OR
                     apa.actionname = :incorrectlyflagged)
            )";
            $params += [
                'userid' => $USER->id,
                'fixed' => \core_analytics\prediction::ACTION_FIXED,
                'notuseful' => \core_analytics\prediction::ACTION_NOT_USEFUL,
                'useful' => \core_analytics\prediction::ACTION_USEFUL,
                'notapplicable' => \core_analytics\prediction::ACTION_NOT_APPLICABLE,
                'incorrectlyflagged' => \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED,
            ];
        }

        return $DB->get_records_sql($sql, $params);
    }

    /**
     * Has this model generated predictions?
     *
     * We don't check analytics_predictions table because targets have the ability to
     * ignore some predicted values, if that is the case predictions are not even stored
     * in db.
     *
     * @return bool
     */
    public function any_prediction_obtained() {
        global $DB;

        $conditions = [
            'modelid' => $this->model->id,
            'timesplitting' => $this->model->timesplitting,
        ];
        return $DB->record_exists('analytics_predict_samples', $conditions);
    }

    /**
     * Whether this model generates insights or not (defined by the model's target).
     *
     * @return bool
     */
    public function uses_insights() {
        $target = $this->get_target();
        return $target::uses_insights();
    }

    /**
     * Whether predictions exist for this context.
     *
     * @param \context $context
     * @return bool
     */
    public function predictions_exist(\context $context) {
        global $DB;

        // Any prediction of this model in the context counts, regardless of its time range.
        $params = ['modelid' => $this->model->id, 'contextid' => $context->id];
        return $DB->record_exists_select('analytics_predictions', "modelid = :modelid AND contextid = :contextid", $params);
    }

    /**
     * Gets the predictions for this context.
     *
     * @param \context $context
     * @param bool $skiphidden Skip hidden predictions
     * @param int|false $page The page of results to fetch. False for all results.
     * @param int $perpage The max number of results to fetch. Ignored if $page is false.
* @return array Empty array if there is nothing to return, array($total, \core_analytics\prediction[]) otherwise.
     */
    public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
        global $DB, $USER;

        \core_analytics\manager::check_can_list_insights($context);

        // The subquery keeps only the prediction of the last time range of each sample.
        $sql = "SELECT ap.*
                  FROM {analytics_predictions} ap
                  JOIN (
                    SELECT sampleid, max(rangeindex) AS rangeindex
                      FROM {analytics_predictions}
                     WHERE modelid = :modelidsubap and contextid = :contextidsubap
                    GROUP BY sampleid
                  ) apsub
                  ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
                WHERE ap.modelid = :modelid and ap.contextid = :contextid";

        $params = [
            'modelid' => $this->model->id,
            'contextid' => $context->id,
            'modelidsubap' => $this->model->id,
            'contextidsubap' => $context->id,
        ];

        if ($skiphidden) {
            // Exclude the predictions that the current user flagged with any of these actions.
            $sql .= " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND
                     (apa.actionname = :fixed OR apa.actionname = :notuseful OR
                     apa.actionname = :useful OR apa.actionname = :notapplicable OR
                     apa.actionname = :incorrectlyflagged)
            )";
            $params += [
                'userid' => $USER->id,
                'fixed' => \core_analytics\prediction::ACTION_FIXED,
                'notuseful' => \core_analytics\prediction::ACTION_NOT_USEFUL,
                'useful' => \core_analytics\prediction::ACTION_USEFUL,
                'notapplicable' => \core_analytics\prediction::ACTION_NOT_APPLICABLE,
                'incorrectlyflagged' => \core_analytics\prediction::ACTION_INCORRECTLY_FLAGGED,
            ];
        }

        $sql .= " ORDER BY ap.timecreated DESC";
        $predictions = $DB->get_records_sql($sql, $params);
        if (!$predictions) {
            return [];
        }

        // Get predicted samples' ids.
        $sampleids = [];
        foreach ($predictions as $key => $record) {
            $sampleids[$key] = $record->sampleid;
        }

        [, $samplesdata] = $this->get_samples($sampleids);

        $paginate = ($page !== false);
        if ($paginate) {
            $offset = $page * $perpage;
            $limit = $offset + $perpage;
        }

        // Counts the predictions whose sample is still available, it is returned as the total.
        $current = 0;

        foreach ($predictions as $predictionid => $predictiondata) {

            $sampleid = $predictiondata->sampleid;

            // Filter out predictions which samples are not available anymore.
            if (empty($samplesdata[$sampleid])) {
                unset($predictions[$predictionid]);
                continue;
            }

            // We cannot paginate in the DB because the list is post-filtered above.
            $inpage = !$paginate || ($current >= $offset && $current < $limit);
            if ($inpage) {
                // Replace \stdClass object by \core_analytics\prediction objects.
                $predictions[$predictionid] = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
            } else {
                unset($predictions[$predictionid]);
            }

            $current++;
        }

        if (empty($predictions)) {
            return [];
        }

        return [$current, $predictions];
    }

    /**
     * Returns the actions executed by users on the predictions.
*
     * @param \context|null $context Only return the actions on predictions of this context, null for no restriction.
     * @return \moodle_recordset
     */
    public function get_prediction_actions(?\context $context): \moodle_recordset {
        global $DB;

        $params = ['modelid' => $this->model->id];
        $sql = "SELECT apa.id, apa.predictionid, apa.userid, apa.actionname, apa.timecreated,
                       ap.contextid, ap.sampleid, ap.rangeindex, ap.prediction, ap.predictionscore
                  FROM {analytics_prediction_actions} apa
                  JOIN {analytics_predictions} ap ON ap.id = apa.predictionid
                 WHERE ap.modelid = :modelid";

        if ($context) {
            $params['contextid'] = $context->id;
            $sql .= " AND ap.contextid = :contextid";
        }

        return $DB->get_recordset_sql($sql, $params);
    }

    /**
     * Returns the sample data of a prediction.
     *
     * @throws \moodle_exception If the sample is not available.
     * @param \stdClass $predictionobj
     * @return array
     */
    public function prediction_sample_data($predictionobj) {

        $sampleid = $predictionobj->sampleid;
        [, $samplesdata] = $this->get_samples([$sampleid]);

        if (!empty($samplesdata[$sampleid])) {
            return $samplesdata[$sampleid];
        }

        throw new \moodle_exception('errorsamplenotavailable', 'analytics');
    }

    /**
     * Returns the samples data of the provided predictions.
     *
     * The samples are requested straight from the analyser, not through self::get_samples.
     *
     * @param \stdClass[] $predictionrecords
     * @return array
     */
    public function predictions_sample_data(array $predictionrecords): array {

        $sampleids = array_values(array_map(function($predictionobj) {
            return $predictionobj->sampleid;
        }, $predictionrecords));

        [, $samplesdata] = $this->get_analyser()->get_samples($sampleids);

        return $samplesdata;
    }

    /**
     * Appends the calculation info to the samples data.
*
     * Samples without extra calculation info are returned untouched.
     *
     * @param \stdClass[] $predictionrecords
     * @param array $samplesdata Samples data, indexed by sample id.
     * @return array The samples data including the extra info, when available.
     */
    public function append_calculations_info(array $predictionrecords, array $samplesdata): array {

        $extrainfo = calculation_info::pull_info($predictionrecords);
        if (!$extrainfo) {
            return $samplesdata;
        }

        foreach ($samplesdata as $sampleid => $data) {
            // Not all samples necessarily have extra info, and adding a missing entry to an array is an error.
            $sampleextrainfo = $extrainfo[$sampleid] ?? null;
            if (is_array($sampleextrainfo)) {
                // The extra info come prefixed by extra: so we will not have overwrites here.
                $samplesdata[$sampleid] = $data + $sampleextrainfo;
            }
        }

        return $samplesdata;
    }

    /**
     * Returns the description of a sample.
     *
     * @param \core_analytics\prediction $prediction
     * @return array 2 elements: list(string, \renderable)
     */
    public function prediction_sample_description(\core_analytics\prediction $prediction) {
        return $this->get_analyser()->sample_description($prediction->get_prediction_data()->sampleid,
            $prediction->get_prediction_data()->contextid, $prediction->get_sample_data());
    }

    /**
     * Returns the default output directory for prediction processors.
     *
     * @return string
     */
    public static function default_output_dir(): string {
        global $CFG;

        return $CFG->dataroot . DIRECTORY_SEPARATOR . 'models';
    }

    /**
     * Returns the output directory for prediction processors.
     *
     * Directory structure as follows:
     * - Evaluation runs:
     *   models/$model->id/$model->version/evaluation/$model->timesplitting
     * - Training & prediction runs:
     *   models/$model->id/$model->version/execution
     *
     * The directory is created if it does not exist.
     *
     * @param array $subdirs Sub-directories to append after the model version.
     * @param bool $onlymodelid Return the model directory, ignoring the version and $subdirs.
     * @return string
     */
    public function get_output_dir($subdirs = array(), $onlymodelid = false) {
        $subdirstr = '';
        foreach ($subdirs as $subdir) {
            $subdirstr .= DIRECTORY_SEPARATOR . $subdir;
        }

        $outputdir = get_config('analytics', 'modeloutputdir');
        if (empty($outputdir)) {
            // Apply default value.
            $outputdir = self::default_output_dir();
        }

        // Append model id.
        $outputdir .= DIRECTORY_SEPARATOR . $this->model->id;
        if (!$onlymodelid) {
            // Append version + subdirs.
            $outputdir .= DIRECTORY_SEPARATOR . $this->model->version . $subdirstr;
        }

        make_writable_directory($outputdir);

        return $outputdir;
    }

    /**
     * Returns a unique id for this model.
     *
     * This id should be unique for this site. It is calculated once and then reused until it is reset.
     *
     * @return string
     */
    public function get_unique_id() {
        global $CFG;

        if (is_null($this->uniqueid)) {
            // The id is built from the site url, the db prefix, the model id and the model version.
            $ids = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
            $this->uniqueid = sha1(implode('$$', $ids));
        }

        return $this->uniqueid;
    }

    /**
     * Exports the model data for displaying it in a template.
*
     * Requires permission to manage models.
     *
     * @param \renderer_base $output The renderer to use for exporting
     * @return \stdClass A copy of the model record with display-ready values.
     */
    public function export(\renderer_base $output) {

        \core_analytics\manager::check_can_manage_models();

        // Work on a copy so that the model record is not altered.
        $data = clone $this->model;

        $data->modelname = format_string($this->get_name());
        $data->name = $this->inplace_editable_name()->export_for_template($output);
        $data->target = $this->get_target()->get_name();
        $data->targetclass = $this->get_target()->get_id();

        $timesplitting = $this->get_time_splitting();
        if ($timesplitting) {
            $data->timesplitting = $timesplitting->get_name();
        }

        $indicatornames = [];
        foreach ($this->get_indicators() as $indicator) {
            $indicatornames[] = $indicator->get_name();
        }
        $data->indicators = $indicatornames;

        return $data;
    }

    /**
     * Exports the model data to a zip file.
     *
     * Requires permission to manage models.
     *
     * @param string $zipfilename
     * @param bool $includeweights Include the model weights if available
     * @return string Zip file path
     */
    public function export_model(string $zipfilename, bool $includeweights = true) : string {

        \core_analytics\manager::check_can_manage_models();

        return (new model_config($this))->export($zipfilename, $includeweights);
    }

    /**
     * Imports the provided model.
     *
     * Note that this method assumes that model_config::check_dependencies has already been called.
     *
     * @throws \moodle_exception
     * @param string $zipfilepath Zip file path
     * @return \core_analytics\model
     */
    public static function import_model(string $zipfilepath) : \core_analytics\model {

        \core_analytics\manager::check_can_manage_models();

        return (new \core_analytics\model_config())->import($zipfilepath);
    }

    /**
     * Can this model be exported?
*
     * The model needs a time splitting method and indicators, and it can not be a static model.
     *
     * @return bool
     */
    public function can_export_configuration() : bool {
        return !empty($this->model->timesplitting) && $this->get_indicators() && !$this->is_static();
    }

    /**
     * Returns the model logs data, latest first.
     *
     * Requires permission to manage models.
     *
     * @param int $limitfrom
     * @param int $limitnum
     * @return \stdClass[]
     */
    public function get_logs($limitfrom = 0, $limitnum = 0) {
        global $DB;

        \core_analytics\manager::check_can_manage_models();

        $conditions = ['modelid' => $this->get_id()];
        return $DB->get_records('analytics_models_log', $conditions, 'timecreated DESC', '*', $limitfrom, $limitnum);
    }

    /**
     * Merges all training data files into one and returns it.
     *
     * Requires permission to manage models.
     *
     * @return \stored_file|false
     */
    public function get_training_data() {

        \core_analytics\manager::check_can_manage_models();

        return \core_analytics\dataset_manager::export_training_data(
            $this->get_id(),
            $this->get_time_splitting()->get_id()
        );
    }

    /**
     * Has the model been trained using data from this site?
     *
     * This method is useful to determine if a trained model can be evaluated as
     * we can not use the same data for training and for evaluation.
     *
     * @return bool
     */
    public function trained_locally() : bool {
        global $DB;

        // Untrained and static models have not been trained with local data.
        if (!$this->is_trained() || $this->is_static()) {
            return false;
        }

        return $DB->record_exists('analytics_train_samples', ['modelid' => $this->model->id]) ? true : false;
    }

    /**
     * Flag the provided file as used for training or prediction.
*
     * @param \stored_file $file
     * @param string $action
     * @return void
     */
    protected function flag_file_as_used(\stored_file $file, $action) {
        global $DB;

        $usedfile = new \stdClass();
        $usedfile->modelid = $this->model->id;
        $usedfile->fileid = $file->get_id();
        $usedfile->action = $action;
        $usedfile->time = time();
        $DB->insert_record('analytics_used_files', $usedfile);
    }

    /**
     * Log the evaluation results in the database.
     *
     * @param string $timesplittingid
     * @param float $score
     * @param string|false $dir
     * @param array|false $info Stored json-encoded, without its keys. Not stored if empty.
     * @param string $evaluationmode
     * @return int The inserted log id
     */
    protected function log_result($timesplittingid, $score, $dir = false, $info = false, $evaluationmode = 'configuration') {
        global $DB, $USER;

        $log = new \stdClass();
        $log->modelid = $this->get_id();
        $log->version = $this->model->version;
        $log->evaluationmode = $evaluationmode;
        $log->target = $this->model->target;
        $log->indicators = $this->model->indicators;
        $log->timesplitting = $timesplittingid;
        $log->dir = $dir;
        if ($info) {
            // Ensure it is not an associative array.
            $log->info = json_encode(array_values($info));
        }
        $log->score = $score;
        $log->timecreated = time();
        $log->usermodified = $USER->id;

        return $DB->insert_record('analytics_models_log', $log);
    }

    /**
     * Utility method to return indicator class names from a list of indicator objects.
     *
     * @throws \moodle_exception If any of the elements is not a valid indicator.
     * @param \core_analytics\local\indicator\base[] $indicators
     * @return string[]
     */
    private static function indicator_classes($indicators) {

        // What we want to check and store are the indicator classes not the keys.
        $indicatorclasses = array();
        foreach ($indicators as $indicator) {
            if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
                // Convert the invalid value to something that can be displayed in the error message.
                if (!is_object($indicator) && !is_scalar($indicator)) {
                    $indicator = strval($indicator);
                } else if (is_object($indicator)) {
                    $indicator = '\\' . get_class($indicator);
                }
                throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
            }
            $indicatorclasses[] = $indicator->get_id();
        }

        return $indicatorclasses;
    }

    /**
     * Clears the model training and prediction data.
     *
     * Executed after updating model critical elements like the time splitting method
     * or the indicators. Requires permission to manage models.
     *
     * @return void
     */
    public function clear() {
        global $DB, $USER;

        \core_analytics\manager::check_can_manage_models();

        // Delete current model version stored stuff.
        $predictor = $this->get_predictions_processor(false);
        if ($predictor->is_ready() !== true) {
            $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
            debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
                $this->model->id . ' could not be cleared.');
        } else {
            $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
        }

        // The actions need to be deleted first as they are selected through their predictions.
        $DB->delete_records_select('analytics_prediction_actions', "predictionid IN
            (SELECT id FROM {analytics_predictions} WHERE modelid = :modelid)", ['modelid' => $this->get_id()]);

        $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
        $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
        $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
        $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
        $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));

        // Purge all generated files.
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // We don't expect people to clear models regularly and the cost of filling the cache is
        // 1 db read per context.
        $this->purge_insights_cache();

        if (!$this->is_static()) {
            $this->model->trained = 0;
        }

        $this->model->timemodified = time();
        $this->model->usermodified = $USER->id;
        $DB->update_record('analytics_models', $this->model);
    }

    /**
     * Returns the name of the model.
     *
     * By default, models use their target's name as their own name. They can have their explicit name, too. In which
     * case, the explicit name is used instead of the default one.
     *
     * @return string|lang_string
     */
    public function get_name() {

        // The name may be null, and passing null to trim() is deprecated since PHP 8.1.
        if (trim($this->model->name ?? '') === '') {
            return $this->get_target()->get_name();
        }

        return $this->model->name;
    }

    /**
     * Renames the model to the given name.
     *
     * When given an empty string, the model falls back to using the associated target's name as its name.
*
     * @param string $name The new name for the model, empty string for using the default name.
     */
    public function rename(string $name) {
        global $DB, $USER;

        $record = $this->model;
        $record->name = $name;
        $record->timemodified = time();
        $record->usermodified = $USER->id;

        $DB->update_record('analytics_models', $record);
    }

    /**
     * Returns an inplace editable element with the model's name.
     *
     * The element is only editable by users who can manage models in the system context.
     *
     * @return \core\output\inplace_editable
     */
    public function inplace_editable_name() {

        $displayname = format_string($this->get_name());
        $editable = has_capability('moodle/analytics:managemodels', \context_system::instance());

        return new \core\output\inplace_editable('core_analytics', 'modelname', $this->model->id,
            $editable, $displayname, $this->model->name);
    }

    /**
     * Returns true if the time-splitting method used by this model is invalid for this model.
     *
     * @return bool
     */
    public function invalid_timesplitting_selected(): bool {
        $selected = $this->model->timesplitting;
        if (empty($selected)) {
            // Not set is different from invalid. This function is used to identify invalid
            // time-splittings.
            return false;
        }

        // Invalid if it is not one of the time-splitting methods that this model can use.
        $candidates = $this->get_potential_timesplittings();
        return empty($candidates[$selected]);
    }

    /**
     * Adds the id from {analytics_predictions} db table to the prediction \stdClass objects.
     *
     * @param \stdClass[] $predictionrecords
     * @return \stdClass[] The prediction records including their ids in {analytics_predictions} db table.
*/
    private function add_prediction_ids($predictionrecords) {
        global $DB;

        if (empty($predictionrecords)) {
            // Nothing to map.
            return $predictionrecords;
        }

        $firstprediction = reset($predictionrecords);

        // Many predictions share the same context, each context id only needs to be sent once.
        $contextids = [];
        foreach ($predictionrecords as $predictionobj) {
            $contextids[$predictionobj->contextid] = $predictionobj->contextid;
        }

        // Limited to 30000 records as a middle point between the ~65000 params limit in pgsql and the size limit for mysql which
        // can be increased if required up to a reasonable point.
        $chunks = array_chunk($contextids, 30000);
        foreach ($chunks as $contextidschunk) {
            list($contextsql, $contextparams) = $DB->get_in_or_equal($contextidschunk, SQL_PARAMS_NAMED);

            // We select the fields that will allow us to map ids to $predictionrecords. Given that we already filter by modelid
            // we have enough with sampleid and rangeindex. The reason is that the sampleid relation to a site is N - 1.
            $fields = 'id, sampleid, rangeindex';

            // We include the contextid and the timecreated filter to reduce the number of records in $dbpredictions. We can not
            // add as many OR conditions as records in $predictionrecords.
            $sql = "SELECT $fields
                      FROM {analytics_predictions}
                     WHERE modelid = :modelid
                           AND contextid $contextsql
                           AND timecreated >= :firsttimecreated";
            $params = $contextparams + ['modelid' => $this->model->id, 'firsttimecreated' => $firstprediction->timecreated];

            $dbpredictions = $DB->get_recordset_sql($sql, $params);
            foreach ($dbpredictions as $dbprediction) {
                // The append_rangeindex implementation is the same regardless of the time splitting method in use.
                $uniqueid = $this->get_time_splitting()->append_rangeindex($dbprediction->sampleid, $dbprediction->rangeindex);

                // The query can return predictions that are not part of $predictionrecords (it only filters by
                // context and creation time), these are ignored.
                if (isset($predictionrecords[$uniqueid])) {
                    $predictionrecords[$uniqueid]->id = $dbprediction->id;
                }
            }
            // Recordsets must always be closed to release the database resources.
            $dbpredictions->close();
        }

        return $predictionrecords;
    }

    /**
     * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
*
     * @throws \coding_exception If no sample ids are provided.
     * @throws \dml_read_exception If the samples can not be read even one by one.
     * @param array $sampleids
     * @return array Two elements: the sample ids and the samples data.
     */
    public function get_samples(array $sampleids): array {

        if (empty($sampleids)) {
            throw new \coding_exception('No sample ids provided');
        }

        $chunksize = count($sampleids);
        $done = false;
        $lastexception = null;

        // We start with just 1 chunk, if it is too large for the db we halve the chunk size and we try again
        // from scratch. We repeat this process until the chunks are small enough for the db engine to process.
        // The >= 1 stops the loop if there are other \dml_read_exceptions unrelated to the max number of params.
        while (!$done && $chunksize >= 1) {

            $chunks = array_chunk($sampleids, $chunksize);
            $allsampleids = [];
            $allsamplesdata = [];

            // Assume success, a failing chunk resets the flag.
            $done = true;

            foreach ($chunks as $chunk) {

                try {
                    list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
                } catch (\dml_read_exception $e) {
                    $lastexception = $e;

                    // Integer division so the chunk size stays an int and always decreases.
                    $chunksize = intdiv($chunksize, 2);
                    $done = false;
                    break;
                }

                // We can sum as these two arrays are indexed by sampleid and there are no collisions.
                $allsampleids = $allsampleids + $chunksampleids;
                $allsamplesdata = $allsamplesdata + $chunksamplesdata;
            }
        }

        if (!$done) {
            // Even the smallest chunks failed, so the error is unrelated to the max number of params.
            // The caught exception is rethrown untouched to preserve its message and debugging info.
            throw $lastexception;
        }

        return [$allsampleids, $allsamplesdata];
    }

    /**
     * Contexts where this model should be active.
     *
     * The list is calculated once and then reused.
     *
     * @return \context[] Empty array if there are no context restrictions.
     */
    public function get_contexts() {
        if ($this->contexts !== null) {
            return $this->contexts;
        }

        if (!$this->model->contextids) {
            $this->contexts = [];
            return $this->contexts;
        }
        $contextids = json_decode($this->model->contextids);

        // We don't expect this list to be massive as contexts need to be selected manually using the edit model form.
        // NOTE(review): IGNORE_MISSING presumably makes instance_by_id return false for deleted contexts, so the
        // list may contain non-context values; confirm how callers deal with them.
        $this->contexts = array_map(function($contextid) {
            return \context::instance_by_id($contextid, IGNORE_MISSING);
        }, $contextids);

        return $this->contexts;
    }

    /**
     * Purges the insights cache.
     */
    private function purge_insights_cache() {
        $cache = \cache::make('core', 'contextwithinsights');
        $cache->purge();
    }

    /**
     * Increases system memory and time limits.
     *
     * @return void
     */
    private function heavy_duty_mode() {
        // The memory limit is not raised when it is already set to -1.
        if (ini_get('memory_limit') != -1) {
            raise_memory_limit(MEMORY_HUGE);
        }
        \core_php_time_limit::raise();
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body