Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.
   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Keeps track of the analysis results by storing the results in files.
  19   *
  20   * @package   core_analytics
  21   * @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
  22   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  namespace core_analytics\local\analysis;
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Keeps track of the analysis results by storing the results in files.
  31   *
  32   * @package   core_analytics
  33   * @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
  34   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  35   */
  36  class result_file extends result {
  37  
  38      /**
  39       * Stores the analysis results by time-splitting method.
  40       * @var array
  41       */
  42      private $filesbytimesplitting = [];
  43  
  44      /**
  45       * Stores the analysis results.
  46       * @param  array $results
  47       * @return bool            True if anything was successfully analysed
  48       */
  49      public function add_analysable_results(array $results): bool {
  50  
  51          $any = false;
  52  
  53          // Process all provided time splitting methods.
  54          foreach ($results as $timesplittingid => $result) {
  55              if (!empty($result->result)) {
  56                  $this->filesbytimesplitting[$timesplittingid][] = $result->result;
  57                  $any = true;
  58              }
  59          }
  60  
  61          if (empty($any)) {
  62              return false;
  63          }
  64          return true;
  65      }
  66  
  67      /**
  68       * Retrieves cached results during evaluation.
  69       *
  70       * @param  \core_analytics\local\time_splitting\base $timesplitting
  71       * @param  \core_analytics\analysable                $analysable
  72       * @return mixed A \stored_file in this case.
  73       */
  74      public function retrieve_cached_result(\core_analytics\local\time_splitting\base $timesplitting,
  75          \core_analytics\analysable $analysable) {
  76  
  77          // For evaluation purposes we don't need to be that strict about how updated the data is,
  78          // if this analyser was analysed less that 1 week ago we skip generating a new one. This
  79          // helps scale the evaluation process as sites with tons of courses may need a lot of time to
  80          // complete an evaluation.
  81          if (!empty($this->options['evaluation']) && !empty($this->options['reuseprevanalysed'])) {
  82  
  83              $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
  84                  $analysable->get_id(), $timesplitting->get_id());
  85              // 1 week is a partly random time interval, no need to worry about DST.
  86              $boundary = time() - WEEKSECS;
  87              if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
  88                  // Recover the previous analysed file and avoid generating a new one.
  89                  return $previousanalysis;
  90              }
  91          }
  92  
  93          return false;
  94      }
  95  
  96      /**
  97       * Formats the result.
  98       *
  99       * @param  array                                     $data
 100       * @param  \core_analytics\local\target\base         $target
 101       * @param  \core_analytics\local\time_splitting\base $timesplitting
 102       * @param  \core_analytics\analysable                $analysable
 103       * @return mixed A \stored_file in this case
 104       */
 105      public function format_result(array $data, \core_analytics\local\target\base $target,
 106              \core_analytics\local\time_splitting\base $timesplitting, \core_analytics\analysable $analysable) {
 107  
 108          if (!empty($this->includetarget)) {
 109              $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
 110          } else {
 111              $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
 112          }
 113          $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(),
 114              $timesplitting->get_id(), $filearea, $this->options['evaluation']);
 115  
 116          // Add extra metadata.
 117          $this->add_model_metadata($data, $timesplitting, $target);
 118  
 119          // Write all calculated data to a file.
 120          if (!$result = $dataset->store($data)) {
 121              return false;
 122          }
 123  
 124          return $result;
 125      }
 126  
 127      /**
 128       * Returns the results of the analysis.
 129       * @return array
 130       */
 131      public function get(): array {
 132  
 133          if ($this->options['evaluation'] === false) {
 134              // Look for previous training and prediction files we generated and couldn't be used
 135              // by machine learning backends because they weren't big enough.
 136  
 137              $pendingfiles = \core_analytics\dataset_manager::get_pending_files($this->modelid, $this->includetarget,
 138                  array_keys($this->filesbytimesplitting));
 139              foreach ($pendingfiles as $timesplittingid => $files) {
 140                  foreach ($files as $file) {
 141                      $this->filesbytimesplitting[$timesplittingid][] = $file;
 142                  }
 143              }
 144          }
 145  
 146          // We join the datasets by time splitting method.
 147          $timesplittingfiles = array();
 148          foreach ($this->filesbytimesplitting as $timesplittingid => $files) {
 149  
 150              if ($this->options['evaluation'] === true) {
 151                  // Delete the previous copy. Only when evaluating.
 152                  \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
 153              }
 154  
 155              // Merge all course files into one.
 156              if ($this->includetarget) {
 157                  $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
 158              } else {
 159                  $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
 160              }
 161              $timesplittingfiles[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets($files,
 162                  $this->modelid, $timesplittingid, $filearea, $this->options['evaluation']);
 163          }
 164  
 165          if (!empty($pendingfiles)) {
 166              // We must remove them now as they are already part of another dataset.
 167              foreach ($pendingfiles as $timesplittingid => $files) {
 168                  foreach ($files as $file) {
 169                      $file->delete();
 170                  }
 171              }
 172          }
 173  
 174          return $timesplittingfiles;
 175      }
 176  
 177      /**
 178       * Adds target metadata to the dataset.
 179       *
 180       * The final dataset document will look like this:
 181       * ----------------------------------------------------
 182       * metadata1,metadata2,metadata3,.....
 183       * value1, value2, value3,.....
 184       *
 185       * header1,header2,header3,header4,.....
 186       * stud1value1,stud1value2,stud1value3,stud1value4,.....
 187       * stud2value1,stud2value2,stud2value3,stud2value4,.....
 188       * .....
 189       * ----------------------------------------------------
 190       *
 191       * @param array $data
 192       * @param \core_analytics\local\time_splitting\base $timesplitting
 193       * @param \core_analytics\local\target\base         $target
 194       * @return null
 195       */
 196      private function add_model_metadata(array &$data, \core_analytics\local\time_splitting\base $timesplitting,
 197              \core_analytics\local\target\base $target) {
 198          global $CFG;
 199  
 200          // If no target the first column is the sampleid, if target the last column is the target.
 201          // This will need to be updated when we support unsupervised learning models.
 202          $metadata = array(
 203              'timesplitting' => $timesplitting->get_id(),
 204              'nfeatures' => count(current($data)) - 1,
 205              'moodleversion' => $CFG->version,
 206              'targetcolumn' => $target->get_id()
 207          );
 208          if ($target->is_linear()) {
 209              $metadata['targettype'] = 'linear';
 210              $metadata['targetmin'] = $target::get_min_value();
 211              $metadata['targetmax'] = $target::get_max_value();
 212          } else {
 213              $metadata['targettype'] = 'discrete';
 214              $metadata['targetclasses'] = json_encode($target::get_classes());
 215          }
 216  
 217          // The first 2 samples will be used to store metadata about the dataset.
 218          $metadatacolumns = [];
 219          $metadatavalues = [];
 220          foreach ($metadata as $key => $value) {
 221              $metadatacolumns[] = $key;
 222              $metadatavalues[] = $value;
 223          }
 224  
 225          // This will also reset samples' dataset keys.
 226          array_unshift($data, $metadatacolumns, $metadatavalues);
 227      }
 228  }