<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Keeps track of the analysis results by storing the results in files.
 *
 * @package     core_analytics
 * @copyright   2019 David Monllao {@link http://www.davidmonllao.com}
 * @license     http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace core_analytics\local\analysis;

defined('MOODLE_INTERNAL') || die();

/**
 * Analysis result tracker that keeps its results as files.
 *
 * Per-analysable files are collected per time-splitting method and merged
 * into one dataset per time-splitting method when {@see result_file::get()} is called.
 *
 * @package     core_analytics
 * @copyright   2019 David Monllao {@link http://www.davidmonllao.com}
 * @license     http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class result_file extends result {

    /**
     * Collected result files, indexed by time-splitting method id.
     *
     * Each entry is a list of the files gathered for that time-splitting method.
     *
     * @var array
     */
    private $filesbytimesplitting = [];

    /**
     * Registers the results produced for one analysable.
     *
     * Entries whose ->result is empty are ignored.
     *
     * @param array $results Result objects indexed by time-splitting method id.
     * @return bool True if at least one non-empty result was stored.
     */
    public function add_analysable_results(array $results): bool {

        $stored = false;

        foreach ($results as $timesplittingid => $analysisresult) {
            if (empty($analysisresult->result)) {
                // Nothing usable for this time-splitting method.
                continue;
            }
            $this->filesbytimesplitting[$timesplittingid][] = $analysisresult->result;
            $stored = true;
        }

        return $stored;
    }

    /**
     * Looks for a reusable result generated by a previous evaluation.
     *
     * Only attempted when both the 'evaluation' and 'reuseprevanalysed' options are set.
     *
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed The previously generated file (a \stored_file in this case) or false if none can be reused.
     */
    public function retrieve_cached_result(\core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\analysable $analysable) {

        // Evaluations do not need perfectly fresh data: a file generated less than one week ago
        // is good enough. Reusing it helps the evaluation process scale on sites with a large
        // number of analysables.
        if (empty($this->options['evaluation']) || empty($this->options['reuseprevanalysed'])) {
            return false;
        }

        $cachedfile = \core_analytics\dataset_manager::get_evaluation_analysable_file(
            $this->modelid,
            $analysable->get_id(),
            $timesplitting->get_id()
        );

        // One week is a fairly arbitrary interval, so DST shifts are irrelevant here.
        $oldestallowed = time() - WEEKSECS;

        if (!$cachedfile) {
            return false;
        }

        if ($cachedfile->get_timecreated() > $oldestallowed) {
            // Recent enough, hand back the existing file instead of generating a new one.
            return $cachedfile;
        }

        return false;
    }

    /**
     * Writes the calculated data, preceded by the model metadata, to a dataset file.
     *
     * @param array $data
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed Whatever the dataset manager returned when storing the data (a \stored_file
     *               in this case) or false if that value was falsy.
     */
    public function format_result(array $data, \core_analytics\local\target\base $target,
            \core_analytics\local\time_splitting\base $timesplitting, \core_analytics\analysable $analysable) {

        $filearea = empty($this->includetarget)
            ? \core_analytics\dataset_manager::UNLABELLED_FILEAREA
            : \core_analytics\dataset_manager::LABELLED_FILEAREA;

        $dataset = new \core_analytics\dataset_manager(
            $this->modelid,
            $analysable->get_id(),
            $timesplitting->get_id(),
            $filearea,
            $this->options['evaluation']
        );

        // Prepend the metadata rows ($data is modified in place).
        $this->add_model_metadata($data, $timesplitting, $target);

        // Dump everything to a file.
        $storedfile = $dataset->store($data);

        return $storedfile ?: false;
    }

    /**
     * Returns the results of the analysis, one merged dataset per time-splitting method.
     *
     * @return array Merged datasets indexed by time-splitting method id.
     */
    public function get(): array {

        $pendingfiles = [];

        if ($this->options['evaluation'] === false) {
            // Pick up training and prediction files generated by previous runs that the machine
            // learning backends could not use because they were not big enough.
            $pendingfiles = \core_analytics\dataset_manager::get_pending_files(
                $this->modelid,
                $this->includetarget,
                array_keys($this->filesbytimesplitting)
            );

            foreach ($pendingfiles as $pendingid => $pendinglist) {
                foreach ($pendinglist as $pendingfile) {
                    $this->filesbytimesplitting[$pendingid][] = $pendingfile;
                }
            }
        }

        // Join the datasets, one merged dataset per time-splitting method.
        $merged = [];
        foreach ($this->filesbytimesplitting as $timesplittingid => $partialfiles) {

            if ($this->options['evaluation'] === true) {
                // The previous copy is only discarded when evaluating.
                \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
            }

            $filearea = $this->includetarget
                ? \core_analytics\dataset_manager::LABELLED_FILEAREA
                : \core_analytics\dataset_manager::UNLABELLED_FILEAREA;

            // All the files of this time-splitting method become a single one.
            $merged[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets(
                $partialfiles,
                $this->modelid,
                $timesplittingid,
                $filearea,
                $this->options['evaluation']
            );
        }

        if (!empty($pendingfiles)) {
            // The pending files are part of the merged datasets now, so they have to go.
            foreach ($pendingfiles as $pendinglist) {
                foreach ($pendinglist as $pendingfile) {
                    $pendingfile->delete();
                }
            }
        }

        return $merged;
    }

    /**
     * Prepends the model metadata to the dataset.
     *
     * Two rows are added at the beginning of $data: the metadata names and the metadata values.
     * The final dataset document will look like this:
     * ----------------------------------------------------
     * metadata1,metadata2,metadata3,.....
     * value1, value2, value3,.....
     *
     * header1,header2,header3,header4,.....
     * stud1value1,stud1value2,stud1value3,stud1value4,.....
     * stud2value1,stud2value2,stud2value3,stud2value4,.....
     * .....
     * ----------------------------------------------------
     *
     * @param array $data The dataset, modified in place.
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\local\target\base $target
     * @return void
     */
    private function add_model_metadata(array &$data, \core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\local\target\base $target) {
        global $CFG;

        // Without target the first column is the sampleid, with target the last column is the target,
        // either way one column of the current row is not a feature.
        // This will need to be updated when we support unsupervised learning models.
        $general = [
            'timesplitting' => $timesplitting->get_id(),
            'nfeatures' => count(current($data)) - 1,
            'moodleversion' => $CFG->version,
            'targetcolumn' => $target->get_id(),
        ];

        if ($target->is_linear()) {
            $targetinfo = [
                'targettype' => 'linear',
                'targetmin' => $target::get_min_value(),
                'targetmax' => $target::get_max_value(),
            ];
        } else {
            $targetinfo = [
                'targettype' => 'discrete',
                'targetclasses' => json_encode($target::get_classes()),
            ];
        }

        // No key is shared between both arrays, the union just appends the target entries.
        $metadata = $general + $targetinfo;

        // The first 2 rows of the dataset hold the metadata: names first, values second.
        // Note that array_unshift also resets the numeric keys of the samples.
        array_unshift($data, array_keys($metadata), array_values($metadata));
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body