Moodle 4.2 XRef and Diffs

Search moodle.org's
Developer Documentation
Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.
Moodle 4.2 Database Schema (by Marcus Green)
/mod/book/tool/importhtml/ -> locallib.php (source)
Differences Between: [Versions 402 and 403]
   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * HTML import lib
  19   *
  20   * @package    booktool_importhtml
  21   * @copyright  2011 Petr Skoda {@link http://skodak.org}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  defined('MOODLE_INTERNAL') || die;
  26  
  27  require_once (__DIR__.'/lib.php');
  28  require_once($CFG->dirroot.'/mod/book/locallib.php');
  29  
  30  /**
  31   * Import HTML pages packaged into one zip archive
  32   *
  33   * @param stored_file $package
  34   * @param string $type type of the package ('typezipdirs' or 'typezipfiles')
  35   * @param stdClass $book
  36   * @param context_module $context
  37   * @param bool $verbose
  38   */
  39  function toolbook_importhtml_import_chapters($package, $type, $book, $context, $verbose = true) {
  40      global $DB, $OUTPUT;
  41  
  42      $fs = get_file_storage();
  43      $chapterfiles = toolbook_importhtml_get_chapter_files($package, $type);
  44      $packer = get_file_packer('application/zip');
  45      $fs->delete_area_files($context->id, 'mod_book', 'importhtmltemp', 0);
  46      $package->extract_to_storage($packer, $context->id, 'mod_book', 'importhtmltemp', 0, '/');
  47      // $datafiles = $fs->get_area_files($context->id, 'mod_book', 'importhtmltemp', 0, 'id', false);
  48      // echo "<pre>";p(var_export($datafiles, true));
  49  
  50      $chapters = array();
  51  
  52      if ($verbose) {
  53          echo $OUTPUT->notification(get_string('importing', 'booktool_importhtml'), 'notifysuccess');
  54      }
  55      if ($type == 0) {
  56          $chapterfile = reset($chapterfiles);
  57          if ($file = $fs->get_file_by_hash(sha1("$context->id/mod_book/importhtmltemp/0/$chapterfile->pathname"))) {
  58              $htmlcontent = toolbook_importhtml_fix_encoding($file->get_content());
  59              $htmlchapters = toolbook_importhtml_parse_headings(toolbook_importhtml_parse_body($htmlcontent));
  60              // TODO: process h1 as main chapter and h2 as subchapters
  61          }
  62      } else {
  63          foreach ($chapterfiles as $chapterfile) {
  64              if ($file = $fs->get_file_by_hash(sha1("/$context->id/mod_book/importhtmltemp/0/$chapterfile->pathname"))) {
  65                  $chapter = new stdClass();
  66                  $htmlcontent = toolbook_importhtml_fix_encoding($file->get_content());
  67  
  68                  $chapter->bookid        = $book->id;
  69                  $chapter->pagenum       = $DB->get_field_sql('SELECT MAX(pagenum) FROM {book_chapters} WHERE bookid = ?', array($book->id)) + 1;
  70                  $chapter->importsrc     = '/'.$chapterfile->pathname;
  71                  $chapter->content       = toolbook_importhtml_parse_styles($htmlcontent);
  72                  $chapter->content       .= toolbook_importhtml_parse_body($htmlcontent);
  73                  $chapter->title         = toolbook_importhtml_parse_title($htmlcontent, $chapterfile->pathname);
  74                  $chapter->contentformat = FORMAT_HTML;
  75                  $chapter->hidden        = 0;
  76                  $chapter->timecreated   = time();
  77                  $chapter->timemodified  = time();
  78                  if (preg_match('/_sub(\/|\.htm)/i', $chapter->importsrc)) { // If filename or directory ends with *_sub treat as subchapters
  79                      $chapter->subchapter = 1;
  80                  } else {
  81                      $chapter->subchapter = 0;
  82                  }
  83  
  84                  $chapter->id = $DB->insert_record('book_chapters', $chapter);
  85                  $chapter = $DB->get_record('book_chapters', array('id' => $chapter->id));
  86                  $chapters[$chapter->id] = $chapter;
  87  
  88                  \mod_book\event\chapter_created::create_from_chapter($book, $context, $chapter)->trigger();
  89              }
  90          }
  91      }
  92  
  93      if ($verbose) {
  94          echo $OUTPUT->notification(get_string('relinking', 'booktool_importhtml'), 'notifysuccess');
  95      }
  96      $allchapters = $DB->get_records('book_chapters', array('bookid'=>$book->id), 'pagenum');
  97      foreach ($chapters as $chapter) {
  98          // find references to all files and copy them + relink them
  99          $matches = null;
 100          if (preg_match_all('/(src|codebase|name|href)\s*=\s*"([^"]+)"/i', $chapter->content, $matches)) {
 101              $file_record = array('contextid'=>$context->id, 'component'=>'mod_book', 'filearea'=>'chapter', 'itemid'=>$chapter->id);
 102              foreach ($matches[0] as $i => $match) {
 103                  $filepath = dirname($chapter->importsrc).'/'.$matches[2][$i];
 104                  $filepath = toolbook_importhtml_fix_path($filepath);
 105  
 106                  if (strtolower($matches[1][$i]) === 'href') {
 107                      // skip linked html files, we will try chapter relinking later
 108                      foreach ($allchapters as $target) {
 109                          if ($target->importsrc === $filepath) {
 110                              continue 2;
 111                          }
 112                      }
 113                  }
 114  
 115                  if ($file = $fs->get_file_by_hash(sha1("/$context->id/mod_book/importhtmltemp/0$filepath"))) {
 116                      if (!$oldfile = $fs->get_file_by_hash(sha1("/$context->id/mod_book/chapter/$chapter->id$filepath"))) {
 117                          $fs->create_file_from_storedfile($file_record, $file);
 118                      }
 119                      $chapter->content = str_replace($match, $matches[1][$i].'="@@PLUGINFILE@@'.$filepath.'"', $chapter->content);
 120                  }
 121              }
 122              $DB->set_field('book_chapters', 'content', $chapter->content, array('id'=>$chapter->id));
 123          }
 124      }
 125      unset($chapters);
 126  
 127      $allchapters = $DB->get_records('book_chapters', array('bookid'=>$book->id), 'pagenum');
 128      foreach ($allchapters as $chapter) {
 129          $newcontent = $chapter->content;
 130          $matches = null;
 131          if (preg_match_all('/(href)\s*=\s*"([^"]+)"/i', $chapter->content, $matches)) {
 132              foreach ($matches[0] as $i => $match) {
 133                  if (strpos($matches[2][$i], ':') !== false or strpos($matches[2][$i], '@') !== false) {
 134                      // it is either absolute or pluginfile link
 135                      continue;
 136                  }
 137                  $chapterpath = dirname($chapter->importsrc).'/'.$matches[2][$i];
 138                  $chapterpath = toolbook_importhtml_fix_path($chapterpath);
 139                  foreach ($allchapters as $target) {
 140                      if ($target->importsrc === $chapterpath) {
 141                          $newcontent = str_replace($match, 'href="'.new moodle_url('/mod/book/view.php',
 142                                  array('id'=>$context->instanceid, 'chapterid'=>$target->id)).'"', $newcontent);
 143                      }
 144                  }
 145              }
 146          }
 147          if ($newcontent !== $chapter->content) {
 148              $DB->set_field('book_chapters', 'content', $newcontent, array('id'=>$chapter->id));
 149          }
 150      }
 151  
 152      $fs->delete_area_files($context->id, 'mod_book', 'importhtmltemp', 0);
 153  
 154      // update the revision flag - this takes a long time, better to refetch the current value
 155      $book = $DB->get_record('book', array('id'=>$book->id));
 156      $DB->set_field('book', 'revision', $book->revision+1, array('id'=>$book->id));
 157  }
 158  
 159  /**
 160   * Parse the headings of the imported package of type 'typeonefile'
 161   * (currently unsupported)
 162   *
 163   * @param string $html html content to parse
 164   * @todo implement this once the type 'typeonefile' is enabled
 165   */
 166  function toolbook_importhtml_parse_headings($html) {
 167  }
 168  
 169  /**
 170   * Parse the links to external css sheets of the imported html content
 171   *
 172   * @param string $html html content to parse
 173   * @return string all the links to external css sheets
 174   */
 175  function toolbook_importhtml_parse_styles($html) {
 176      $styles = '';
 177      if (preg_match('/<head[^>]*>(.+)<\/head>/is', $html, $matches)) {
 178          $head = $matches[1];
 179          if (preg_match_all('/<link[^>]+rel="stylesheet"[^>]*>/i', $head, $matches)) { // Extract links to css.
 180              for ($i=0; $i<count($matches[0]); $i++) {
 181                  $styles .= $matches[0][$i]."\n";
 182              }
 183          }
 184      }
 185      return $styles;
 186  }
 187  
 188  /**
 189   * Normalize paths to be absolute
 190   *
 191   * @param string $path original path with MS/relative separators
 192   * @return string the normalized and cleaned absolute path
 193   */
 194  function toolbook_importhtml_fix_path($path) {
 195      $path = str_replace('\\', '/', $path); // anti MS hack
 196      $path = '/'.ltrim($path, './'); // dirname() produces . for top level files + our paths start with /
 197  
 198      $cnt = substr_count($path, '..');
 199      for ($i=0; $i<$cnt; $i++) {
 200          $path = preg_replace('|[^/]+/\.\./|', '', $path, 1);
 201      }
 202  
 203      $path = clean_param($path, PARAM_PATH);
 204      return $path;
 205  }
 206  
 207  /**
 208   * Convert some html content to utf8, getting original encoding from html headers
 209   *
 210   * @param string $html html content to convert
 211   * @return string html content converted to utf8
 212   */
 213  function toolbook_importhtml_fix_encoding($html) {
 214      if (preg_match('/<head[^>]*>(.+)<\/head>/is', $html, $matches)) {
 215          $head = $matches[1];
 216          if (preg_match('/charset=([^"]+)/is', $head, $matches)) {
 217              $enc = $matches[1];
 218              return core_text::convert($html, $enc, 'utf-8');
 219          }
 220      }
 221      return iconv('UTF-8', 'UTF-8//IGNORE', $html);
 222  }
 223  
 224  /**
 225   * Extract the body from any html contents
 226   *
 227   * @param string $html the html to parse
 228   * @return string the contents of the body
 229   */
 230  function toolbook_importhtml_parse_body($html) {
 231      $matches = null;
 232      if (preg_match('/<body[^>]*>(.+)<\/body>/is', $html, $matches)) {
 233          return $matches[1];
 234      } else {
 235          return '';
 236      }
 237  }
 238  
 239  /**
 240   * Extract the title of any html content, getting it from the title tag
 241   *
 242   * @param string $html the html to parse
 243   * @param string $default default title to apply if no title is found
 244   * @return string the resulting title
 245   */
 246  function toolbook_importhtml_parse_title($html, $default) {
 247      $matches = null;
 248      if (preg_match('/<title>([^<]+)<\/title>/i', $html, $matches)) {
 249          return $matches[1];
 250      } else {
 251          return $default;
 252      }
 253  }
 254  
 255  /**
 256   * Returns all the html files (chapters) from a file package
 257   *
 258   * @param stored_file $package file to be processed
 259   * @param string $type type of the package ('typezipdirs' or 'typezipfiles')
 260   *
 261   * @return array the html files found in the package
 262   */
 263  function toolbook_importhtml_get_chapter_files($package, $type) {
 264      $packer = get_file_packer('application/zip');
 265      $files = $package->list_files($packer);
 266      $tophtmlfiles = array();
 267      $subhtmlfiles = array();
 268      $topdirs = array();
 269  
 270      foreach ($files as $file) {
 271          if (empty($file->pathname)) {
 272              continue;
 273          }
 274          if (substr($file->pathname, -1) === '/') {
 275              if (substr_count($file->pathname, '/') !== 1) {
 276                  // skip subdirs
 277                  continue;
 278              }
 279              if (!isset($topdirs[$file->pathname])) {
 280                  $topdirs[$file->pathname] = array();
 281              }
 282  
 283          } else {
 284              $mime = mimeinfo('icon', $file->pathname);
 285              if ($mime !== 'html') {
 286                  continue;
 287              }
 288              $level = substr_count($file->pathname, '/');
 289              if ($level === 0) {
 290                  $tophtmlfiles[$file->pathname] = $file;
 291              } else if ($level === 1) {
 292                  $subhtmlfiles[$file->pathname] = $file;
 293                  $dir = preg_replace('|/.*$|', '', $file->pathname);
 294                  $topdirs[$dir][$file->pathname] = $file;
 295              } else {
 296                  // lower levels are not interesting
 297                  continue;
 298              }
 299          }
 300      }
 301  
 302      core_collator::ksort($tophtmlfiles, core_collator::SORT_NATURAL);
 303      core_collator::ksort($subhtmlfiles, core_collator::SORT_NATURAL);
 304      core_collator::ksort($topdirs, core_collator::SORT_NATURAL);
 305  
 306      $chapterfiles = array();
 307  
 308      if ($type == 2) {
 309          $chapterfiles = $tophtmlfiles;
 310  
 311      } else if ($type == 1) {
 312          foreach ($topdirs as $dir => $htmlfiles) {
 313              if (empty($htmlfiles)) {
 314                  continue;
 315              }
 316              core_collator::ksort($htmlfiles, core_collator::SORT_NATURAL);
 317              if (isset($htmlfiles[$dir.'/index.html'])) {
 318                  $htmlfile = $htmlfiles[$dir.'/index.html'];
 319              } else if (isset($htmlfiles[$dir.'/index.htm'])) {
 320                  $htmlfile = $htmlfiles[$dir.'/index.htm'];
 321              } else if (isset($htmlfiles[$dir.'/Default.htm'])) {
 322                  $htmlfile = $htmlfiles[$dir.'/Default.htm'];
 323              } else {
 324                  $htmlfile = reset($htmlfiles);
 325              }
 326              $chapterfiles[$htmlfile->pathname] = $htmlfile;
 327          }
 328      } else if ($type == 0) {
 329          if ($tophtmlfiles) {
 330              if (isset($tophtmlfiles['index.html'])) {
 331                  $htmlfile = $tophtmlfiles['index.html'];
 332              } else if (isset($tophtmlfiles['index.htm'])) {
 333                  $htmlfile = $tophtmlfiles['index.htm'];
 334              } else if (isset($tophtmlfiles['Default.htm'])) {
 335                  $htmlfile = $tophtmlfiles['Default.htm'];
 336              } else {
 337                  $htmlfile = reset($tophtmlfiles);
 338              }
 339          } else {
 340              $htmlfile = reset($subhtmlfiles);
 341          }
 342          $chapterfiles[$htmlfile->pathname] = $htmlfile;
 343      }
 344  
 345      return $chapterfiles;
 346  }