Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401]

   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Implementation of .tar.gz extractor. Handles extraction of .tar.gz files.
  19   * Do not call directly; use methods in tgz_packer.
  20   *
  21   * @see tgz_packer
  22   * @package core_files
  23   * @copyright 2013 The Open University
  24   * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  /**
  30   * Extracts .tar.gz files (POSIX format).
  31   */
  32  class tgz_extractor {
  33      /**
  34       * @var int When writing data, the system writes blocks of this size.
  35       */
  36      const WRITE_BLOCK_SIZE = 65536;
  37      /**
  38       * @var int When reading data, the system reads blocks of this size.
  39       */
  40      const READ_BLOCK_SIZE = 65536;
  41      /**
  42       * @var stored_file File object for archive.
  43       */
  44      protected $storedfile;
  45      /**
  46       * @var string OS path for archive.
  47       */
  48      protected $ospath;
  49      /**
  50       * @var int Number of files (-1 if not known).
  51       */
  52      protected $numfiles;
  53      /**
  54       * @var int Number of files processed so far.
  55       */
  56      protected $donefiles;
  57      /**
  58       * @var string Current file path within archive.
  59       */
  60      protected $currentarchivepath;
  61      /**
  62       * @var string Full path to current file.
  63       */
  64      protected $currentfile;
  65      /**
  66       * @var int Size of current file in bytes.
  67       */
  68      protected $currentfilesize;
  69      /**
  70       * @var int Number of bytes of current file already written into buffer.
  71       */
  72      protected $currentfileprocessed;
  73      /**
  74       * @var resource File handle to current file.
  75       */
  76      protected $currentfp;
  77      /**
  78       * @var int Modified time of current file.
  79       */
  80      protected $currentmtime;
  81      /**
  82       * @var string Buffer containing file data awaiting write.
  83       */
  84      protected $filebuffer;
  85      /**
  86       * @var int Current length of buffer in bytes.
  87       */
  88      protected $filebufferlength;
  89      /**
  90       * @var array Results array of all files processed.
  91       */
  92      protected $results;
  93  
  94      /**
  95       * @var array In list mode, content of the list; outside list mode, null.
  96       */
  97      protected $listresults = null;
  98  
  99      /**
 100       * @var int Whether listing or extracting.
 101       */
 102      protected $mode = self::MODE_EXTRACT;
 103  
 104      /**
 105       * @var int If extracting (default).
 106       */
 107      const MODE_EXTRACT = 0;
 108  
 109      /**
 110       * @var int Listing contents.
 111       */
 112      const MODE_LIST = 1;
 113  
 114      /**
 115       * @var int Listing contents; list now complete.
 116       */
 117      const MODE_LIST_COMPLETE = 2;
 118  
 119      /**
 120       * Constructor.
 121       *
 122       * @param stored_file|string $archivefile Moodle file or OS path to archive
 123       */
 124      public function __construct($archivefile) {
 125          if (is_a($archivefile, 'stored_file')) {
 126              $this->storedfile = $archivefile;
 127          } else {
 128              $this->ospath = $archivefile;
 129          }
 130      }
 131  
 132      /**
 133       * Extracts the archive.
 134       *
 135       * @param tgz_extractor_handler $handler Will be called for extracted files
 136       * @param file_progress $progress Optional progress reporting
 137       * @return array Array from archive path => true of processed files
 138       * @throws moodle_exception If there is any error processing the archive
 139       */
 140      public function extract(tgz_extractor_handler $handler, file_progress $progress = null) {
 141          $this->mode = self::MODE_EXTRACT;
 142          $this->extract_or_list($handler, $progress);
 143          $results = $this->results;
 144          unset($this->results);
 145          return $results;
 146      }
 147  
 148      /**
 149       * Extracts or lists the archive depending on $this->listmode.
 150       *
 151       * @param tgz_extractor_handler $handler Optional handler
 152       * @param file_progress $progress Optional progress reporting
 153       * @throws moodle_exception If there is any error processing the archive
 154       */
 155      protected function extract_or_list(tgz_extractor_handler $handler = null, file_progress $progress = null) {
 156          // Open archive.
 157          if ($this->storedfile) {
 158              $gz = $this->storedfile->get_content_file_handle(stored_file::FILE_HANDLE_GZOPEN);
 159              // Estimate number of read-buffers (64KB) in file. Guess that the
 160              // uncompressed size is 2x compressed size. Add one just to ensure
 161              // it's non-zero.
 162              $estimatedbuffers = ($this->storedfile->get_filesize() * 2 / self::READ_BLOCK_SIZE) + 1;
 163          } else {
 164              $gz = gzopen($this->ospath, 'rb');
 165              $estimatedbuffers = (filesize($this->ospath) * 2 / self::READ_BLOCK_SIZE) + 1;
 166          }
 167          if (!$gz) {
 168              throw new moodle_exception('errorprocessingarchive', '', '', null,
 169                      'Failed to open gzip file');
 170          }
 171  
 172          // Calculate how much progress to report per buffer read.
 173          $progressperbuffer = (int)(tgz_packer::PROGRESS_MAX / $estimatedbuffers);
 174  
 175          // Process archive in 512-byte blocks (but reading 64KB at a time).
 176          $buffer = '';
 177          $bufferpos = 0;
 178          $bufferlength = 0;
 179          $this->numfiles = -1;
 180          $read = 0;
 181          $done = 0;
 182          $beforeprogress = -1;
 183          while (true) {
 184              if ($bufferpos == $bufferlength) {
 185                  $buffer = gzread($gz, self::READ_BLOCK_SIZE);
 186                  $bufferpos = 0;
 187                  $bufferlength = strlen($buffer);
 188                  if ($bufferlength == 0) {
 189                      // EOF.
 190                      break;
 191                  }
 192  
 193                  // Report progress if enabled.
 194                  if ($progress) {
 195                      if ($this->numfiles === -1) {
 196                          // If we don't know the number of files, do an estimate based
 197                          // on number of buffers read.
 198                          $done += $progressperbuffer;
 199                          if ($done >= tgz_packer::PROGRESS_MAX) {
 200                              $done = tgz_packer::PROGRESS_MAX - 1;
 201                          }
 202                          $progress->progress($done, tgz_packer::PROGRESS_MAX);
 203                      } else {
 204                          // Once we know the number of files, use this.
 205                          if ($beforeprogress === -1) {
 206                              $beforeprogress = $done;
 207                          }
 208                          // Calculate progress as whatever progress we reported
 209                          // before we knew how many files there were (might be 0)
 210                          // plus a proportion of the number of files out of the
 211                          // remaining progress value.
 212                          $done = $beforeprogress + (int)(($this->donefiles / $this->numfiles) *
 213                                  (tgz_packer::PROGRESS_MAX - $beforeprogress));
 214                      }
 215                      $progress->progress($done, tgz_packer::PROGRESS_MAX);
 216                  }
 217              }
 218  
 219              $block = substr($buffer, $bufferpos, tgz_packer::TAR_BLOCK_SIZE);
 220              if ($this->currentfile) {
 221                  $this->process_file_block($block, $handler);
 222              } else {
 223                  $this->process_header($block, $handler);
 224              }
 225  
 226              // When listing, if we read an index file, we abort archive processing.
 227              if ($this->mode === self::MODE_LIST_COMPLETE) {
 228                  break;
 229              }
 230  
 231              $bufferpos += tgz_packer::TAR_BLOCK_SIZE;
 232              $read++;
 233          }
 234  
 235          // Close archive and finish.
 236          gzclose($gz);
 237      }
 238  
 239      /**
 240       * Lists files in the archive, either using the index file (if present),
 241       * or by basically extracting the whole thing if there isn't an index file.
 242       *
 243       * @return array Array of file listing results:
 244       */
 245      public function list_files() {
 246          $this->listresults = array();
 247          $this->mode = self::MODE_LIST;
 248          $this->extract_or_list();
 249          $listresults = $this->listresults;
 250          $this->listresults = null;
 251          return $listresults;
 252      }
 253  
 254      /**
 255       * Process 512-byte header block.
 256       *
 257       * @param string $block Tar block
 258       * @param tgz_extractor_handler $handler Will be called for extracted files
 259       */
 260      protected function process_header($block, $handler) {
 261          // If the block consists entirely of nulls, ignore it. (This happens
 262          // twice at end of archive.)
 263          if ($block === str_pad('', tgz_packer::TAR_BLOCK_SIZE, "\0")) {
 264              return;
 265          }
 266  
 267          // struct header_posix_ustar {
 268          //    char name[100];
 269          $name = rtrim(substr($block, 0, 100), "\0");
 270  
 271          //    char mode[8];
 272          //    char uid[8];
 273          //    char gid[8];
 274          //    char size[12];
 275          $filesize = octdec(substr($block, 124, 11));
 276  
 277          //    char mtime[12];
 278          $mtime = octdec(substr($block, 136, 11));
 279  
 280          //    char checksum[8];
 281          //    char typeflag[1];
 282          $typeflag = substr($block, 156, 1);
 283  
 284          //    char linkname[100];
 285          //    char magic[6];
 286          $magic = substr($block, 257, 6);
 287          if ($magic !== "ustar\0" && $magic !== "ustar ") {
 288              // There are two checks above; the first is the correct POSIX format
 289              // and the second is for GNU tar default format.
 290              throw new moodle_exception('errorprocessingarchive', '', '', null,
 291                      'Header does not have POSIX ustar magic string');
 292          }
 293  
 294          //    char version[2];
 295          //    char uname[32];
 296          //    char gname[32];
 297          //    char devmajor[8];
 298          //    char devminor[8];
 299          //    char prefix[155];
 300          $prefix = rtrim(substr($block, 345, 155), "\0");
 301  
 302          //    char pad[12];
 303          // };
 304  
 305          $archivepath = ltrim($prefix . '/' . $name, '/');
 306  
 307          // For security, ensure there is no .. folder in the archivepath.
 308          $archivepath = clean_param($archivepath, PARAM_PATH);
 309  
 310          // Handle file depending on the type.
 311          switch ($typeflag) {
 312              case '1' :
 313              case '2' :
 314              case '3' :
 315              case '4' :
 316              case '6' :
 317              case '7' :
 318                  // Ignore these special cases.
 319                  break;
 320  
 321              case '5' :
 322                  // Directory.
 323                  if ($this->mode === self::MODE_LIST) {
 324                      $this->listresults[] = (object)array(
 325                              'original_pathname' => $archivepath,
 326                              'pathname' => $archivepath,
 327                              'mtime' => $mtime,
 328                              'is_directory' => true,
 329                              'size' => 0);
 330                  } else if ($handler->tgz_directory($archivepath, $mtime)) {
 331                      $this->results[$archivepath] = true;
 332                  }
 333                  break;
 334  
 335              default:
 336                  // All other values treated as normal file.
 337                  $this->start_current_file($archivepath, $filesize, $mtime, $handler);
 338                  break;
 339          }
 340      }
 341  
 342      /**
 343       * Processes one 512-byte block of an existing file.
 344       *
 345       * @param string $block Data block
 346       * @param tgz_extractor_handler $handler Will be called for extracted files
 347       */
 348      protected function process_file_block($block, tgz_extractor_handler $handler = null) {
 349          // Write block into buffer.
 350          $blocksize = tgz_packer::TAR_BLOCK_SIZE;
 351          if ($this->currentfileprocessed + tgz_packer::TAR_BLOCK_SIZE > $this->currentfilesize) {
 352              // Partial block at end of file.
 353              $blocksize = $this->currentfilesize - $this->currentfileprocessed;
 354              $this->filebuffer .= substr($block, 0, $blocksize);
 355          } else {
 356              // Full-length block.
 357              $this->filebuffer .= $block;
 358          }
 359          $this->filebufferlength += $blocksize;
 360          $this->currentfileprocessed += $blocksize;
 361  
 362          // Write block to file if necessary.
 363          $eof = $this->currentfileprocessed == $this->currentfilesize;
 364          if ($this->filebufferlength >= self::WRITE_BLOCK_SIZE || $eof) {
 365              // Except when skipping the file, write it out.
 366              if ($this->currentfile !== true) {
 367                  if (!fwrite($this->currentfp, $this->filebuffer)) {
 368                      throw new moodle_exception('errorprocessingarchive', '', '', null,
 369                              'Failed to write buffer to output file: ' . $this->currentfile);
 370                  }
 371              }
 372              $this->filebuffer = '';
 373              $this->filebufferlength = 0;
 374          }
 375  
 376          // If file is finished, close it.
 377          if ($eof) {
 378              $this->close_current_file($handler);
 379          }
 380      }
 381  
 382      /**
 383       * Starts processing a file from archive.
 384       *
 385       * @param string $archivepath Path inside archive
 386       * @param int $filesize Size in bytes
 387       * @param int $mtime File-modified time
 388       * @param tgz_extractor_handler $handler Will be called for extracted files
 389       * @throws moodle_exception
 390       */
 391      protected function start_current_file($archivepath, $filesize, $mtime,
 392              tgz_extractor_handler $handler = null) {
 393          global $CFG;
 394  
 395          $this->currentarchivepath = $archivepath;
 396          $this->currentmtime = $mtime;
 397          $this->currentfilesize = $filesize;
 398          $this->currentfileprocessed = 0;
 399  
 400          if ($archivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
 401              // For index file, store in temp directory.
 402              $tempfolder = $CFG->tempdir . '/core_files';
 403              check_dir_exists($tempfolder);
 404              $this->currentfile = tempnam($tempfolder, '.index');
 405          } else {
 406              if ($this->mode === self::MODE_LIST) {
 407                  // If listing, add to list.
 408                  $this->listresults[] = (object)array(
 409                          'original_pathname' => $archivepath,
 410                          'pathname' => $archivepath,
 411                          'mtime' => $mtime,
 412                          'is_directory' => false,
 413                          'size' => $filesize);
 414  
 415                  // Discard file.
 416                  $this->currentfile = true;
 417              } else {
 418                  // For other files, ask handler for location.
 419                  $this->currentfile = $handler->tgz_start_file($archivepath);
 420                  if ($this->currentfile === null) {
 421                      // This indicates that we are discarding the current file.
 422                      $this->currentfile = true;
 423                  }
 424              }
 425          }
 426          $this->filebuffer = '';
 427          $this->filebufferlength = 0;
 428  
 429          // Open file.
 430          if ($this->currentfile !== true) {
 431              $this->currentfp = fopen($this->currentfile, 'wb');
 432              if (!$this->currentfp) {
 433                  throw new moodle_exception('errorprocessingarchive', '', '', null,
 434                          'Failed to open output file: ' . $this->currentfile);
 435              }
 436          } else {
 437              $this->currentfp = null;
 438          }
 439  
 440          // If it has no size, close it right away.
 441          if ($filesize == 0) {
 442              $this->close_current_file($handler);
 443          }
 444      }
 445  
 446      /**
 447       * Closes the current file, calls handler, and sets up data.
 448       *
 449       * @param tgz_extractor_handler $handler Will be called for extracted files
 450       * @throws moodle_exception If there is an error closing it
 451       */
 452      protected function close_current_file($handler) {
 453          if ($this->currentfp !== null) {
 454              if (!fclose($this->currentfp)) {
 455                  throw new moodle_exception('errorprocessingarchive', '', '', null,
 456                          'Failed to close output file: ' .  $this->currentfile);
 457              }
 458  
 459              // At this point we should touch the file to set its modified
 460              // time to $this->currentmtime. However, when extracting to the
 461              // temp directory, cron will delete files more than a week old,
 462              // so to avoid problems we leave all files at their current time.
 463          }
 464  
 465          if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
 466              if ($this->mode === self::MODE_LIST) {
 467                  // When listing array, use the archive index to produce the list.
 468                  $index = file($this->currentfile);
 469                  $ok = true;
 470                  foreach ($index as $num => $value) {
 471                      // For first line (header), check it's valid then skip it.
 472                      if ($num == 0) {
 473                          if (preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX) . '~', $value)) {
 474                              continue;
 475                          } else {
 476                              // Not valid, better ignore the file.
 477                              $ok = false;
 478                              break;
 479                          }
 480                      }
 481                      // Split on tabs and store in results array.
 482                      $values = explode("\t", trim($value));
 483                      $this->listresults[] = (object)array(
 484                          'original_pathname' => $values[0],
 485                          'pathname' => $values[0],
 486                          'mtime' => ($values[3] === '?' ? tgz_packer::DEFAULT_TIMESTAMP : (int)$values[3]),
 487                          'is_directory' => $values[1] === 'd',
 488                          'size' => (int)$values[2]);
 489                  }
 490                  if ($ok) {
 491                      $this->mode = self::MODE_LIST_COMPLETE;
 492                  }
 493                  unlink($this->currentfile);
 494              } else {
 495                  // For index file, get number of files and delete temp file.
 496                  $contents = file_get_contents($this->currentfile, false, null, 0, 128);
 497                  $matches = array();
 498                  if (preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX) .
 499                          '([0-9]+)~', $contents, $matches)) {
 500                      $this->numfiles = (int)$matches[1];
 501                  }
 502                  unlink($this->currentfile);
 503              }
 504          } else {
 505              // Report to handler and put in results.
 506              if ($this->currentfp !== null) {
 507                  $handler->tgz_end_file($this->currentarchivepath, $this->currentfile);
 508                  $this->results[$this->currentarchivepath] = true;
 509              }
 510              $this->donefiles++;
 511          }
 512  
 513          // No longer have a current file.
 514          $this->currentfp = null;
 515          $this->currentfile = null;
 516          $this->currentarchivepath = null;
 517      }
 518  
 519  }
 520  
 521  /**
 522   * Interface for callback from tgz_extractor::extract.
 523   *
 524   * The file functions will be called (in pairs tgz_start_file, tgz_end_file) for
 525   * each file in the archive. (There is only one exception, the special
 526   * .ARCHIVE_INDEX file which is not reported to the handler.)
 527   *
 528   * The directory function is called whenever the archive contains a directory
 529   * entry.
 530   */
 531  interface tgz_extractor_handler {
 532      /**
 533       * Called when the system begins to extract a file. At this point, the
 534       * handler must decide where on disk the extracted file should be located.
 535       * This can be a temporary location or final target, as preferred.
 536       *
 537       * The handler can request for files to be skipped, in which case no data
 538       * will be written and tgz_end_file will not be called.
 539       *
 540       * @param string $archivepath Path and name of file within archive
 541       * @return string Location for output file in filesystem, or null to skip file
 542       */
 543      public function tgz_start_file($archivepath);
 544  
 545      /**
 546       * Called when the system has finished extracting a file. The handler can
 547       * now process the extracted file if required.
 548       *
 549       * @param string $archivepath Path and name of file within archive
 550       * @param string $realpath Path in filesystem (from tgz_start_file return)
 551       * @return bool True to continue processing, false to abort archive extract
 552       */
 553      public function tgz_end_file($archivepath, $realpath);
 554  
 555      /**
 556       * Called when a directory entry is found in the archive.
 557       *
 558       * The handler can create a corresponding directory if required.
 559       *
 560       * @param string $archivepath Path and name of directory within archive
 561       * @param int $mtime Modified time of directory
 562       * @return bool True if directory was created, false if skipped
 563       */
 564      public function tgz_directory($archivepath, $mtime);
 565  }