Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403]
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Implementation of .tar.gz extractor. Handles extraction of .tar.gz files.
 * Do not call directly; use methods in tgz_packer.
 *
 * @see tgz_packer
 * @package core_files
 * @copyright 2013 The Open University
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/**
 * Extracts .tar.gz files (POSIX format).
 *
 * The archive is read through zlib in READ_BLOCK_SIZE chunks and then walked
 * one tar block (tgz_packer::TAR_BLOCK_SIZE bytes) at a time. Each block is
 * either a header (no file currently open) or file data (file currently open).
 */
class tgz_extractor {
    /**
     * @var int When writing data, the system writes blocks of this size.
     */
    const WRITE_BLOCK_SIZE = 65536;
    /**
     * @var int When reading data, the system reads blocks of this size.
     */
    const READ_BLOCK_SIZE = 65536;
    /**
     * @var stored_file File object for archive.
     */
    protected $storedfile;
    /**
     * @var string OS path for archive.
     */
    protected $ospath;
    /**
     * @var int Number of files (-1 if not known).
     */
    protected $numfiles;
    /**
     * @var int Number of files processed so far.
     */
    protected $donefiles;
    /**
     * @var string Current file path within archive.
     */
    protected $currentarchivepath;
    /**
     * @var string|bool|null Full path to current file; true while the current
     *      file is being read but discarded; null when no file is in progress.
     */
    protected $currentfile;
    /**
     * @var int Size of current file in bytes.
     */
    protected $currentfilesize;
    /**
     * @var int Number of bytes of current file already written into buffer.
     */
    protected $currentfileprocessed;
    /**
     * @var resource File handle to current file.
     */
    protected $currentfp;
    /**
     * @var int Modified time of current file.
     */
    protected $currentmtime;
    /**
     * @var string Buffer containing file data awaiting write.
     */
    protected $filebuffer;
    /**
     * @var int Current length of buffer in bytes.
     */
    protected $filebufferlength;
    /**
     * @var array|null Results array of all files processed (null until the
     *      first file or directory has been recorded).
     */
    protected $results;

    /**
     * @var array In list mode, content of the list; outside list mode, null.
     */
    protected $listresults = null;

    /**
     * @var int Whether listing or extracting.
     */
    protected $mode = self::MODE_EXTRACT;

    /**
     * @var int If extracting (default).
     */
    const MODE_EXTRACT = 0;

    /**
     * @var int Listing contents.
     */
    const MODE_LIST = 1;

    /**
     * @var int Listing contents; list now complete.
     */
    const MODE_LIST_COMPLETE = 2;

    /**
     * Constructor.
     *
     * @param stored_file|string $archivefile Moodle file or OS path to archive
     */
    public function __construct($archivefile) {
        if (is_a($archivefile, 'stored_file')) {
            $this->storedfile = $archivefile;
        } else {
            $this->ospath = $archivefile;
        }
    }

    /**
     * Extracts the archive.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @param file_progress $progress Optional progress reporting
     * @return array|null Array from archive path => true of processed files;
     *      null if no file or directory was recorded during extraction
     * @throws moodle_exception If there is any error processing the archive
     */
    public function extract(tgz_extractor_handler $handler, file_progress $progress = null) {
        $this->mode = self::MODE_EXTRACT;
        // Start from a clean slate so results of an earlier (possibly failed)
        // run on this object cannot leak into this one.
        $this->results = null;
        $this->extract_or_list($handler, $progress);
        $results = $this->results;
        // Reset rather than unset(): unsetting a declared property makes any
        // later read of it raise an 'undefined property' warning.
        $this->results = null;
        return $results;
    }

    /**
     * Extracts or lists the archive depending on $this->mode.
     *
     * @param tgz_extractor_handler $handler Optional handler
     * @param file_progress $progress Optional progress reporting
     * @throws moodle_exception If there is any error processing the archive
     */
    protected function extract_or_list(tgz_extractor_handler $handler = null, file_progress $progress = null) {
        // Open archive.
        if ($this->storedfile) {
            $gz = $this->storedfile->get_content_file_handle(stored_file::FILE_HANDLE_GZOPEN);
            // Estimate number of read-buffers (64KB) in file. Guess that the
            // uncompressed size is 2x compressed size. Add one just to ensure
            // it's non-zero.
            $estimatedbuffers = ($this->storedfile->get_filesize() * 2 / self::READ_BLOCK_SIZE) + 1;
        } else {
            $gz = gzopen($this->ospath, 'rb');
            $estimatedbuffers = (filesize($this->ospath) * 2 / self::READ_BLOCK_SIZE) + 1;
        }
        if (!$gz) {
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Failed to open gzip file');
        }

        // Calculate how much progress to report per buffer read.
        $progressperbuffer = (int)(tgz_packer::PROGRESS_MAX / $estimatedbuffers);

        // Process archive in 512-byte blocks (but reading 64KB at a time).
        $buffer = '';
        $bufferpos = 0;
        $bufferlength = 0;
        $this->numfiles = -1;
        $this->donefiles = 0;
        $done = 0;
        $beforeprogress = -1;
        try {
            while (true) {
                if ($bufferpos == $bufferlength) {
                    $buffer = gzread($gz, self::READ_BLOCK_SIZE);
                    $bufferpos = 0;
                    // A failed read (false) is treated the same as end of file.
                    $bufferlength = ($buffer === false) ? 0 : strlen($buffer);
                    if ($bufferlength == 0) {
                        // EOF.
                        break;
                    }

                    // Report progress if enabled (once per buffer read).
                    if ($progress) {
                        if ($this->numfiles === -1) {
                            // If we don't know the number of files, do an estimate based
                            // on number of buffers read.
                            $done += $progressperbuffer;
                            if ($done >= tgz_packer::PROGRESS_MAX) {
                                $done = tgz_packer::PROGRESS_MAX - 1;
                            }
                        } else {
                            // Once we know the number of files, use this.
                            if ($beforeprogress === -1) {
                                $beforeprogress = $done;
                            }
                            // Calculate progress as whatever progress we reported
                            // before we knew how many files there were (might be 0)
                            // plus a proportion of the number of files out of the
                            // remaining progress value. Clamp in case the index
                            // declared fewer files than the archive contains.
                            $done = min(tgz_packer::PROGRESS_MAX,
                                    $beforeprogress + (int)(($this->donefiles / $this->numfiles) *
                                    (tgz_packer::PROGRESS_MAX - $beforeprogress)));
                        }
                        $progress->progress($done, tgz_packer::PROGRESS_MAX);
                    }
                }

                $block = substr($buffer, $bufferpos, tgz_packer::TAR_BLOCK_SIZE);
                if ($this->currentfile) {
                    $this->process_file_block($block, $handler);
                } else {
                    $this->process_header($block, $handler);
                }

                // When listing, if we read an index file, we abort archive processing.
                if ($this->mode === self::MODE_LIST_COMPLETE) {
                    break;
                }

                $bufferpos += tgz_packer::TAR_BLOCK_SIZE;
            }
        } finally {
            // Close archive, even if processing threw an exception.
            gzclose($gz);

            // If processing stopped part-way through a file (exception or
            // truncated archive), release the output handle and forget the
            // file so a later run on this object starts with a header block.
            if (is_resource($this->currentfp)) {
                fclose($this->currentfp);
            }
            $this->currentfp = null;
            $this->currentfile = null;
            $this->currentarchivepath = null;
        }
    }

    /**
     * Lists files in the archive, either using the index file (if present),
     * or by basically extracting the whole thing if there isn't an index file.
     *
     * @return array Array of file listing results (objects with fields
     *      original_pathname, pathname, mtime, is_directory, size)
     * @throws moodle_exception If there is any error processing the archive
     */
    public function list_files() {
        $this->listresults = array();
        $this->mode = self::MODE_LIST;
        $this->extract_or_list();
        $listresults = $this->listresults;
        $this->listresults = null;
        return $listresults;
    }

    /**
     * Process 512-byte header block.
     *
     * @param string $block Tar block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the block is not a ustar header
     */
    protected function process_header($block, $handler) {
        // If the block consists entirely of nulls, ignore it. (This happens
        // twice at end of archive.)
        if ($block === str_pad('', tgz_packer::TAR_BLOCK_SIZE, "\0")) {
            return;
        }

        // struct header_posix_ustar {
        //    char name[100];
        $name = rtrim(substr($block, 0, 100), "\0");

        //    char mode[8];
        //    char uid[8];
        //    char gid[8];
        //    char size[12];
        $filesize = octdec(substr($block, 124, 11));

        //    char mtime[12];
        $mtime = octdec(substr($block, 136, 11));

        //    char checksum[8];
        //    char typeflag[1];
        $typeflag = substr($block, 156, 1);

        //    char linkname[100];
        //    char magic[6];
        $magic = substr($block, 257, 6);
        if ($magic !== "ustar\0" && $magic !== "ustar ") {
            // There are two checks above; the first is the correct POSIX format
            // and the second is for GNU tar default format.
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Header does not have POSIX ustar magic string');
        }

        //    char version[2];
        //    char uname[32];
        //    char gname[32];
        //    char devmajor[8];
        //    char devminor[8];
        //    char prefix[155];
        $prefix = rtrim(substr($block, 345, 155), "\0");

        //    char pad[12];
        // };

        $archivepath = ltrim($prefix . '/' . $name, '/');

        // For security, ensure there is no .. folder in the archivepath.
        $archivepath = clean_param($archivepath, PARAM_PATH);

        // Handle file depending on the type.
        switch ($typeflag) {
            case '1' :
            case '2' :
            case '3' :
            case '4' :
            case '6' :
            case '7' :
                // Ignore these special cases.
                break;

            case '5' :
                // Directory.
                if ($this->mode === self::MODE_LIST) {
                    $this->listresults[] = (object)array(
                            'original_pathname' => $archivepath,
                            'pathname' => $archivepath,
                            'mtime' => $mtime,
                            'is_directory' => true,
                            'size' => 0);
                } else if ($handler->tgz_directory($archivepath, $mtime)) {
                    $this->results[$archivepath] = true;
                }
                break;

            default:
                // All other values treated as normal file.
                $this->start_current_file($archivepath, $filesize, $mtime, $handler);
                break;
        }
    }

    /**
     * Processes one 512-byte block of an existing file.
     *
     * @param string $block Data block
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If the data cannot be written to the output file
     */
    protected function process_file_block($block, tgz_extractor_handler $handler = null) {
        // Write block into buffer.
        $blocksize = tgz_packer::TAR_BLOCK_SIZE;
        if ($this->currentfileprocessed + tgz_packer::TAR_BLOCK_SIZE > $this->currentfilesize) {
            // Partial block at end of file.
            $blocksize = $this->currentfilesize - $this->currentfileprocessed;
            $this->filebuffer .= substr($block, 0, $blocksize);
        } else {
            // Full-length block.
            $this->filebuffer .= $block;
        }
        $this->filebufferlength += $blocksize;
        $this->currentfileprocessed += $blocksize;

        // Write block to file if necessary.
        $eof = $this->currentfileprocessed == $this->currentfilesize;
        if ($this->filebufferlength >= self::WRITE_BLOCK_SIZE || $eof) {
            // Except when skipping the file, write it out.
            if ($this->currentfile !== true) {
                $written = fwrite($this->currentfp, $this->filebuffer);
                // Fail on an outright error or zero-byte write, and also on a
                // short write (e.g. disk full) which would otherwise silently
                // produce a truncated output file.
                if (!$written || $written !== strlen($this->filebuffer)) {
                    throw new moodle_exception('errorprocessingarchive', '', '', null,
                            'Failed to write buffer to output file: ' . $this->currentfile);
                }
            }
            $this->filebuffer = '';
            $this->filebufferlength = 0;
        }

        // If file is finished, close it.
        if ($eof) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Starts processing a file from archive.
     *
     * @param string $archivepath Path inside archive
     * @param int $filesize Size in bytes
     * @param int $mtime File-modified time
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception
     */
    protected function start_current_file($archivepath, $filesize, $mtime,
            tgz_extractor_handler $handler = null) {
        global $CFG;

        $this->currentarchivepath = $archivepath;
        $this->currentmtime = $mtime;
        $this->currentfilesize = $filesize;
        $this->currentfileprocessed = 0;

        if ($archivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            // For index file, store in temp directory.
            $tempfolder = $CFG->tempdir . '/core_files';
            check_dir_exists($tempfolder);
            $this->currentfile = tempnam($tempfolder, '.index');
        } else {
            if ($this->mode === self::MODE_LIST) {
                // If listing, add to list.
                $this->listresults[] = (object)array(
                        'original_pathname' => $archivepath,
                        'pathname' => $archivepath,
                        'mtime' => $mtime,
                        'is_directory' => false,
                        'size' => $filesize);

                // Discard file.
                $this->currentfile = true;
            } else {
                // For other files, ask handler for location.
                $this->currentfile = $handler->tgz_start_file($archivepath);
                if ($this->currentfile === null) {
                    // This indicates that we are discarding the current file.
                    $this->currentfile = true;
                }
            }
        }
        $this->filebuffer = '';
        $this->filebufferlength = 0;

        // Open file.
        if ($this->currentfile !== true) {
            $this->currentfp = fopen($this->currentfile, 'wb');
            if (!$this->currentfp) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to open output file: ' . $this->currentfile);
            }
        } else {
            $this->currentfp = null;
        }

        // If it has no size, close it right away.
        if ($filesize == 0) {
            $this->close_current_file($handler);
        }
    }

    /**
     * Closes the current file, calls handler, and sets up data.
     *
     * For the archive index file the temporary copy is consumed here: in list
     * mode it supplies the listing, otherwise it supplies the file count used
     * for progress reporting. It is deleted in both cases.
     *
     * @param tgz_extractor_handler $handler Will be called for extracted files
     * @throws moodle_exception If there is an error closing it
     */
    protected function close_current_file($handler) {
        if ($this->currentfp !== null) {
            if (!fclose($this->currentfp)) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to close output file: ' . $this->currentfile);
            }

            // At this point we should touch the file to set its modified
            // time to $this->currentmtime. However, when extracting to the
            // temp directory, cron will delete files more than a week old,
            // so to avoid problems we leave all files at their current time.
        }

        if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
            if ($this->mode === self::MODE_LIST) {
                // When listing array, use the archive index to produce the list.
                $index = file($this->currentfile);

                // The index is only trusted if it could be read and its first
                // line (header) is valid; otherwise ignore it and keep listing
                // from the tar headers.
                $ok = is_array($index) && isset($index[0]) &&
                        preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') . '~', $index[0]);
                if ($ok) {
                    foreach (array_slice($index, 1) as $value) {
                        // Split on tabs and store in results array. Pad so that a
                        // short (malformed) line yields empty fields instead of
                        // undefined-index warnings.
                        $values = array_pad(explode("\t", trim($value)), 4, '');
                        $this->listresults[] = (object)array(
                                'original_pathname' => $values[0],
                                'pathname' => $values[0],
                                'mtime' => ($values[3] === '?' ? tgz_packer::DEFAULT_TIMESTAMP : (int)$values[3]),
                                'is_directory' => $values[1] === 'd',
                                'size' => (int)$values[2]);
                    }
                    $this->mode = self::MODE_LIST_COMPLETE;
                }
                unlink($this->currentfile);
            } else {
                // For index file, get number of files and delete temp file.
                $contents = file_get_contents($this->currentfile, false, null, 0, 128);
                $matches = array();
                if (preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') .
                        '([0-9]+)~', $contents, $matches)) {
                    $count = (int)$matches[1];
                    // A count of zero is useless for progress reporting and would
                    // cause a division by zero there; keep treating it as unknown.
                    if ($count > 0) {
                        $this->numfiles = $count;
                    }
                }
                unlink($this->currentfile);
            }
        } else {
            // Report to handler and put in results.
            if ($this->currentfp !== null) {
                $handler->tgz_end_file($this->currentarchivepath, $this->currentfile);
                $this->results[$this->currentarchivepath] = true;
            }
            $this->donefiles++;
        }

        // No longer have a current file.
        $this->currentfp = null;
        $this->currentfile = null;
        $this->currentarchivepath = null;
    }

}

/**
 * Interface for callback from tgz_extractor::extract.
 *
 * The file functions will be called (in pairs tgz_start_file, tgz_end_file) for
 * each file in the archive. (There is only one exception, the special
 * .ARCHIVE_INDEX file which is not reported to the handler.)
 *
 * The directory function is called whenever the archive contains a directory
 * entry.
 */
interface tgz_extractor_handler {
    /**
     * Called when the system begins to extract a file. At this point, the
     * handler must decide where on disk the extracted file should be located.
     * This can be a temporary location or final target, as preferred.
     *
     * The handler can request for files to be skipped, in which case no data
     * will be written and tgz_end_file will not be called.
     *
     * @param string $archivepath Path and name of file within archive
     * @return string Location for output file in filesystem, or null to skip file
     */
    public function tgz_start_file($archivepath);

    /**
     * Called when the system has finished extracting a file. The handler can
     * now process the extracted file if required.
     *
     * NOTE(review): tgz_extractor::close_current_file() discards the value
     * returned here, so returning false does not currently abort extraction.
     *
     * @param string $archivepath Path and name of file within archive
     * @param string $realpath Path in filesystem (from tgz_start_file return)
     * @return bool True to continue processing, false to abort archive extract
     */
    public function tgz_end_file($archivepath, $realpath);

    /**
     * Called when a directory entry is found in the archive.
     *
     * The handler can create a corresponding directory if required.
     *
     * @param string $archivepath Path and name of directory within archive
     * @param int $mtime Modified time of directory
     * @return bool True if directory was created, false if skipped
     */
    public function tgz_directory($archivepath, $mtime);
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body