Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 311] [Versions 39 and 311]

   1  <?php
   2  
   3  /**
   4   * This file is part of FPDI
   5   *
   6   * @package   setasign\Fpdi
   7   * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
   8   * @license   http://opensource.org/licenses/mit-license The MIT License
   9   */
  10  
  11  namespace setasign\Fpdi\PdfParser;
  12  
  13  use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
  14  use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
  15  use setasign\Fpdi\PdfParser\Type\PdfArray;
  16  use setasign\Fpdi\PdfParser\Type\PdfBoolean;
  17  use setasign\Fpdi\PdfParser\Type\PdfDictionary;
  18  use setasign\Fpdi\PdfParser\Type\PdfHexString;
  19  use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
  20  use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
  21  use setasign\Fpdi\PdfParser\Type\PdfName;
  22  use setasign\Fpdi\PdfParser\Type\PdfNull;
  23  use setasign\Fpdi\PdfParser\Type\PdfNumeric;
  24  use setasign\Fpdi\PdfParser\Type\PdfStream;
  25  use setasign\Fpdi\PdfParser\Type\PdfString;
  26  use setasign\Fpdi\PdfParser\Type\PdfToken;
  27  use setasign\Fpdi\PdfParser\Type\PdfType;
  28  
  29  /**
  30   * A PDF parser class
  31   */
  32  class PdfParser
  33  {
  34      /**
  35       * @var StreamReader
  36       */
  37      protected $streamReader;
  38  
  39      /**
  40       * @var Tokenizer
  41       */
  42      protected $tokenizer;
  43  
  44      /**
  45       * The file header.
  46       *
  47       * @var string
  48       */
  49      protected $fileHeader;
  50  
  51      /**
  52       * The offset to the file header.
  53       *
  54       * @var int
  55       */
  56      protected $fileHeaderOffset;
  57  
  58      /**
  59       * @var CrossReference|null
  60       */
  61      protected $xref;
  62  
  63      /**
  64       * All read objects.
  65       *
  66       * @var array
  67       */
  68      protected $objects = [];
  69  
  70      /**
  71       * PdfParser constructor.
  72       *
  73       * @param StreamReader $streamReader
  74       */
  75      public function __construct(StreamReader $streamReader)
  76      {
  77          $this->streamReader = $streamReader;
  78          $this->tokenizer = new Tokenizer($streamReader);
  79      }
  80  
  81      /**
  82       * Removes cycled references.
  83       *
  84       * @internal
  85       */
  86      public function cleanUp()
  87      {
  88          $this->xref = null;
  89      }
  90  
  91      /**
  92       * Get the stream reader instance.
  93       *
  94       * @return StreamReader
  95       */
  96      public function getStreamReader()
  97      {
  98          return $this->streamReader;
  99      }
 100  
 101      /**
 102       * Get the tokenizer instance.
 103       *
 104       * @return Tokenizer
 105       */
 106      public function getTokenizer()
 107      {
 108          return $this->tokenizer;
 109      }
 110  
 111      /**
 112       * Resolves the file header.
 113       *
 114       * @throws PdfParserException
 115       * @return int
 116       */
 117      protected function resolveFileHeader()
 118      {
 119          if ($this->fileHeader) {
 120              return $this->fileHeaderOffset;
 121          }
 122  
 123          $this->streamReader->reset(0);
 124          $maxIterations = 1000;
 125          while (true) {
 126              $buffer = $this->streamReader->getBuffer(false);
 127              $offset = \strpos($buffer, '%PDF-');
 128              if ($offset === false) {
 129                  if (!$this->streamReader->increaseLength(100) || (--$maxIterations === 0)) {
 130                      throw new PdfParserException(
 131                          'Unable to find PDF file header.',
 132                          PdfParserException::FILE_HEADER_NOT_FOUND
 133                      );
 134                  }
 135                  continue;
 136              }
 137              break;
 138          }
 139  
 140          $this->fileHeaderOffset = $offset;
 141          $this->streamReader->setOffset($offset);
 142  
 143          $this->fileHeader = \trim($this->streamReader->readLine());
 144          return $this->fileHeaderOffset;
 145      }
 146  
 147      /**
 148       * Get the cross reference instance.
 149       *
 150       * @return CrossReference
 151       * @throws CrossReferenceException
 152       * @throws PdfParserException
 153       */
 154      public function getCrossReference()
 155      {
 156          if ($this->xref === null) {
 157              $this->xref = new CrossReference($this, $this->resolveFileHeader());
 158          }
 159  
 160          return $this->xref;
 161      }
 162  
 163      /**
 164       * Get the PDF version.
 165       *
 166       * @return int[] An array of major and minor version.
 167       * @throws PdfParserException
 168       */
 169      public function getPdfVersion()
 170      {
 171          $this->resolveFileHeader();
 172  
 173          if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) === 0) {
 174              throw new PdfParserException(
 175                  'Unable to extract PDF version from file header.',
 176                  PdfParserException::PDF_VERSION_NOT_FOUND
 177              );
 178          }
 179          list(, $major, $minor) = $result;
 180  
 181          $catalog = $this->getCatalog();
 182          if (isset($catalog->value['Version'])) {
 183              $versionParts = \explode(
 184                  '.',
 185                  PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value)
 186              );
 187              if (count($versionParts) === 2) {
 188                  list($major, $minor) = $versionParts;
 189              }
 190          }
 191  
 192          return [(int) $major, (int) $minor];
 193      }
 194  
 195      /**
 196       * Get the catalog dictionary.
 197       *
 198       * @return PdfDictionary
 199       * @throws Type\PdfTypeException
 200       * @throws CrossReferenceException
 201       * @throws PdfParserException
 202       */
 203      public function getCatalog()
 204      {
 205          $trailer = $this->getCrossReference()->getTrailer();
 206  
 207          $catalog = PdfType::resolve(PdfDictionary::get($trailer, 'Root'), $this);
 208  
 209          return PdfDictionary::ensure($catalog);
 210      }
 211  
 212      /**
 213       * Get an indirect object by its object number.
 214       *
 215       * @param int $objectNumber
 216       * @param bool $cache
 217       * @return PdfIndirectObject
 218       * @throws CrossReferenceException
 219       * @throws PdfParserException
 220       */
 221      public function getIndirectObject($objectNumber, $cache = false)
 222      {
 223          $objectNumber = (int) $objectNumber;
 224          if (isset($this->objects[$objectNumber])) {
 225              return $this->objects[$objectNumber];
 226          }
 227  
 228          $object = $this->getCrossReference()->getIndirectObject($objectNumber);
 229  
 230          if ($cache) {
 231              $this->objects[$objectNumber] = $object;
 232          }
 233  
 234          return $object;
 235      }
 236  
 237      /**
 238       * Read a PDF value.
 239       *
 240       * @param null|bool|string $token
 241       * @param null|string $expectedType
 242       * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken
 243       * @throws Type\PdfTypeException
 244       */
 245      public function readValue($token = null, $expectedType = null)
 246      {
 247          if ($token === null) {
 248              $token = $this->tokenizer->getNextToken();
 249          }
 250  
 251          if ($token === false) {
 252              if ($expectedType !== null) {
 253                  throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
 254              }
 255              return false;
 256          }
 257  
 258          switch ($token) {
 259              case '(':
 260                  $this->ensureExpectedType($token, $expectedType);
 261                  return PdfString::parse($this->streamReader);
 262  
 263              case '<':
 264                  if ($this->streamReader->getByte() === '<') {
 265                      $this->ensureExpectedType('<<', $expectedType);
 266                      $this->streamReader->addOffset(1);
 267                      return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
 268                  }
 269  
 270                  $this->ensureExpectedType($token, $expectedType);
 271                  return PdfHexString::parse($this->streamReader);
 272  
 273              case '/':
 274                  $this->ensureExpectedType($token, $expectedType);
 275                  return PdfName::parse($this->tokenizer, $this->streamReader);
 276  
 277              case '[':
 278                  $this->ensureExpectedType($token, $expectedType);
 279                  return PdfArray::parse($this->tokenizer, $this);
 280  
 281              default:
 282                  if (\is_numeric($token)) {
 283                      if (($token2 = $this->tokenizer->getNextToken()) !== false) {
 284                          if (\is_numeric($token2) && ($token3 = $this->tokenizer->getNextToken()) !== false) {
 285                              switch ($token3) {
 286                                  case 'obj':
 287                                      if ($expectedType !== null && $expectedType !== PdfIndirectObject::class) {
 288                                          throw new Type\PdfTypeException(
 289                                              'Got unexpected token type.',
 290                                              Type\PdfTypeException::INVALID_DATA_TYPE
 291                                          );
 292                                      }
 293  
 294                                      return PdfIndirectObject::parse(
 295                                          (int) $token,
 296                                          (int) $token2,
 297                                          $this,
 298                                          $this->tokenizer,
 299                                          $this->streamReader
 300                                      );
 301                                  case 'R':
 302                                      if (
 303                                          $expectedType !== null &&
 304                                          $expectedType !== PdfIndirectObjectReference::class
 305                                      ) {
 306                                          throw new Type\PdfTypeException(
 307                                              'Got unexpected token type.',
 308                                              Type\PdfTypeException::INVALID_DATA_TYPE
 309                                          );
 310                                      }
 311  
 312                                      return PdfIndirectObjectReference::create((int) $token, (int) $token2);
 313                              }
 314  
 315                              $this->tokenizer->pushStack($token3);
 316                          }
 317  
 318                          $this->tokenizer->pushStack($token2);
 319                      }
 320  
 321                      if ($expectedType !== null && $expectedType !== PdfNumeric::class) {
 322                          throw new Type\PdfTypeException(
 323                              'Got unexpected token type.',
 324                              Type\PdfTypeException::INVALID_DATA_TYPE
 325                          );
 326                      }
 327                      return PdfNumeric::create($token + 0);
 328                  }
 329  
 330                  if ($token === 'true' || $token === 'false') {
 331                      $this->ensureExpectedType($token, $expectedType);
 332                      return PdfBoolean::create($token === 'true');
 333                  }
 334  
 335                  if ($token === 'null') {
 336                      $this->ensureExpectedType($token, $expectedType);
 337                      return new PdfNull();
 338                  }
 339  
 340                  if ($expectedType !== null && $expectedType !== PdfToken::class) {
 341                      throw new Type\PdfTypeException(
 342                          'Got unexpected token type.',
 343                          Type\PdfTypeException::INVALID_DATA_TYPE
 344                      );
 345                  }
 346  
 347                  $v = new PdfToken();
 348                  $v->value = $token;
 349  
 350                  return $v;
 351          }
 352      }
 353  
 354      /**
 355       * Ensures that the token will evaluate to an expected object type (or not).
 356       *
 357       * @param string $token
 358       * @param string|null $expectedType
 359       * @return bool
 360       * @throws Type\PdfTypeException
 361       */
 362      private function ensureExpectedType($token, $expectedType)
 363      {
 364          static $mapping = [
 365              '(' => PdfString::class,
 366              '<' => PdfHexString::class,
 367              '<<' => PdfDictionary::class,
 368              '/' => PdfName::class,
 369              '[' => PdfArray::class,
 370              'true' => PdfBoolean::class,
 371              'false' => PdfBoolean::class,
 372              'null' => PdfNull::class
 373          ];
 374  
 375          if ($expectedType === null || $mapping[$token] === $expectedType) {
 376              return true;
 377          }
 378  
 379          throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
 380      }
 381  }