Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]

   1  <?php
   2  /**
   3   * This file is part of FPDI
   4   *
   5   * @package   setasign\Fpdi
   6   * @copyright Copyright (c) 2019 Setasign - Jan Slabon (https://www.setasign.com)
   7   * @license   http://opensource.org/licenses/mit-license The MIT License
   8   */
   9  
  10  namespace setasign\Fpdi\PdfParser;
  11  
  12  use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
  13  use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
  14  use setasign\Fpdi\PdfParser\Type\PdfArray;
  15  use setasign\Fpdi\PdfParser\Type\PdfBoolean;
  16  use setasign\Fpdi\PdfParser\Type\PdfDictionary;
  17  use setasign\Fpdi\PdfParser\Type\PdfHexString;
  18  use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
  19  use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
  20  use setasign\Fpdi\PdfParser\Type\PdfName;
  21  use setasign\Fpdi\PdfParser\Type\PdfNull;
  22  use setasign\Fpdi\PdfParser\Type\PdfNumeric;
  23  use setasign\Fpdi\PdfParser\Type\PdfString;
  24  use setasign\Fpdi\PdfParser\Type\PdfToken;
  25  use setasign\Fpdi\PdfParser\Type\PdfType;
  26  
  27  /**
  28   * A PDF parser class
  29   *
  30   * @package setasign\Fpdi\PdfParser
  31   */
  32  class PdfParser
  33  {
  34      /**
  35       * @var StreamReader
  36       */
  37      protected $streamReader;
  38  
  39      /**
  40       * @var Tokenizer
  41       */
  42      protected $tokenizer;
  43  
  44      /**
  45       * The file header.
  46       *
  47       * @var string
  48       */
  49      protected $fileHeader;
  50  
  51      /**
  52       * The offset to the file header.
  53       *
  54       * @var int
  55       */
  56      protected $fileHeaderOffset;
  57  
  58      /**
  59       * @var CrossReference
  60       */
  61      protected $xref;
  62  
  63      /**
  64       * All read objects.
  65       *
  66       * @var array
  67       */
  68      protected $objects = [];
  69  
  70      /**
  71       * PdfParser constructor.
  72       *
  73       * @param StreamReader $streamReader
  74       */
  75      public function __construct(StreamReader $streamReader)
  76      {
  77          $this->streamReader = $streamReader;
  78          $this->tokenizer = new Tokenizer($streamReader);
  79      }
  80  
  81      /**
  82       * Removes cycled references.
  83       *
  84       * @internal
  85       */
  86      public function cleanUp()
  87      {
  88          $this->xref = null;
  89      }
  90  
  91      /**
  92       * Get the stream reader instance.
  93       *
  94       * @return StreamReader
  95       */
  96      public function getStreamReader()
  97      {
  98          return $this->streamReader;
  99      }
 100  
 101      /**
 102       * Get the tokenizer instance.
 103       *
 104       * @return Tokenizer
 105       */
 106      public function getTokenizer()
 107      {
 108          return $this->tokenizer;
 109      }
 110  
 111      /**
 112       * Resolves the file header.
 113       *
 114       * @throws PdfParserException
 115       * @return int
 116       */
 117      protected function resolveFileHeader()
 118      {
 119          if ($this->fileHeader) {
 120              return $this->fileHeaderOffset;
 121          }
 122  
 123          $this->streamReader->reset(0);
 124          $offset = false;
 125          $maxIterations = 1000;
 126          while (true) {
 127              $buffer = $this->streamReader->getBuffer(false);
 128              $offset = \strpos($buffer, '%PDF-');
 129              if ($offset === false) {
 130                  if (!$this->streamReader->increaseLength(100) || (--$maxIterations === 0)) {
 131                      throw new PdfParserException(
 132                          'Unable to find PDF file header.',
 133                          PdfParserException::FILE_HEADER_NOT_FOUND
 134                      );
 135                  }
 136                  continue;
 137              }
 138              break;
 139          }
 140  
 141          $this->fileHeaderOffset = $offset;
 142          $this->streamReader->setOffset($offset);
 143  
 144          $this->fileHeader = \trim($this->streamReader->readLine());
 145          return $this->fileHeaderOffset;
 146      }
 147  
 148      /**
 149       * Get the cross reference instance.
 150       *
 151       * @return CrossReference
 152       * @throws CrossReferenceException
 153       * @throws PdfParserException
 154       */
 155      public function getCrossReference()
 156      {
 157          if ($this->xref === null) {
 158              $this->xref = new CrossReference($this, $this->resolveFileHeader());
 159          }
 160  
 161          return $this->xref;
 162      }
 163  
 164      /**
 165       * Get the PDF version.
 166       *
 167       * @return int[] An array of major and minor version.
 168       * @throws PdfParserException
 169       */
 170      public function getPdfVersion()
 171      {
 172          $this->resolveFileHeader();
 173  
 174          if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) === 0) {
 175              throw new PdfParserException(
 176                  'Unable to extract PDF version from file header.',
 177                  PdfParserException::PDF_VERSION_NOT_FOUND
 178              );
 179          }
 180          list(, $major, $minor) = $result;
 181  
 182          $catalog = $this->getCatalog();
 183          if (isset($catalog->value['Version'])) {
 184              $versionParts = \explode('.', PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value));
 185              if (count($versionParts) === 2) {
 186                  list($major, $minor) = $versionParts;
 187              }
 188          }
 189  
 190          return [(int) $major, (int) $minor];
 191      }
 192  
 193      /**
 194       * Get the catalog dictionary.
 195       *
 196       * @return PdfDictionary
 197       * @throws Type\PdfTypeException
 198       * @throws CrossReferenceException
 199       * @throws PdfParserException
 200       */
 201      public function getCatalog()
 202      {
 203          $xref = $this->getCrossReference();
 204          $trailer = $xref->getTrailer();
 205  
 206          $catalog = PdfType::resolve(PdfDictionary::get($trailer, 'Root'), $this);
 207  
 208          return PdfDictionary::ensure($catalog);
 209      }
 210  
 211      /**
 212       * Get an indirect object by its object number.
 213       *
 214       * @param int $objectNumber
 215       * @param bool $cache
 216       * @return PdfIndirectObject
 217       * @throws CrossReferenceException
 218       * @throws PdfParserException
 219       */
 220      public function getIndirectObject($objectNumber, $cache = false)
 221      {
 222          $objectNumber = (int) $objectNumber;
 223          if (isset($this->objects[$objectNumber])) {
 224              return $this->objects[$objectNumber];
 225          }
 226  
 227          $xref = $this->getCrossReference();
 228          $object = $xref->getIndirectObject($objectNumber);
 229  
 230          if ($cache) {
 231              $this->objects[$objectNumber] = $object;
 232          }
 233  
 234          return $object;
 235      }
 236  
 237      /**
 238       * Read a PDF value.
 239       *
 240       * @param null|bool|string $token
 241       * @param null|string $expectedType
 242       * @return bool|PdfArray|PdfBoolean|PdfHexString|PdfName|PdfNull|PdfNumeric|PdfString|PdfToken|PdfIndirectObjectReference
 243       * @throws Type\PdfTypeException
 244       */
 245      public function readValue($token = null, $expectedType = null)
 246      {
 247          if ($token === null) {
 248              $token = $this->tokenizer->getNextToken();
 249          }
 250  
 251          if ($token === false) {
 252              if ($expectedType !== null) {
 253                  throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
 254              }
 255              return false;
 256          }
 257  
 258          switch ($token) {
 259              case '(':
 260                  $this->ensureExpectedType($token, $expectedType);
 261                  return PdfString::parse($this->streamReader);
 262  
 263              case '<':
 264                  if ($this->streamReader->getByte() === '<') {
 265                      $this->ensureExpectedType('<<', $expectedType);
 266                      $this->streamReader->addOffset(1);
 267                      return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
 268                  }
 269  
 270                  $this->ensureExpectedType($token, $expectedType);
 271                  return PdfHexString::parse($this->streamReader);
 272  
 273              case '/':
 274                  $this->ensureExpectedType($token, $expectedType);
 275                  return PdfName::parse($this->tokenizer, $this->streamReader);
 276  
 277              case '[':
 278                  $this->ensureExpectedType($token, $expectedType);
 279                  return PdfArray::parse($this->tokenizer, $this);
 280  
 281              default:
 282                  if (\is_numeric($token)) {
 283                      if (($token2 = $this->tokenizer->getNextToken()) !== false) {
 284                          if (\is_numeric($token2)) {
 285                              if (($token3 = $this->tokenizer->getNextToken()) !== false) {
 286                                  switch ($token3) {
 287                                      case 'obj':
 288                                          if ($expectedType !== null && $expectedType !== PdfIndirectObject::class) {
 289                                              throw new Type\PdfTypeException(
 290                                                  'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE
 291                                              );
 292                                          }
 293  
 294                                          return PdfIndirectObject::parse(
 295                                              $token,
 296                                              $token2,
 297                                              $this,
 298                                              $this->tokenizer,
 299                                              $this->streamReader
 300                                          );
 301                                      case 'R':
 302                                          if ($expectedType !== null &&
 303                                              $expectedType !== PdfIndirectObjectReference::class
 304                                          ) {
 305                                              throw new Type\PdfTypeException(
 306                                                  'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE
 307                                              );
 308                                          }
 309  
 310                                          return PdfIndirectObjectReference::create($token, $token2);
 311                                  }
 312  
 313                                  $this->tokenizer->pushStack($token3);
 314                              }
 315                          }
 316  
 317                          $this->tokenizer->pushStack($token2);
 318                      }
 319  
 320                      if ($expectedType !== null && $expectedType !== PdfNumeric::class) {
 321                          throw new Type\PdfTypeException(
 322                              'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE
 323                          );
 324                      }
 325                      return PdfNumeric::create($token);
 326                  }
 327  
 328                  if ($token === 'true' || $token === 'false') {
 329                      $this->ensureExpectedType($token, $expectedType);
 330                      return PdfBoolean::create($token === 'true');
 331                  }
 332  
 333                  if ($token === 'null') {
 334                      $this->ensureExpectedType($token, $expectedType);
 335                      return new PdfNull();
 336                  }
 337  
 338                  if ($expectedType !== null && $expectedType !== PdfToken::class) {
 339                      throw new Type\PdfTypeException(
 340                          'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE
 341                      );
 342                  }
 343  
 344                  $v = new PdfToken();
 345                  $v->value = $token;
 346  
 347                  return $v;
 348          }
 349      }
 350  
 351      /**
 352       * Ensures that the token will evaluate to an expected object type (or not).
 353       *
 354       * @param string $token
 355       * @param string|null $expectedType
 356       * @return bool
 357       * @throws Type\PdfTypeException
 358       */
 359      private function ensureExpectedType($token, $expectedType)
 360      {
 361          static $mapping = [
 362              '(' => PdfString::class,
 363              '<' => PdfHexString::class,
 364              '<<' => PdfDictionary::class,
 365              '/' => PdfName::class,
 366              '[' => PdfArray::class,
 367              'true' => PdfBoolean::class,
 368              'false' => PdfBoolean::class,
 369              'null' => PdfNull::class
 370          ];
 371  
 372          if ($expectedType === null || $mapping[$token] === $expectedType) {
 373              return true;
 374          }
 375  
 376          throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
 377      }
 378  }