Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 311] [Versions 39 and 311]

   1  <?php
   2  
   3  /**
   4   * This file is part of FPDI
   5   *
   6   * @package   setasign\Fpdi
   7   * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
   8   * @license   http://opensource.org/licenses/mit-license The MIT License
   9   */
  10  
  11  namespace setasign\Fpdi\PdfParser\CrossReference;
  12  
  13  use setasign\Fpdi\PdfParser\PdfParser;
  14  use setasign\Fpdi\PdfParser\Type\PdfDictionary;
  15  use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
  16  use setasign\Fpdi\PdfParser\Type\PdfNumeric;
  17  use setasign\Fpdi\PdfParser\Type\PdfStream;
  18  use setasign\Fpdi\PdfParser\Type\PdfToken;
  19  use setasign\Fpdi\PdfParser\Type\PdfTypeException;
  20  
  21  /**
  22   * Class CrossReference
  23   *
  24   * This class processes the standard cross reference of a PDF document.
  25   */
  26  class CrossReference
  27  {
  28      /**
  29       * The byte length in which the "startxref" keyword should be searched.
  30       *
  31       * @var int
  32       */
  33      public static $trailerSearchLength = 5500;
  34  
  35      /**
  36       * @var int
  37       */
  38      protected $fileHeaderOffset = 0;
  39  
  40      /**
  41       * @var PdfParser
  42       */
  43      protected $parser;
  44  
  45      /**
  46       * @var ReaderInterface[]
  47       */
  48      protected $readers = [];
  49  
  50      /**
  51       * CrossReference constructor.
  52       *
  53       * @param PdfParser $parser
  54       * @throws CrossReferenceException
  55       * @throws PdfTypeException
  56       */
  57      public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
  58      {
  59          $this->parser = $parser;
  60          $this->fileHeaderOffset = $fileHeaderOffset;
  61  
  62          $offset = $this->findStartXref();
  63          $reader = null;
  64          /** @noinspection TypeUnsafeComparisonInspection */
  65          while ($offset != false) { // By doing an unsafe comparsion we ignore faulty references to byte offset 0
  66              try {
  67                  $reader = $this->readXref($offset + $this->fileHeaderOffset);
  68              } catch (CrossReferenceException $e) {
  69                  // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
  70                  if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
  71                      $this->fileHeaderOffset = 0;
  72                      $reader = $this->readXref($offset + $this->fileHeaderOffset);
  73                  } else {
  74                      throw $e;
  75                  }
  76              }
  77  
  78              $trailer = $reader->getTrailer();
  79              $this->checkForEncryption($trailer);
  80              $this->readers[] = $reader;
  81  
  82              if (isset($trailer->value['Prev'])) {
  83                  $offset = $trailer->value['Prev']->value;
  84              } else {
  85                  $offset = false;
  86              }
  87          }
  88  
  89          // fix faulty sub-section header
  90          if ($reader instanceof FixedReader) {
  91              /**
  92               * @var FixedReader $reader
  93               */
  94              $reader->fixFaultySubSectionShift();
  95          }
  96  
  97          if ($reader === null) {
  98              throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
  99          }
 100      }
 101  
 102      /**
 103       * Get the size of the cross reference.
 104       *
 105       * @return integer
 106       */
 107      public function getSize()
 108      {
 109          return $this->getTrailer()->value['Size']->value;
 110      }
 111  
 112      /**
 113       * Get the trailer dictionary.
 114       *
 115       * @return PdfDictionary
 116       */
 117      public function getTrailer()
 118      {
 119          return $this->readers[0]->getTrailer();
 120      }
 121  
 122      /**
 123       * Get the cross reference readser instances.
 124       *
 125       * @return ReaderInterface[]
 126       */
 127      public function getReaders()
 128      {
 129          return $this->readers;
 130      }
 131  
 132      /**
 133       * Get the offset by an object number.
 134       *
 135       * @param int $objectNumber
 136       * @return integer|bool
 137       */
 138      public function getOffsetFor($objectNumber)
 139      {
 140          foreach ($this->getReaders() as $reader) {
 141              $offset = $reader->getOffsetFor($objectNumber);
 142              if ($offset !== false) {
 143                  return $offset;
 144              }
 145          }
 146  
 147          return false;
 148      }
 149  
 150      /**
 151       * Get an indirect object by its object number.
 152       *
 153       * @param int $objectNumber
 154       * @return PdfIndirectObject
 155       * @throws CrossReferenceException
 156       */
 157      public function getIndirectObject($objectNumber)
 158      {
 159          $offset = $this->getOffsetFor($objectNumber);
 160          if ($offset === false) {
 161              throw new CrossReferenceException(
 162                  \sprintf('Object (id:%s) not found.', $objectNumber),
 163                  CrossReferenceException::OBJECT_NOT_FOUND
 164              );
 165          }
 166  
 167          $parser = $this->parser;
 168  
 169          $parser->getTokenizer()->clearStack();
 170          $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);
 171  
 172          try {
 173              /** @var PdfIndirectObject $object */
 174              $object = $parser->readValue(null, PdfIndirectObject::class);
 175          } catch (PdfTypeException $e) {
 176              throw new CrossReferenceException(
 177                  \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
 178                  CrossReferenceException::OBJECT_NOT_FOUND,
 179                  $e
 180              );
 181          }
 182  
 183          if ($object->objectNumber !== $objectNumber) {
 184              throw new CrossReferenceException(
 185                  \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
 186                  CrossReferenceException::OBJECT_NOT_FOUND
 187              );
 188          }
 189  
 190          return $object;
 191      }
 192  
 193      /**
 194       * Read the cross-reference table at a given offset.
 195       *
 196       * Internally the method will try to evaluate the best reader for this cross-reference.
 197       *
 198       * @param int $offset
 199       * @return ReaderInterface
 200       * @throws CrossReferenceException
 201       * @throws PdfTypeException
 202       */
 203      protected function readXref($offset)
 204      {
 205          $this->parser->getStreamReader()->reset($offset);
 206          $this->parser->getTokenizer()->clearStack();
 207          $initValue = $this->parser->readValue();
 208  
 209          return $this->initReaderInstance($initValue);
 210      }
 211  
 212      /**
 213       * Get a cross-reference reader instance.
 214       *
 215       * @param PdfToken|PdfIndirectObject $initValue
 216       * @return ReaderInterface|bool
 217       * @throws CrossReferenceException
 218       * @throws PdfTypeException
 219       */
 220      protected function initReaderInstance($initValue)
 221      {
 222          $position = $this->parser->getStreamReader()->getPosition()
 223              + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;
 224  
 225          if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
 226              try {
 227                  return new FixedReader($this->parser);
 228              } catch (CrossReferenceException $e) {
 229                  $this->parser->getStreamReader()->reset($position);
 230                  $this->parser->getTokenizer()->clearStack();
 231  
 232                  return new LineReader($this->parser);
 233              }
 234          }
 235  
 236          if ($initValue instanceof PdfIndirectObject) {
 237              try {
 238                  $stream = PdfStream::ensure($initValue->value);
 239              } catch (PdfTypeException $e) {
 240                  throw new CrossReferenceException(
 241                      'Invalid object type at xref reference offset.',
 242                      CrossReferenceException::INVALID_DATA,
 243                      $e
 244                  );
 245              }
 246  
 247              $type = PdfDictionary::get($stream->value, 'Type');
 248              if ($type->value !== 'XRef') {
 249                  throw new CrossReferenceException(
 250                      'The xref position points to an incorrect object type.',
 251                      CrossReferenceException::INVALID_DATA
 252                  );
 253              }
 254  
 255              $this->checkForEncryption($stream->value);
 256  
 257              throw new CrossReferenceException(
 258                  'This PDF document probably uses a compression technique which is not supported by the ' .
 259                  'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
 260                  CrossReferenceException::COMPRESSED_XREF
 261              );
 262          }
 263  
 264          throw new CrossReferenceException(
 265              'The xref position points to an incorrect object type.',
 266              CrossReferenceException::INVALID_DATA
 267          );
 268      }
 269  
 270      /**
 271       * Check for encryption.
 272       *
 273       * @param PdfDictionary $dictionary
 274       * @throws CrossReferenceException
 275       */
 276      protected function checkForEncryption(PdfDictionary $dictionary)
 277      {
 278          if (isset($dictionary->value['Encrypt'])) {
 279              throw new CrossReferenceException(
 280                  'This PDF document is encrypted and cannot be processed with FPDI.',
 281                  CrossReferenceException::ENCRYPTED
 282              );
 283          }
 284      }
 285  
 286      /**
 287       * Find the start position for the first cross-reference.
 288       *
 289       * @return int The byte-offset position of the first cross-reference.
 290       * @throws CrossReferenceException
 291       */
 292      protected function findStartXref()
 293      {
 294          $reader = $this->parser->getStreamReader();
 295          $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);
 296  
 297          $buffer = $reader->getBuffer(false);
 298          $pos = \strrpos($buffer, 'startxref');
 299          $addOffset = 9;
 300          if ($pos === false) {
 301              // Some corrupted documents uses startref, instead of startxref
 302              $pos = \strrpos($buffer, 'startref');
 303              if ($pos === false) {
 304                  throw new CrossReferenceException(
 305                      'Unable to find pointer to xref table',
 306                      CrossReferenceException::NO_STARTXREF_FOUND
 307                  );
 308              }
 309              $addOffset = 8;
 310          }
 311  
 312          $reader->setOffset($pos + $addOffset);
 313  
 314          try {
 315              $value = $this->parser->readValue(null, PdfNumeric::class);
 316          } catch (PdfTypeException $e) {
 317              throw new CrossReferenceException(
 318                  'Invalid data after startxref keyword.',
 319                  CrossReferenceException::INVALID_DATA,
 320                  $e
 321              );
 322          }
 323  
 324          return $value->value;
 325      }
 326  }