Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is supported too.

Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403] [Versions 402 and 403]

   1  <?php
   2  
   3  /**
   4   * This file is part of FPDI
   5   *
   6   * @package   setasign\Fpdi
   7   * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
   8   * @license   http://opensource.org/licenses/mit-license The MIT License
   9   */
  10  
  11  namespace setasign\Fpdi\PdfReader;
  12  
  13  use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
  14  use setasign\Fpdi\PdfParser\PdfParser;
  15  use setasign\Fpdi\PdfParser\PdfParserException;
  16  use setasign\Fpdi\PdfParser\Type\PdfArray;
  17  use setasign\Fpdi\PdfParser\Type\PdfDictionary;
  18  use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
  19  use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
  20  use setasign\Fpdi\PdfParser\Type\PdfNumeric;
  21  use setasign\Fpdi\PdfParser\Type\PdfType;
  22  use setasign\Fpdi\PdfParser\Type\PdfTypeException;
  23  
  24  /**
  25   * A PDF reader class
  26   */
  27  class PdfReader
  28  {
  29      /**
  30       * @var PdfParser
  31       */
  32      protected $parser;
  33  
  34      /**
  35       * @var int
  36       */
  37      protected $pageCount;
  38  
  39      /**
  40       * Indirect objects of resolved pages.
  41       *
  42       * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
  43       */
  44      protected $pages = [];
  45  
  46      /**
  47       * PdfReader constructor.
  48       *
  49       * @param PdfParser $parser
  50       */
  51      public function __construct(PdfParser $parser)
  52      {
  53          $this->parser = $parser;
  54      }
  55  
  56      /**
  57       * PdfReader destructor.
  58       */
  59      public function __destruct()
  60      {
  61          if ($this->parser !== null) {
  62              $this->parser->cleanUp();
  63          }
  64      }
  65  
  66      /**
  67       * Get the pdf parser instance.
  68       *
  69       * @return PdfParser
  70       */
  71      public function getParser()
  72      {
  73          return $this->parser;
  74      }
  75  
  76      /**
  77       * Get the PDF version.
  78       *
  79       * @return string
  80       * @throws PdfParserException
  81       */
  82      public function getPdfVersion()
  83      {
  84          return \implode('.', $this->parser->getPdfVersion());
  85      }
  86  
  87      /**
  88       * Get the page count.
  89       *
  90       * @return int
  91       * @throws PdfTypeException
  92       * @throws CrossReferenceException
  93       * @throws PdfParserException
  94       */
  95      public function getPageCount()
  96      {
  97          if ($this->pageCount === null) {
  98              $catalog = $this->parser->getCatalog();
  99  
 100              $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
 101              $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
 102  
 103              $this->pageCount = PdfNumeric::ensure($count)->value;
 104          }
 105  
 106          return $this->pageCount;
 107      }
 108  
 109      /**
 110       * Get a page instance.
 111       *
 112       * @param int $pageNumber
 113       * @return Page
 114       * @throws PdfTypeException
 115       * @throws CrossReferenceException
 116       * @throws PdfParserException
 117       * @throws \InvalidArgumentException
 118       */
 119      public function getPage($pageNumber)
 120      {
 121          if (!\is_numeric($pageNumber)) {
 122              throw new \InvalidArgumentException(
 123                  'Page number needs to be a number.'
 124              );
 125          }
 126  
 127          if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
 128              throw new \InvalidArgumentException(
 129                  \sprintf(
 130                      'Page number "%s" out of available page range (1 - %s)',
 131                      $pageNumber,
 132                      $this->getPageCount()
 133                  )
 134              );
 135          }
 136  
 137          $this->readPages();
 138  
 139          $page = $this->pages[$pageNumber - 1];
 140  
 141          if ($page instanceof PdfIndirectObjectReference) {
 142              $readPages = function ($kids) use (&$readPages) {
 143                  $kids = PdfArray::ensure($kids);
 144  
 145                  /** @noinspection LoopWhichDoesNotLoopInspection */
 146                  foreach ($kids->value as $reference) {
 147                      $reference = PdfIndirectObjectReference::ensure($reference);
 148                      $object = $this->parser->getIndirectObject($reference->value);
 149                      $type = PdfDictionary::get($object->value, 'Type');
 150  
 151                      if ($type->value === 'Pages') {
 152                          return $readPages(PdfDictionary::get($object->value, 'Kids'));
 153                      }
 154  
 155                      return $object;
 156                  }
 157  
 158                  throw new PdfReaderException(
 159                      'Kids array cannot be empty.',
 160                      PdfReaderException::KIDS_EMPTY
 161                  );
 162              };
 163  
 164              $page = $this->parser->getIndirectObject($page->value);
 165              $dict = PdfType::resolve($page, $this->parser);
 166              $type = PdfDictionary::get($dict, 'Type');
 167  
 168              if ($type->value === 'Pages') {
 169                  $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
 170                  try {
 171                      $page = $this->pages[$pageNumber - 1] = $readPages($kids);
 172                  } catch (PdfReaderException $e) {
 173                      if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
 174                          throw $e;
 175                      }
 176  
 177                      // let's reset the pages array and read all page objects
 178                      $this->pages = [];
 179                      $this->readPages(true);
 180                      // @phpstan-ignore-next-line
 181                      $page = $this->pages[$pageNumber - 1];
 182                  }
 183              } else {
 184                  $this->pages[$pageNumber - 1] = $page;
 185              }
 186          }
 187  
 188          return new Page($page, $this->parser);
 189      }
 190  
 191      /**
 192       * Walk the page tree and resolve all indirect objects of all pages.
 193       *
 194       * @param bool $readAll
 195       * @throws CrossReferenceException
 196       * @throws PdfParserException
 197       * @throws PdfTypeException
 198       */
 199      protected function readPages($readAll = false)
 200      {
 201          if (\count($this->pages) > 0) {
 202              return;
 203          }
 204  
 205          $expectedPageCount = $this->getPageCount();
 206          $readPages = function ($kids, $count) use (&$readPages, $readAll, $expectedPageCount) {
 207              $kids = PdfArray::ensure($kids);
 208              $isLeaf = ($count->value === \count($kids->value));
 209  
 210              foreach ($kids->value as $reference) {
 211                  $reference = PdfIndirectObjectReference::ensure($reference);
 212  
 213                  if (!$readAll && $isLeaf) {
 214                      $this->pages[] = $reference;
 215                      continue;
 216                  }
 217  
 218                  $object = $this->parser->getIndirectObject($reference->value);
 219                  $type = PdfDictionary::get($object->value, 'Type');
 220  
 221                  if ($type->value === 'Pages') {
 222                      $readPages(PdfDictionary::get($object->value, 'Kids'), PdfDictionary::get($object->value, 'Count'));
 223                  } else {
 224                      $this->pages[] = $object;
 225                  }
 226  
 227                  // stop if all pages are read - faulty documents exists with additional entries with invalid data.
 228                  if (count($this->pages) === $expectedPageCount) {
 229                      break;
 230                  }
 231              }
 232          };
 233  
 234          $catalog = $this->parser->getCatalog();
 235          $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
 236          $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
 237          $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
 238          $readPages($kids, $count);
 239      }
 240  }