Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0 Note: minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is supported too.
<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfReader;

use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfParser\Type\PdfArray;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfType;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * A PDF reader class
 */
class PdfReader
{
    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * @var int
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws \InvalidArgumentException
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        $this->readPages();

        $page = $this->pages[$pageNumber - 1];

        if ($page instanceof PdfIndirectObjectReference) {
            $readPages = function ($kids) use (&$readPages) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        return $readPages(PdfDictionary::get($object->value, 'Kids'));
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$pageNumber - 1] = $readPages($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    // @phpstan-ignore-next-line
                    $page = $this->pages[$pageNumber - 1];
                }
            } else {
                $this->pages[$pageNumber - 1] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and resolve all indirect objects of all pages.
     *
     * @param bool $readAll
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

< $readPages = function ($kids, $count) use (&$readPages, $readAll) {
> $expectedPageCount = $this->getPageCount(); > $readPages = function ($kids, $count) use (&$readPages, $readAll, $expectedPageCount) {
$kids = PdfArray::ensure($kids); $isLeaf = ($count->value === \count($kids->value)); foreach ($kids->value as $reference) { $reference = PdfIndirectObjectReference::ensure($reference); if (!$readAll && $isLeaf) { $this->pages[] = $reference; continue; } $object = $this->parser->getIndirectObject($reference->value); $type = PdfDictionary::get($object->value, 'Type'); if ($type->value === 'Pages') { $readPages(PdfDictionary::get($object->value, 'Kids'), PdfDictionary::get($object->value, 'Count')); } else { $this->pages[] = $object;
> } } > } > // stop if all pages are read - faulty documents exists with additional entries with invalid data. }; > if (count($this->pages) === $expectedPageCount) { > break;
$catalog = $this->parser->getCatalog(); $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser); $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser); $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser); $readPages($kids, $count); } }