See Release Notes
Long Term Support Release
<?php>/** * This file is part of FPDI * * @package setasign\Fpdi< * @copyright Copyright (c) 2019 Setasign - Jan Slabon (https://www.setasign.com)> * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)* @license http://opensource.org/licenses/mit-license The MIT License */ namespace setasign\Fpdi\PdfParser; use setasign\Fpdi\PdfParser\CrossReference\CrossReference; use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException; use setasign\Fpdi\PdfParser\Type\PdfArray; use setasign\Fpdi\PdfParser\Type\PdfBoolean; use setasign\Fpdi\PdfParser\Type\PdfDictionary; use setasign\Fpdi\PdfParser\Type\PdfHexString; use setasign\Fpdi\PdfParser\Type\PdfIndirectObject; use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference; use setasign\Fpdi\PdfParser\Type\PdfName; use setasign\Fpdi\PdfParser\Type\PdfNull; use setasign\Fpdi\PdfParser\Type\PdfNumeric;> use setasign\Fpdi\PdfParser\Type\PdfStream;use setasign\Fpdi\PdfParser\Type\PdfString; use setasign\Fpdi\PdfParser\Type\PdfToken; use setasign\Fpdi\PdfParser\Type\PdfType; /** * A PDF parser class< * < * @package setasign\Fpdi\PdfParser*/ class PdfParser { /** * @var StreamReader */ protected $streamReader; /** * @var Tokenizer */ protected $tokenizer; /** * The file header. * * @var string */ protected $fileHeader; /** * The offset to the file header. * * @var int */ protected $fileHeaderOffset; /**< * @var CrossReference> * @var CrossReference|null*/ protected $xref; /** * All read objects. * * @var array */ protected $objects = []; /** * PdfParser constructor. * * @param StreamReader $streamReader */ public function __construct(StreamReader $streamReader) { $this->streamReader = $streamReader; $this->tokenizer = new Tokenizer($streamReader); } /** * Removes cycled references. * * @internal */ public function cleanUp() { $this->xref = null; } /** * Get the stream reader instance. * * @return StreamReader */ public function getStreamReader() { return $this->streamReader; } /** * Get the tokenizer instance. * * @return Tokenizer */ public function getTokenizer() { return $this->tokenizer; } /** * Resolves the file header. * * @throws PdfParserException * @return int */ protected function resolveFileHeader() { if ($this->fileHeader) { return $this->fileHeaderOffset; } $this->streamReader->reset(0);< $offset = false;$maxIterations = 1000; while (true) { $buffer = $this->streamReader->getBuffer(false); $offset = \strpos($buffer, '%PDF-'); if ($offset === false) { if (!$this->streamReader->increaseLength(100) || (--$maxIterations === 0)) { throw new PdfParserException( 'Unable to find PDF file header.', PdfParserException::FILE_HEADER_NOT_FOUND ); } continue; } break; } $this->fileHeaderOffset = $offset; $this->streamReader->setOffset($offset); $this->fileHeader = \trim($this->streamReader->readLine()); return $this->fileHeaderOffset; } /** * Get the cross reference instance. * * @return CrossReference * @throws CrossReferenceException * @throws PdfParserException */ public function getCrossReference() { if ($this->xref === null) { $this->xref = new CrossReference($this, $this->resolveFileHeader()); } return $this->xref; } /** * Get the PDF version. * * @return int[] An array of major and minor version. * @throws PdfParserException */ public function getPdfVersion() { $this->resolveFileHeader(); if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) === 0) { throw new PdfParserException( 'Unable to extract PDF version from file header.', PdfParserException::PDF_VERSION_NOT_FOUND ); } list(, $major, $minor) = $result; $catalog = $this->getCatalog(); if (isset($catalog->value['Version'])) {< $versionParts = \explode('.', PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value));> $versionParts = \explode( > '.', > PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value) > );if (count($versionParts) === 2) { list($major, $minor) = $versionParts; } } return [(int) $major, (int) $minor]; } /** * Get the catalog dictionary. * * @return PdfDictionary * @throws Type\PdfTypeException * @throws CrossReferenceException * @throws PdfParserException */ public function getCatalog() {< $xref = $this->getCrossReference(); < $trailer = $xref->getTrailer();> $trailer = $this->getCrossReference()->getTrailer();$catalog = PdfType::resolve(PdfDictionary::get($trailer, 'Root'), $this); return PdfDictionary::ensure($catalog); } /** * Get an indirect object by its object number. * * @param int $objectNumber * @param bool $cache * @return PdfIndirectObject * @throws CrossReferenceException * @throws PdfParserException */ public function getIndirectObject($objectNumber, $cache = false) { $objectNumber = (int) $objectNumber; if (isset($this->objects[$objectNumber])) { return $this->objects[$objectNumber]; }< $xref = $this->getCrossReference(); < $object = $xref->getIndirectObject($objectNumber);> $object = $this->getCrossReference()->getIndirectObject($objectNumber);if ($cache) { $this->objects[$objectNumber] = $object; } return $object; } /** * Read a PDF value. * * @param null|bool|string $token * @param null|string $expectedType< * @return bool|PdfArray|PdfBoolean|PdfHexString|PdfName|PdfNull|PdfNumeric|PdfString|PdfToken|PdfIndirectObjectReference> * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken* @throws Type\PdfTypeException */ public function readValue($token = null, $expectedType = null) { if ($token === null) { $token = $this->tokenizer->getNextToken(); } if ($token === false) { if ($expectedType !== null) { throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE); } return false; } switch ($token) { case '(': $this->ensureExpectedType($token, $expectedType); return PdfString::parse($this->streamReader); case '<': if ($this->streamReader->getByte() === '<') { $this->ensureExpectedType('<<', $expectedType); $this->streamReader->addOffset(1); return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this); } $this->ensureExpectedType($token, $expectedType); return PdfHexString::parse($this->streamReader); case '/': $this->ensureExpectedType($token, $expectedType); return PdfName::parse($this->tokenizer, $this->streamReader); case '[': $this->ensureExpectedType($token, $expectedType); return PdfArray::parse($this->tokenizer, $this); default: if (\is_numeric($token)) { if (($token2 = $this->tokenizer->getNextToken()) !== false) {< if (\is_numeric($token2)) { < if (($token3 = $this->tokenizer->getNextToken()) !== false) {> if (\is_numeric($token2) && ($token3 = $this->tokenizer->getNextToken()) !== false) {switch ($token3) { case 'obj': if ($expectedType !== null && $expectedType !== PdfIndirectObject::class) { throw new Type\PdfTypeException(< 'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE> 'Got unexpected token type.', > Type\PdfTypeException::INVALID_DATA_TYPE); } return PdfIndirectObject::parse(< $token, < $token2,> (int) $token, > (int) $token2,$this, $this->tokenizer, $this->streamReader ); case 'R':< if ($expectedType !== null &&> if ( > $expectedType !== null &&$expectedType !== PdfIndirectObjectReference::class ) { throw new Type\PdfTypeException(< 'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE> 'Got unexpected token type.', > Type\PdfTypeException::INVALID_DATA_TYPE); }< return PdfIndirectObjectReference::create($token, $token2);> return PdfIndirectObjectReference::create((int) $token, (int) $token2);} $this->tokenizer->pushStack($token3); }< }$this->tokenizer->pushStack($token2); } if ($expectedType !== null && $expectedType !== PdfNumeric::class) { throw new Type\PdfTypeException(< 'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE> 'Got unexpected token type.', > Type\PdfTypeException::INVALID_DATA_TYPE); }< return PdfNumeric::create($token);> return PdfNumeric::create($token + 0);} if ($token === 'true' || $token === 'false') { $this->ensureExpectedType($token, $expectedType); return PdfBoolean::create($token === 'true'); } if ($token === 'null') { $this->ensureExpectedType($token, $expectedType); return new PdfNull(); } if ($expectedType !== null && $expectedType !== PdfToken::class) { throw new Type\PdfTypeException(< 'Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE> 'Got unexpected token type.', > Type\PdfTypeException::INVALID_DATA_TYPE); } $v = new PdfToken(); $v->value = $token; return $v; } } /** * Ensures that the token will evaluate to an expected object type (or not). * * @param string $token * @param string|null $expectedType * @return bool * @throws Type\PdfTypeException */ private function ensureExpectedType($token, $expectedType) { static $mapping = [ '(' => PdfString::class, '<' => PdfHexString::class, '<<' => PdfDictionary::class, '/' => PdfName::class, '[' => PdfArray::class, 'true' => PdfBoolean::class, 'false' => PdfBoolean::class, 'null' => PdfNull::class ]; if ($expectedType === null || $mapping[$token] === $expectedType) { return true; } throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE); } }