See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 39 and 401]
<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser;

use setasign\Fpdi\PdfParser\CrossReference\CrossReference;
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\Type\PdfArray;
use setasign\Fpdi\PdfParser\Type\PdfBoolean;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfHexString;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
use setasign\Fpdi\PdfParser\Type\PdfName;
use setasign\Fpdi\PdfParser\Type\PdfNull;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfStream;
use setasign\Fpdi\PdfParser\Type\PdfString;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfType;

/**
 * A PDF parser class
 *
 * Wraps a stream reader and a tokenizer and turns the token stream into typed PDF values.
 * The file header and the cross reference are resolved lazily, on first use.
 */
class PdfParser
{
    /**
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * @var Tokenizer
     */
    protected $tokenizer;

    /**
     * The file header (the trimmed line starting with "%PDF-").
     *
     * @var string
     */
    protected $fileHeader;

    /**
     * The offset to the file header.
     *
     * @var int
     */
    protected $fileHeaderOffset;

    /**
     * The lazily created cross reference; null until first requested or after cleanUp().
     *
     * @var CrossReference|null
     */
    protected $xref;

    /**
     * All read objects that were requested with caching enabled, keyed by object number.
     *
     * @var array
     */
    protected $objects = [];

    /**
     * PdfParser constructor.
     *
     * Creates a tokenizer that reads from the given stream reader.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
        $this->tokenizer = new Tokenizer($streamReader);
    }

    /**
     * Removes cycled references.
     *
     * The cross reference is constructed with this parser instance, so dropping it
     * breaks the parser <-> cross reference cycle.
     *
     * @internal
     */
    public function cleanUp()
    {
        $this->xref = null;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Get the tokenizer instance.
     *
     * @return Tokenizer
     */
    public function getTokenizer()
    {
        return $this->tokenizer;
    }

    /**
     * Resolves the file header.
     *
     * Searches the stream from its beginning for the "%PDF-" marker, stores the marker's
     * offset and the trimmed header line and returns the offset. A previously resolved
     * header is reused.
     *
     * @throws PdfParserException If the marker cannot be found.
     * @return int The offset of the file header.
     */
    protected function resolveFileHeader()
    {
        if ($this->fileHeader) {
            return $this->fileHeaderOffset;
        }

        $this->streamReader->reset(0);
        // Upper bound for the number of buffer extensions, so that a stream without a
        // header cannot keep this loop running forever.
        $maxIterations = 1000;
        while (true) {
            $buffer = $this->streamReader->getBuffer(false);
            $offset = \strpos($buffer, '%PDF-');
            if ($offset === false) {
                // NOTE(review): increaseLength() presumably enlarges the buffer and reports
                // failure when no more data is available - confirm against StreamReader.
                if (!$this->streamReader->increaseLength(100) || (--$maxIterations === 0)) {
                    throw new PdfParserException(
                        'Unable to find PDF file header.',
                        PdfParserException::FILE_HEADER_NOT_FOUND
                    );
                }
                continue;
            }
            break;
        }

        $this->fileHeaderOffset = $offset;
        $this->streamReader->setOffset($offset);

        $this->fileHeader = \trim($this->streamReader->readLine());
        return $this->fileHeaderOffset;
    }

    /**
     * Get the cross reference instance.
     *
     * The instance is created on the first call (which also resolves the file header)
     * and reused afterwards.
     *
     * @return CrossReference
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCrossReference()
    {
        if ($this->xref === null) {
            $this->xref = new CrossReference($this, $this->resolveFileHeader());
        }

        return $this->xref;
    }

    /**
     * Get the PDF version.
     *
     * The version is read from the file header. A "Version" entry in the catalog
     * dictionary takes precedence only if it denotes a later version than the header:
     * ISO 32000-1 defines that the header version applies when it is later than the
     * catalog entry or when the entry is absent.
     *
     * @return int[] An array of major and minor version.
     * @throws PdfParserException If the header cannot be found or holds no version.
     * @throws CrossReferenceException
     * @throws Type\PdfTypeException
     */
    public function getPdfVersion()
    {
        $this->resolveFileHeader();

        // preg_match() returns 1 on a match, 0 on no match and false on failure; only a
        // real match leaves usable capture groups in $result.
        if (\preg_match('/%PDF-(\d)\.(\d)/', $this->fileHeader, $result) !== 1) {
            throw new PdfParserException(
                'Unable to extract PDF version from file header.',
                PdfParserException::PDF_VERSION_NOT_FOUND
            );
        }
        $major = (int) $result[1];
        $minor = (int) $result[2];

        $catalog = $this->getCatalog();
        if (isset($catalog->value['Version'])) {
            $versionParts = \explode(
                '.',
                PdfName::unescape(PdfType::resolve($catalog->value['Version'], $this)->value)
            );
            if (\count($versionParts) === 2) {
                $catalogMajor = (int) $versionParts[0];
                $catalogMinor = (int) $versionParts[1];

                // Only a strictly later catalog version overrides the header version. This
                // also keeps the header version if the entry is not numeric (casts to 0).
                if ($catalogMajor > $major || ($catalogMajor === $major && $catalogMinor > $minor)) {
                    $major = $catalogMajor;
                    $minor = $catalogMinor;
                }
            }
        }

        return [$major, $minor];
    }

    /**
     * Get the catalog dictionary.
     *
     * Resolves the "Root" entry of the trailer and ensures that it is a dictionary.
     *
     * @return PdfDictionary
     * @throws Type\PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getCatalog()
    {
        $trailer = $this->getCrossReference()->getTrailer();

        $catalog = PdfType::resolve(PdfDictionary::get($trailer, 'Root'), $this);

        return PdfDictionary::ensure($catalog);
    }

    /**
     * Get an indirect object by its object number.
     *
     * An object that was cached by an earlier call is returned directly. Otherwise it is
     * fetched through the cross reference and stored only if $cache is true.
     *
     * @param int $objectNumber
     * @param bool $cache Whether to keep the object for subsequent calls.
     * @return PdfIndirectObject
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getIndirectObject($objectNumber, $cache = false)
    {
        $objectNumber = (int) $objectNumber;
        if (isset($this->objects[$objectNumber])) {
            return $this->objects[$objectNumber];
        }

        $object = $this->getCrossReference()->getIndirectObject($objectNumber);

        if ($cache) {
            $this->objects[$objectNumber] = $object;
        }

        return $object;
    }

    /**
     * Read a PDF value.
     *
     * @param null|bool|string $token The token to start with; null fetches the next token
     *                                from the tokenizer.
     * @param null|string $expectedType Class name of the type the value has to be of, or
     *                                  null to accept any type.
     * @return false|PdfArray|PdfBoolean|PdfDictionary|PdfHexString|PdfIndirectObject|PdfIndirectObjectReference|PdfName|PdfNull|PdfNumeric|PdfStream|PdfString|PdfToken
     *         False if there is no token left and no type was expected.
     * @throws Type\PdfTypeException If the value is not of the expected type.
     */
    public function readValue($token = null, $expectedType = null)
    {
        if ($token === null) {
            $token = $this->tokenizer->getNextToken();
        }

        if ($token === false) {
            if ($expectedType !== null) {
                throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
            }
            return false;
        }

        switch ($token) {
            case '(':
                $this->ensureExpectedType($token, $expectedType);
                return PdfString::parse($this->streamReader);

            case '<':
                // "<<" opens a dictionary, a single "<" opens a hex string.
                if ($this->streamReader->getByte() === '<') {
                    $this->ensureExpectedType('<<', $expectedType);
                    // Step over the second "<" before the dictionary gets parsed.
                    $this->streamReader->addOffset(1);
                    return PdfDictionary::parse($this->tokenizer, $this->streamReader, $this);
                }

                $this->ensureExpectedType($token, $expectedType);
                return PdfHexString::parse($this->streamReader);

            case '/':
                $this->ensureExpectedType($token, $expectedType);
                return PdfName::parse($this->tokenizer, $this->streamReader);

            case '[':
                $this->ensureExpectedType($token, $expectedType);
                return PdfArray::parse($this->tokenizer, $this);

            default:
                if (\is_numeric($token)) {
                    // Look ahead up to two tokens: "<num> <num> obj" is an indirect object
                    // and "<num> <num> R" is a reference to one. Everything else is a plain
                    // number and the tokens read ahead are pushed back to the tokenizer.
                    if (($token2 = $this->tokenizer->getNextToken()) !== false) {
                        if (\is_numeric($token2) && ($token3 = $this->tokenizer->getNextToken()) !== false) {
                            switch ($token3) {
                                case 'obj':
                                    if ($expectedType !== null && $expectedType !== PdfIndirectObject::class) {
                                        throw new Type\PdfTypeException(
                                            'Got unexpected token type.',
                                            Type\PdfTypeException::INVALID_DATA_TYPE
                                        );
                                    }

                                    return PdfIndirectObject::parse(
                                        (int) $token,
                                        (int) $token2,
                                        $this,
                                        $this->tokenizer,
                                        $this->streamReader
                                    );
                                case 'R':
                                    if (
                                        $expectedType !== null &&
                                        $expectedType !== PdfIndirectObjectReference::class
                                    ) {
                                        throw new Type\PdfTypeException(
                                            'Got unexpected token type.',
                                            Type\PdfTypeException::INVALID_DATA_TYPE
                                        );
                                    }

                                    return PdfIndirectObjectReference::create((int) $token, (int) $token2);
                            }

                            $this->tokenizer->pushStack($token3);
                        }

                        $this->tokenizer->pushStack($token2);
                    }

                    if ($expectedType !== null && $expectedType !== PdfNumeric::class) {
                        throw new Type\PdfTypeException(
                            'Got unexpected token type.',
                            Type\PdfTypeException::INVALID_DATA_TYPE
                        );
                    }
                    // Adding zero converts the numeric string to an int or a float.
                    return PdfNumeric::create($token + 0);
                }

                if ($token === 'true' || $token === 'false') {
                    $this->ensureExpectedType($token, $expectedType);
                    return PdfBoolean::create($token === 'true');
                }

                if ($token === 'null') {
                    $this->ensureExpectedType($token, $expectedType);
                    return new PdfNull();
                }

                if ($expectedType !== null && $expectedType !== PdfToken::class) {
                    throw new Type\PdfTypeException(
                        'Got unexpected token type.',
                        Type\PdfTypeException::INVALID_DATA_TYPE
                    );
                }

                // Anything else is handed back as a generic token.
                $v = new PdfToken();
                $v->value = $token;

                return $v;
        }
    }

    /**
     * Ensures that the token will evaluate to an expected object type (or not).
     *
     * @param string $token One of the tokens listed in the mapping below.
     * @param string|null $expectedType Class name of the expected type, or null if any
     *                                  type is accepted.
     * @return bool True if no type is expected or the token maps to the expected type.
     * @throws Type\PdfTypeException If the token maps to another type.
     */
    private function ensureExpectedType($token, $expectedType)
    {
        static $mapping = [
            '(' => PdfString::class,
            '<' => PdfHexString::class,
            '<<' => PdfDictionary::class,
            '/' => PdfName::class,
            '[' => PdfArray::class,
            'true' => PdfBoolean::class,
            'false' => PdfBoolean::class,
            'null' => PdfNull::class
        ];

        if ($expectedType === null || $mapping[$token] === $expectedType) {
            return true;
        }

        throw new Type\PdfTypeException('Got unexpected token type.', Type\PdfTypeException::INVALID_DATA_TYPE);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body