Differences Between: [Versions 310 and 402] [Versions 39 and 402]
<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\CrossReference;

use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfStream;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * Class CrossReference
 *
 * This class processes the standard cross reference of a PDF document.
 *
 * On construction it locates the "startxref" pointer, reads the cross-reference section it points to and
 * then follows the "Prev" entries of the trailers, collecting one reader instance per section.
 */
class CrossReference
{
    /**
     * The byte length in which the "startxref" keyword should be searched.
     *
     * @var int
     */
    public static $trailerSearchLength = 5500;

    /**
     * A value that is added to byte offsets before the stream reader is positioned.
     *
     * @var int
     */
    protected $fileHeaderOffset = 0;

    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * The reader instances in the order in which they were read (the one referenced by "startxref" first).
     *
     * @var ReaderInterface[]
     */
    protected $readers = [];

    /**
     * CrossReference constructor.
     *
     * @param PdfParser $parser
     * @param int $fileHeaderOffset A value that is added to the byte offsets found in the document.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
    {
        $this->parser = $parser;
        $this->fileHeaderOffset = $fileHeaderOffset;

        $offset = $this->findStartXref();
        $reader = null;
        $processedOffsets = [];

        /** @noinspection TypeUnsafeComparisonInspection */
        while ($offset != false) { // By doing an unsafe comparison we ignore faulty references to byte offset 0
            // A "Prev" entry that points back to an already processed section would make this loop run forever,
            // so we stop following the chain as soon as an offset repeats.
            if (\in_array($offset, $processedOffsets, true)) {
                break;
            }
            $processedOffsets[] = $offset;

            try {
                $reader = $this->readXref($offset + $this->fileHeaderOffset);
            } catch (CrossReferenceException $e) {
                // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
                if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
                    $this->fileHeaderOffset = 0;
                    $reader = $this->readXref($offset + $this->fileHeaderOffset);
                } else {
                    throw $e;
                }
            }

            $trailer = $reader->getTrailer();
            $this->checkForEncryption($trailer);
            $this->readers[] = $reader;

            if (isset($trailer->value['Prev'])) {
                $offset = $trailer->value['Prev']->value;
            } else {
                $offset = false;
            }
        }

        if ($reader === null) {
            throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
        }

        // fix faulty sub-section header (only done for the reader that was read last)
        if ($reader instanceof FixedReader) {
            /**
             * @var FixedReader $reader
             */
            $reader->fixFaultySubSectionShift();
        }
    }

    /**
     * Get the size of the cross reference.
     *
     * The value is taken from the "Size" entry of the trailer dictionary.
     *
     * @return int
     */
    public function getSize()
    {
        return $this->getTrailer()->value['Size']->value;
    }

    /**
     * Get the trailer dictionary.
     *
     * This is the trailer of the first reader, i.e. of the section referenced by the "startxref" pointer.
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->readers[0]->getTrailer();
    }

    /**
     * Get the cross reference reader instances.
     *
     * @return ReaderInterface[]
     */
    public function getReaders()
    {
        return $this->readers;
    }

    /**
     * Get the offset by an object number.
     *
     * The readers are asked in the order in which they were read and the first hit wins.
     *
     * @param int $objectNumber
     * @return int|bool The offset or false if none of the readers knows the object number.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->getReaders() as $reader) {
            $offset = $reader->getOffsetFor($objectNumber);
            if ($offset !== false) {
                return $offset;
            }
        }

        return false;
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @return PdfIndirectObject
     * @throws CrossReferenceException If no offset is known for the object number, if no indirect object can be
     *                                 read at the offset or if the object found there has another object number.
     */
    public function getIndirectObject($objectNumber)
    {
        $offset = $this->getOffsetFor($objectNumber);
        if ($offset === false) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found.', $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        $parser = $this->parser;

        $parser->getTokenizer()->clearStack();
        $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);

        try {
            /** @var PdfIndirectObject $object */
            $object = $parser->readValue(null, PdfIndirectObject::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
                CrossReferenceException::OBJECT_NOT_FOUND,
                $e
            );
        }

        // the comparison is strict, so the object number needs to be passed with the same type as it was parsed
        if ($object->objectNumber !== $objectNumber) {
            throw new CrossReferenceException(
                \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        return $object;
    }

    /**
     * Read the cross-reference table at a given offset.
     *
     * Internally the method will try to evaluate the best reader for this cross-reference.
     *
     * @param int $offset
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readXref($offset)
    {
        $this->parser->getStreamReader()->reset($offset);
        $this->parser->getTokenizer()->clearStack();
        $initValue = $this->parser->readValue();

        return $this->initReaderInstance($initValue);
    }

    /**
     * Get a cross-reference reader instance.
     *
     * For an "xref" token a FixedReader is tried first and a LineReader is used as a fallback. Every other
     * value results in an exception, including a cross-reference stream, which is reported as unsupported.
     *
     * @param PdfToken|PdfIndirectObject $initValue The value that was read at the cross-reference offset.
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function initReaderInstance($initValue)
    {
        // remember the current location to be able to restart from it if the FixedReader fails
        $position = $this->parser->getStreamReader()->getPosition()
            + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;

        if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
            try {
                return new FixedReader($this->parser);
            } catch (CrossReferenceException $e) {
                $this->parser->getStreamReader()->reset($position);
                $this->parser->getTokenizer()->clearStack();

                return new LineReader($this->parser);
            }
        }

        if ($initValue instanceof PdfIndirectObject) {
            try {
                $stream = PdfStream::ensure($initValue->value);
            } catch (PdfTypeException $e) {
                throw new CrossReferenceException(
                    'Invalid object type at xref reference offset.',
                    CrossReferenceException::INVALID_DATA,
                    $e
                );
            }

            $type = PdfDictionary::get($stream->value, 'Type');
            if ($type->value !== 'XRef') {
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA
                );
            }

            // an encrypted document is reported as such before the unsupported compression is reported
            $this->checkForEncryption($stream->value);

            throw new CrossReferenceException(
                'This PDF document probably uses a compression technique which is not supported by the ' .
                'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                CrossReferenceException::COMPRESSED_XREF
            );
        }

        throw new CrossReferenceException(
            'The xref position points to an incorrect object type.',
            CrossReferenceException::INVALID_DATA
        );
    }

    /**
     * Check for encryption.
     *
     * @param PdfDictionary $dictionary
     * @throws CrossReferenceException If the dictionary has an "Encrypt" entry.
     */
    protected function checkForEncryption(PdfDictionary $dictionary)
    {
        if (isset($dictionary->value['Encrypt'])) {
            throw new CrossReferenceException(
                'This PDF document is encrypted and cannot be processed with FPDI.',
                CrossReferenceException::ENCRYPTED
            );
        }
    }

    /**
     * Find the start position for the first cross-reference.
     *
     * The keyword is searched in the last bytes of the document (see {@see CrossReference::$trailerSearchLength}).
     *
     * @return int The byte-offset position of the first cross-reference.
     * @throws CrossReferenceException If the keyword cannot be found or is not followed by a numeric value.
     */
    protected function findStartXref()
    {
        $reader = $this->parser->getStreamReader();
        $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);

        $buffer = $reader->getBuffer(false);
        $pos = \strrpos($buffer, 'startxref');
        $addOffset = 9; // byte length of "startxref"
        if ($pos === false) {
            // Some corrupted documents use startref instead of startxref
            $pos = \strrpos($buffer, 'startref');
            if ($pos === false) {
                throw new CrossReferenceException(
                    'Unable to find pointer to xref table',
                    CrossReferenceException::NO_STARTXREF_FOUND
                );
            }
            $addOffset = 8; // byte length of "startref"
        }

        // continue reading directly behind the keyword
        $reader->setOffset($pos + $addOffset);

        try {
            $value = $this->parser->readValue(null, PdfNumeric::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                'Invalid data after startxref keyword.',
                CrossReferenceException::INVALID_DATA,
                $e
            );
        }

        return $value->value;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body