Differences Between: [Versions 310 and 311] [Versions 310 and 400] [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php
/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2019 Setasign - Jan Slabon (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\CrossReference;

use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfStream;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * Class CrossReference
 *
 * This class processes the standard cross reference of a PDF document.
 *
 * It locates the "startxref" pointer, reads the cross-reference it points to and then follows the
 * "Prev" entries of the trailers, collecting one reader instance per section.
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class CrossReference
{
    /**
     * The byte length in which the "startxref" keyword should be searched.
     *
     * @var int
     */
    public static $trailerSearchLength = 5500;

    /**
     * Value that is added to byte offsets before the stream reader is positioned.
     *
     * @var int
     */
    protected $fileHeaderOffset = 0;

    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * The reader instances in the order they were read (index 0 is the one referenced by "startxref").
     *
     * @var ReaderInterface[]
     */
    protected $readers = [];

    /**
     * CrossReference constructor.
     *
     * Reads the cross-reference referenced by the "startxref" keyword and all previous ones which are
     * linked through the "Prev" entry of the trailer dictionaries.
     *
     * @param PdfParser $parser
     * @param int $fileHeaderOffset Value added to the byte offsets found in the document.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
    {
        $this->parser = $parser;
        $this->fileHeaderOffset = $fileHeaderOffset;

        $offset = $this->findStartXref();
        $reader = null;
        $visitedOffsets = [];
        /** @noinspection TypeUnsafeComparisonInspection */
        while ($offset != false) { // By doing an unsafe comparison we ignore faulty references to byte offset 0
            // A corrupted "Prev" chain can point back to a section that was already read.
            // Stop following the chain in that case instead of looping forever.
            $offsetKey = (string) $offset;
            if (isset($visitedOffsets[$offsetKey])) {
                break;
            }
            $visitedOffsets[$offsetKey] = true;

            try {
                $reader = $this->readXref($offset + $this->fileHeaderOffset);
            } catch (CrossReferenceException $e) {
                // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
                if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
                    $this->fileHeaderOffset = 0;
                    $reader = $this->readXref($offset + $this->fileHeaderOffset);
                } else {
                    throw $e;
                }
            }

            $trailer = $reader->getTrailer();
            $this->checkForEncryption($trailer);
            $this->readers[] = $reader;

            if (isset($trailer->value['Prev'])) {
                $offset = $trailer->value['Prev']->value;
            } else {
                $offset = false;
            }
        }

        // The loop body never ran (e.g. "startxref" pointed to byte offset 0).
        if ($reader === null) {
            throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
        }

        // fix faulty sub-section header (only applied to the last reader of the chain)
        if ($reader instanceof FixedReader) {
            /**
             * @var FixedReader $reader
             */
            $reader->fixFaultySubSectionShift();
        }
    }

    /**
     * Get the size of the cross reference.
     *
     * The value is taken from the "Size" entry of the trailer dictionary.
     *
     * @return int
     */
    public function getSize()
    {
        return $this->getTrailer()->value['Size']->value;
    }

    /**
     * Get the trailer dictionary.
     *
     * This is the trailer of the first reader, i.e. the one referenced by "startxref".
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->readers[0]->getTrailer();
    }

    /**
     * Get the cross reference reader instances.
     *
     * @return ReaderInterface[]
     */
    public function getReaders()
    {
        return $this->readers;
    }

    /**
     * Get the offset by an object number.
     *
     * The readers are asked in the order they were read; the first match wins.
     *
     * @param int $objectNumber
     * @return int|bool The offset or false if no reader knows the object number.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->getReaders() as $reader) {
            $offset = $reader->getOffsetFor($objectNumber);
            if ($offset !== false) {
                return $offset;
            }
        }

        return false;
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @return PdfIndirectObject
     * @throws CrossReferenceException If the object is unknown, cannot be read at the registered offset or
     *                                 if another object number is found at that offset.
     */
    public function getIndirectObject($objectNumber)
    {
        $offset = $this->getOffsetFor($objectNumber);
        if ($offset === false) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found.', $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        $parser = $this->parser;

        $parser->getTokenizer()->clearStack();
        $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);

        try {
            /** @var PdfIndirectObject $object */
            $object = $parser->readValue(null, PdfIndirectObject::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
                CrossReferenceException::OBJECT_NOT_FOUND,
                $e
            );
        }

        if ($object->objectNumber !== $objectNumber) {
            throw new CrossReferenceException(
                \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        return $object;
    }

    /**
     * Read the cross-reference table at a given offset.
     *
     * Internally the method will try to evaluate the best reader for this cross-reference.
     *
     * @param int $offset
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readXref($offset)
    {
        $this->parser->getStreamReader()->reset($offset);
        $this->parser->getTokenizer()->clearStack();
        $initValue = $this->parser->readValue();

        return $this->initReaderInstance($initValue);
    }

    /**
     * Get a cross-reference reader instance.
     *
     * An "xref" token is handled by a FixedReader, falling back to a LineReader if the FixedReader fails.
     * An indirect object is never turned into a reader: it ends in an exception in every case.
     *
     * @param PdfToken|PdfIndirectObject $initValue The value read at the cross-reference offset.
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function initReaderInstance($initValue)
    {
        // Position to return to if the FixedReader fails.
        $position = $this->parser->getStreamReader()->getPosition()
            + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;

        if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
            try {
                return new FixedReader($this->parser);
            } catch (CrossReferenceException $e) {
                $this->parser->getStreamReader()->reset($position);
                $this->parser->getTokenizer()->clearStack();

                return new LineReader($this->parser);
            }
        }

        if ($initValue instanceof PdfIndirectObject) {
            try {
                $stream = PdfStream::ensure($initValue->value);
            } catch (PdfTypeException $e) {
                // Report a non-stream object as invalid data, so that the constructor is able to retry
                // without the file header offset.
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA,
                    $e
                );
            }

            $type = PdfDictionary::get($stream->value, 'Type');
            if ($type->value !== 'XRef') {
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA
                );
            }

            // An encrypted document is reported as such before the compression error below.
            $this->checkForEncryption($stream->value);

            throw new CrossReferenceException(
                'This PDF document probably uses a compression technique which is not supported by the ' .
                'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                CrossReferenceException::COMPRESSED_XREF
            );
        }

        throw new CrossReferenceException(
            'The xref position points to an incorrect object type.',
            CrossReferenceException::INVALID_DATA
        );
    }

    /**
     * Check for encryption.
     *
     * @param PdfDictionary $dictionary
     * @throws CrossReferenceException If the dictionary has an "Encrypt" entry.
     */
    protected function checkForEncryption(PdfDictionary $dictionary)
    {
        if (isset($dictionary->value['Encrypt'])) {
            throw new CrossReferenceException(
                'This PDF document is encrypted and cannot be processed with FPDI.',
                CrossReferenceException::ENCRYPTED
            );
        }
    }

    /**
     * Find the start position for the first cross-reference.
     *
     * The keyword is searched in the last {@see CrossReference::$trailerSearchLength} bytes of the stream.
     *
     * @return int The byte-offset position of the first cross-reference.
     * @throws CrossReferenceException
     */
    protected function findStartXref()
    {
        $reader = $this->parser->getStreamReader();
        $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);

        $buffer = $reader->getBuffer(false);
        $pos = \strrpos($buffer, 'startxref');
        $addOffset = 9; // length of "startxref"
        if ($pos === false) {
            // Some corrupted documents use startref, instead of startxref
            $pos = \strrpos($buffer, 'startref');
            if ($pos === false) {
                throw new CrossReferenceException(
                    'Unable to find pointer to xref table',
                    CrossReferenceException::NO_STARTXREF_FOUND
                );
            }
            $addOffset = 8; // length of "startref"
        }

        // Continue reading directly behind the keyword.
        $reader->setOffset($pos + $addOffset);

        try {
            $value = $this->parser->readValue(null, PdfNumeric::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                'Invalid data after startxref keyword.',
                CrossReferenceException::INVALID_DATA,
                $e
            );
        }

        return $value->value;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body