Differences Between: [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]
<?php
//============================================================+
// File name   : tcpdf_parser.php
// Version     : 1.0.16
// Begin       : 2011-05-23
// Last Update : 2015-04-28
// Author      : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
// License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
// -------------------------------------------------------------------
// Copyright (C) 2011-2015 Nicola Asuni - Tecnick.com LTD
//
// This file is part of TCPDF software library.
//
// TCPDF is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// TCPDF is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the License
// along with TCPDF. If not, see
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
//
// See LICENSE.TXT file for more information.
// -------------------------------------------------------------------
//
// Description : This is a PHP class for parsing PDF documents.
//
//============================================================+

/**
 * @file
 * This is a PHP class for parsing PDF documents.<br>
 * @package com.tecnick.tcpdf
 * @author Nicola Asuni
 * @version 1.0.15
 */

// include class for decoding filters
require_once(dirname(__FILE__).'/include/tcpdf_filters.php');

/**
 * @class TCPDF_PARSER
 * This is a PHP class for parsing PDF documents.<br>
 * @package com.tecnick.tcpdf
 * @brief This is a PHP class for parsing PDF documents..
 * @version 1.0.15
 * @author Nicola Asuni - info@tecnick.com
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document.
	 * @private
	 */
	private $pdfdata = '';

	/**
	 * XREF data.
	 * @protected
	 */
	protected $xref = array();

	/**
	 * Array of PDF objects.
	 * @protected
	 */
	protected $objects = array();

	/**
	 * Class object for decoding filters.
	 * @private
	 */
	private $FilterDecoders;

	/**
	 * Array of configuration parameters.
	 * @private
	 */
	private $cfg = array(
		'die_for_errors' => false,
		'ignore_filter_decoding_errors' => true,
		'ignore_missing_filter_decoders' => true,
	);

	/**
	 * Offsets of the cross-reference sections already decoded (offset => true).
	 * Used to stop following a circular chain of /Prev entries.
	 * @private
	 */
	private $xref_offsets = array();

	// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document and build the array of objects.
	 * @param $data (string) PDF data to parse.
	 * @param $cfg (array) Array of configuration parameters:
	 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throws an exception;
	 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
	 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
	 * @public
	 * @since 1.0.000 (2011-05-24)
	 */
	public function __construct($data, $cfg=array()) {
		// set configuration parameters first, so that 'die_for_errors' is honoured by every Error() call below
		$this->setConfig($cfg);
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		// find the pdf header starting position
		if (($trimpos = strpos($data, '%PDF-')) === FALSE) {
			$this->Error('Invalid PDF data: missing %PDF header.');
		}
		// get PDF content string (anything before the header is discarded)
		$this->pdfdata = substr($data, $trimpos);
		// get xref and trailer data
		$this->xref_offsets = array();
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		if (isset($this->xref['xref']) AND is_array($this->xref['xref'])) {
			foreach ($this->xref['xref'] as $obj => $offset) {
				if (!isset($this->objects[$obj]) AND ($offset > 0)) {
					// decode objects with positive offset
					// (compressed objects are stored with offset -1 and are skipped here)
					$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
				}
			}
		}
		// release some memory
		$this->pdfdata = '';
		$this->xref_offsets = array();
	}

	/**
	 * Set the configuration parameters.
	 * Only the known keys are read; each value is converted to boolean.
	 * @param $cfg (array) Array of configuration parameters:
	 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throws an exception;
	 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
	 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
	 * @protected
	 */
	protected function setConfig($cfg) {
		foreach (array('die_for_errors', 'ignore_filter_decoding_errors', 'ignore_missing_filter_decoders') as $key) {
			if (isset($cfg[$key])) {
				$this->cfg[$key] = !!$cfg[$key];
			}
		}
	}

	/**
	 * Return an array of parsed PDF document objects.
	 * @return (array) Two-element array: the xref/trailer data and the array of parsed objects.
	 * @public
	 * @since 1.0.000 (2011-06-26)
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Get Cross-Reference (xref) table and trailer data from PDF document data.
	 * @param $offset (int) xref offset (if know).
	 * @param $xref (array) previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$datalen = strlen($this->pdfdata);
		$offset = intval($offset);
		if (($offset < 0) OR ($offset > $datalen)) {
			// an offset outside the data cannot be searched (strpos() throws on PHP 8)
			$this->Error('Unable to find startxref');
		}
		$startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
		if ($offset == 0) {
			// find last startxref
			if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
				$this->Error('Unable to find startxref');
			}
			$matches = array_pop($matches);
			$startxref = $matches[1];
		} elseif (strpos($this->pdfdata, 'xref', $offset) === $offset) {
			// Already pointing at the xref table
			$startxref = $offset;
		} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// Cross-Reference Stream object
			$startxref = $offset;
		} elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// startxref found
			$startxref = $matches[1][0];
		} else {
			$this->Error('Unable to find startxref');
		}
		$startxref = intval($startxref);
		if ($startxref > $datalen) {
			// the declared position is beyond the end of the data (truncated or corrupted document)
			$this->Error('Unable to find xref');
		}
		if (isset($this->xref_offsets[$startxref])) {
			// this section has already been decoded: following it again would recurse forever on a circular /Prev chain
			return $xref;
		}
		$this->xref_offsets[$startxref] = true;
		// check xref position
		if (strpos($this->pdfdata, 'xref', $startxref) === $startxref) {
			// Cross-Reference
			$xref = $this->decodeXref($startxref, $xref);
		} else {
			// Cross-Reference Stream
			$xref = $this->decodeXrefStream($startxref, $xref);
		}
		if (empty($xref)) {
			$this->Error('Unable to find xref');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference section
	 * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function decodeXref($startxref, $xref=array()) {
		$startxref += 4; // 4 is the length of the word 'xref'
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
		// initialize object number
		$obj_num = 0;
		// search for cross-reference entries or subsection
		while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			if ($matches[0][1] != $offset) {
				// we are on another section
				break;
			}
			$offset += strlen($matches[0][0]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// check if object already exist (the most recent section is decoded first and wins)
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only the object counter advances
				++$obj_num;
			} else {
				// subsection header: first number is the object number (index) of the first entry
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref
				$xref = $this->getXrefData(intval($matches[1]), $xref);
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Decode the Cross-Reference Stream section
	 * @param $startxref (int) Offset at which the xref section starts.
	 * @param $xref (array) Previous xref array (if any).
	 * @return Array containing xref and trailer data.
	 * @protected
	 * @since 1.0.003 (2013-03-16)
	 */
	protected function decodeXrefStream($startxref, $xref=array()) {
		// try to read Cross-Reference Stream
		$xrefobj = $this->getRawObject($startxref);
		$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
		if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			$filltrailer = true;
		} else {
			$filltrailer = false;
		}
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		$valid_crs = false;
		$columns = 0;
		// number of bytes (in the decoded stream) of each of the three entry fields
		$wb = array(0, 0, 0);
		// first object number in the subsection
		$index_first = 0;
		// stream dictionary: flat list of elements where each name is followed by its value
		$sarr = array();
		if (isset($xrefcrs[0][1]) AND is_array($xrefcrs[0][1])) {
			$sarr = $xrefcrs[0][1];
		}
		foreach ($sarr as $k => $v) {
			if (($v[0] != '/') OR !isset($sarr[($k + 1)])) {
				// only name elements followed by a value are of interest
				continue;
			}
			$next = $sarr[($k + 1)];
			switch ($v[1]) {
				case 'Type': {
					if (($next[0] == '/') AND ($next[1] == 'XRef')) {
						$valid_crs = true;
					}
					break;
				}
				case 'Index': {
					// only the first [first-object-number, entries] pair is used
					if (isset($next[1][0][1])) {
						$index_first = intval($next[1][0][1]);
					}
					break;
				}
				case 'Prev': {
					if ($next[0] == 'numeric') {
						// get previous xref offset
						$prevxref = intval($next[1]);
					}
					break;
				}
				case 'W': {
					for ($c = 0; $c < 3; ++$c) {
						$wb[$c] = isset($next[1][$c][1]) ? intval($next[1][$c][1]) : 0;
					}
					break;
				}
				case 'DecodeParms': {
					if (isset($next[1]) AND is_array($next[1])) {
						$decpar = $next[1];
						foreach ($decpar as $kdc => $vdc) {
							if (($vdc[0] == '/') AND ($vdc[1] == 'Columns') AND (isset($decpar[($kdc + 1)]) AND ($decpar[($kdc + 1)][0] == 'numeric'))) {
								$columns = intval($decpar[($kdc + 1)][1]);
							}
						}
					}
					break;
				}
				case 'Size': {
					if ($filltrailer AND ($next[0] == 'numeric')) {
						$xref['trailer']['size'] = $next[1];
					}
					break;
				}
				case 'Root': {
					if ($filltrailer AND ($next[0] == 'objref')) {
						$xref['trailer']['root'] = $next[1];
					}
					break;
				}
				case 'Info': {
					if ($filltrailer AND ($next[0] == 'objref')) {
						$xref['trailer']['info'] = $next[1];
					}
					break;
				}
				case 'Encrypt': {
					if ($filltrailer AND ($next[0] == 'objref')) {
						$xref['trailer']['encrypt'] = $next[1];
					}
					break;
				}
				case 'ID': {
					if ($filltrailer) {
						$xref['trailer']['id'] = array();
						$xref['trailer']['id'][0] = isset($next[1][0][1]) ? $next[1][0][1] : null;
						$xref['trailer']['id'][1] = isset($next[1][1][1]) ? $next[1][1][1] : null;
					}
					break;
				}
			}
		}
		// decode data
		if ($valid_crs AND isset($xrefcrs[1][3][0])) {
			// convert the (already filter-decoded) stream into an array of integers
			$bytes = unpack('C*', $xrefcrs[1][3][0]);
			if (!is_array($bytes)) {
				$bytes = array();
			}
			if ($columns > 0) {
				// rows are stored with a PNG predictor
				$ddata = $this->decodePngPredictedRows($bytes, $columns);
			} else {
				// no predictor declared: each row is exactly one entry of W[0]+W[1]+W[2] bytes
				$rowlen = array_sum($wb);
				$ddata = ($rowlen > 0) ? array_chunk($bytes, $rowlen) : array();
			}
			// complete decoding: assemble the three big-endian fields of every entry
			$sdata = array();
			foreach ($ddata as $k => $row) {
				// initialize new row
				$sdata[$k] = array(0, 0, 0);
				if ($wb[0] == 0) {
					// default type field
					$sdata[$k][0] = 1;
				}
				$i = 0; // count bytes in the row
				// for every column
				for ($c = 0; $c < 3; ++$c) {
					// for every byte on the column
					for ($b = 0; $b < $wb[$c]; ++$b) {
						if (isset($row[$i])) {
							$sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
						}
						++$i;
					}
				}
			}
			// fill xref
			$obj_num = $index_first;
			foreach ($sdata as $row) {
				switch ($row[0]) {
					case 0: { // (f) linked list of free objects
						break;
					}
					case 1: { // (n) objects that are in use but are not compressed
						// create unique object index: [object number]_[generation number]
						$index = $obj_num.'_'.$row[2];
						// check if object already exist
						if (!isset($xref['xref'][$index])) {
							// store object offset position
							$xref['xref'][$index] = $row[1];
						}
						break;
					}
					case 2: { // compressed objects
						// $row[1] = object number of the object stream in which this object is stored
						// $row[2] = index of this object within the object stream
						$index = $row[1].'_0_'.$row[2];
						$xref['xref'][$index] = -1;
						break;
					}
					default: { // null objects
						break;
					}
				}
				++$obj_num;
			}
		} // end decoding data
		if (isset($prevxref)) {
			// get previous xref
			$xref = $this->getXrefData($prevxref, $xref);
		}
		return $xref;
	}

	/**
	 * Reverse the PNG prediction applied to the rows of a stream.
	 * Every stored row is one predictor-type byte followed by $columns data bytes.
	 * @param $bytes (array) Stream bytes as integers.
	 * @param $columns (int) Number of data bytes in each row (must be greater than zero).
	 * @return array of rows, each one an array of $columns reconstructed bytes.
	 * @private
	 */
	private function decodePngPredictedRows($bytes, $columns) {
		// number of bytes in a row
		$rowlen = ($columns + 1);
		// split the rows
		$rows = array_chunk($bytes, $rowlen);
		// initialize decoded array
		$ddata = array();
		// initialize first row with zeros
		$prev_row = array_fill(0, $rowlen, 0);
		// for each row apply PNG unpredictor
		foreach ($rows as $k => $row) {
			// initialize new row
			$ddata[$k] = array();
			// get PNG predictor value
			$predictor = (10 + $row[0]);
			// for each byte on the row
			for ($i = 1; $i <= $columns; ++$i) {
				// new index
				$j = ($i - 1);
				// a truncated last row is padded with zeros
				$cur = isset($row[$i]) ? $row[$i] : 0;
				$row_up = $prev_row[$j];
				if ($i == 1) {
					$row_left = 0;
					$row_upleft = 0;
				} else {
					// the predictors work on the reconstructed (not the stored) left byte
					$row_left = $ddata[$k][($j - 1)];
					$row_upleft = $prev_row[($j - 1)];
				}
				switch ($predictor) {
					case 10: { // PNG prediction (on encoding, PNG None on all rows)
						$ddata[$k][$j] = $cur;
						break;
					}
					case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
						$ddata[$k][$j] = (($cur + $row_left) & 0xff);
						break;
					}
					case 12: { // PNG prediction (on encoding, PNG Up on all rows)
						$ddata[$k][$j] = (($cur + $row_up) & 0xff);
						break;
					}
					case 13: { // PNG prediction (on encoding, PNG Average on all rows)
						// integer halving (rounds down) keeps the arithmetic free of floats
						$ddata[$k][$j] = (($cur + (($row_left + $row_up) >> 1)) & 0xff);
						break;
					}
					case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
						// initial estimate
						$p = ($row_left + $row_up - $row_upleft);
						// distances
						$pa = abs($p - $row_left);
						$pb = abs($p - $row_up);
						$pc = abs($p - $row_upleft);
						$pmin = min($pa, $pb, $pc);
						// use the nearest neighbour; ties are resolved in the order left, up, upper-left
						if ($pmin == $pa) {
							$ddata[$k][$j] = (($cur + $row_left) & 0xff);
						} elseif ($pmin == $pb) {
							$ddata[$k][$j] = (($cur + $row_up) & 0xff);
						} else {
							$ddata[$k][$j] = (($cur + $row_upleft) & 0xff);
						}
						break;
					}
					default: { // PNG prediction (on encoding, PNG optimum)
						$this->Error('Unknown PNG predictor');
						break;
					}
				}
			}
			$prev_row = $ddata[$k];
		} // end for each row
		return $ddata;
	}

	/**
	 * Get object type, raw value and offset to next object
	 * @param $offset (int) Object offset.
	 * @return array containing object type, raw value and offset to next object
	 * @protected
	 * @since 1.0.000 (2011-06-20)
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if (!isset($this->pdfdata[$offset])) {
			// end of data: return an empty element without advancing
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(':   // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis, honouring nesting and backslash escapes
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[':   // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$oldoffset = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop also when no progress is made (truncated or unparsable data), otherwise the loop never ends
					} while (($element[0] != ']') AND ($offset != $oldoffset));
					// remove closing delimiter
					array_pop($objval);
				}
				break;
			}
			case '<':   // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) AND ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get array content
						$objval = array();
						do {
							$oldoffset = $offset;
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
							// stop also when no progress is made (truncated or unparsable data), otherwise the loop never ends
						} while (($element[0] != '>>') AND ($offset != $oldoffset));
						// remove closing delimiter
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') AND (preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						// remove white space characters
						// (strtr() with an empty replacement string returns its input unchanged, so str_replace() is required)
						$objval = str_replace(array("\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
						$offset += strlen($matches[0]);
					} elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== FALSE) {
						$offset = $endpos + 1;
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/^([\r]?[\n])/isU', substr($this->pdfdata, $offset), $matches) == 1) {
						$offset += strlen($matches[0]);
						if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', substr($this->pdfdata, $offset), $matches, PREG_OFFSET_CAPTURE) == 1) {
							// the stream content is everything up to the 'endstream' keyword
							$objval = substr($this->pdfdata, $offset, $matches[0][1]);
							$offset += $matches[1][1];
						}
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'objref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'obj';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Get content of indirect object.
	 * @param $obj_ref (string) Object number and generation number separated by underscore character.
	 * @param $offset (int) Object offset.
	 * @param $decoding (boolean) If true decode streams.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-05-24)
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = is_scalar($obj_ref) ? explode('_', (string) $obj_ref) : array();
		if (count($obj) != 2) {
			// report the reference itself (a non-scalar value cannot be printed, so its type is shown instead)
			$this->Error('Invalid object reference: '.(is_scalar($obj_ref) ? $obj_ref : gettype($obj_ref)));
			return;
		}
		if (($offset < 0) OR ($offset > strlen($this->pdfdata))) {
			// an offset outside the data cannot point to an object: same result as an undefined object
			return array('null', 'null', $offset);
		}
		$objref = $obj[0].' '.$obj[1].' obj';
		// ignore leading zeros
		$offset += strspn($this->pdfdata, '0', $offset);
		if (strpos($this->pdfdata, $objref, $offset) !== $offset) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$oldoffset = $offset;
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// decode stream using stream's dictionary information
			if ($decoding AND ($element[0] == 'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// the second condition stops the loop when the parser makes no progress
		} while (($element[0] != 'endobj') AND ($offset != $oldoffset));
		// remove closing delimiter
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Get the content of object, resolving indirect object reference if necessary.
	 * @param $obj (string) Object value.
	 * @return array containing object data.
	 * @protected
	 * @since 1.0.000 (2011-06-26)
	 */
	protected function getObjectVal($obj) {
		if ($obj[0] == 'objref') {
			// reference to indirect object
			if (isset($this->objects[$obj[1]])) {
				// this object has been already parsed
				return $this->objects[$obj[1]];
			} elseif (isset($this->xref[$obj[1]])) {
				// NOTE(review): $this->xref is filled with the 'xref' and 'trailer' sub-arrays, so a lookup
				// by object reference looks like it can never match here - confirm whether
				// $this->xref['xref'][...] was intended (the callers would then receive a list of elements).
				// parse new object
				$this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref[$obj[1]], false);
				return $this->objects[$obj[1]];
			}
		}
		return $obj;
	}

	/**
	 * Decode the specified stream.
	 * @param $sdic (array) Stream's dictionary array.
	 * @param $stream (string) Stream to decode.
	 * @return array containing decoded stream data and remaining filters.
	 * @protected
	 * @since 1.0.000 (2011-06-22)
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		if ($slength <= 0) {
			return array('', array());
		}
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') AND (isset($sdic[($k + 1)])) AND ($sdic[($k + 1)][0] == 'numeric')) {
					// get declared stream length
					$declength = intval($sdic[($k + 1)][1]);
					if ($declength < $slength) {
						// drop the bytes exceeding the declared length
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') AND (isset($sdic[($k + 1)]))) {
					// resolve indirect object
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if ($objval[0] == '/') {
						// single filter
						$filters[] = $objval[1];
					} elseif ($objval[0] == '[') {
						// array of filters
						foreach ($objval[1] as $flt) {
							if ($flt[0] == '/') {
								$filters[] = $flt[1];
							}
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		foreach ($filters as $filter) {
			if (in_array($filter, TCPDF_FILTERS::getAvailableFilters())) {
				try {
					$stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
				} catch (Exception $e) {
					$emsg = $e->getMessage();
					// a message starting with '~' is handled as a missing filter decoder, any other as a decoding error
					$missing_decoder = (isset($emsg[0]) AND ($emsg[0] == '~'));
					if (($missing_decoder AND !$this->cfg['ignore_missing_filter_decoders'])
						OR (!$missing_decoder AND !$this->cfg['ignore_filter_decoding_errors'])) {
						$this->Error($e->getMessage());
					}
				}
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Print an error message and terminate the execution if the 'die_for_errors' configuration parameter is true, otherwise throw an exception.
	 * @param $msg (string) The error message
	 * @public
	 * @since 1.0.000 (2011-05-23)
	 */
	public function Error($msg) {
		if ($this->cfg['die_for_errors']) {
			die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
		} else {
			throw new Exception('TCPDF_PARSER ERROR: '.$msg);
		}
	}

} // END OF TCPDF_PARSER CLASS

//============================================================+
// END OF FILE
//============================================================+
title
Description
Body
title
Description
Body
title
Description
Body
title
Body