Differences Between: [Versions 310 and 400] [Versions 311 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Reader; 4 5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 6 use PhpOffice\PhpSpreadsheet\Cell\DataType; 7 use PhpOffice\PhpSpreadsheet\DefinedName; 8 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\PageSetup; 9 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Properties; 10 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Styles; 11 use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; 12 use PhpOffice\PhpSpreadsheet\ReferenceHelper; 13 use PhpOffice\PhpSpreadsheet\RichText\RichText; 14 use PhpOffice\PhpSpreadsheet\Settings; 15 use PhpOffice\PhpSpreadsheet\Shared\File; 16 use PhpOffice\PhpSpreadsheet\Spreadsheet; 17 use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; 18 use SimpleXMLElement; 19 use XMLReader; 20 21 class Gnumeric extends BaseReader 22 { 23 const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets 24 25 const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance'; 26 27 const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'; 28 29 const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink'; 30 31 const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/'; 32 33 const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'; 34 35 const NAMESPACE_OOO = 'http://openoffice.org/2004/office'; 36 37 /** 38 * Shared Expressions. 39 * 40 * @var array 41 */ 42 private $expressions = []; 43 44 /** 45 * Spreadsheet shared across all functions. 46 * 47 * @var Spreadsheet 48 */ 49 private $spreadsheet; 50 51 /** @var ReferenceHelper */ 52 private $referenceHelper; 53 54 /** @var array */ 55 public static $mappings = [ 56 'dataType' => [ 57 '10' => DataType::TYPE_NULL, 58 '20' => DataType::TYPE_BOOL, 59 '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel 60 '40' => DataType::TYPE_NUMERIC, // Float 61 '50' => DataType::TYPE_ERROR, 62 '60' => DataType::TYPE_STRING, 63 //'70': // Cell Range 64 //'80': // Array 65 ], 66 ]; 67 68 /** 69 * Create a new Gnumeric. 70 */ 71 public function __construct() 72 { 73 parent::__construct(); 74 $this->referenceHelper = ReferenceHelper::getInstance(); 75 $this->securityScanner = XmlScanner::getInstance($this); 76 } 77 78 /** 79 * Can the current IReader read the file? 80 */ 81 public function canRead(string $filename): bool 82 { 83 // Check if gzlib functions are available 84 if (File::testFileNoThrow($filename) && function_exists('gzread')) { 85 // Read signature data (first 3 bytes) 86 $fh = fopen($filename, 'rb'); 87 if ($fh !== false) { 88 $data = fread($fh, 2); 89 fclose($fh); 90 } 91 } 92 93 return isset($data) && $data === chr(0x1F) . chr(0x8B); 94 } 95 96 private static function matchXml(XMLReader $xml, string $expectedLocalName): bool 97 { 98 return $xml->namespaceURI === self::NAMESPACE_GNM 99 && $xml->localName === $expectedLocalName 100 && $xml->nodeType === XMLReader::ELEMENT; 101 } 102 103 /** 104 * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. 105 * 106 * @param string $filename 107 * 108 * @return array 109 */ 110 public function listWorksheetNames($filename) 111 { 112 File::assertFile($filename); 113 114 $xml = new XMLReader(); 115 $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions()); 116 $xml->setParserProperty(2, true); 117 118 $worksheetNames = []; 119 while ($xml->read()) { 120 if (self::matchXml($xml, 'SheetName')) { 121 $xml->read(); // Move onto the value node 122 $worksheetNames[] = (string) $xml->value; 123 } elseif (self::matchXml($xml, 'Sheets')) { 124 // break out of the loop once we've got our sheet names rather than parse the entire file 125 break; 126 } 127 } 128 129 return $worksheetNames; 130 } 131 132 /** 133 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 134 * 135 * @param string $filename 136 * 137 * @return array 138 */ 139 public function listWorksheetInfo($filename) 140 { 141 File::assertFile($filename); 142 143 $xml = new XMLReader(); 144 $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions()); 145 $xml->setParserProperty(2, true); 146 147 $worksheetInfo = []; 148 while ($xml->read()) { 149 if (self::matchXml($xml, 'Sheet')) { 150 $tmpInfo = [ 151 'worksheetName' => '', 152 'lastColumnLetter' => 'A', 153 'lastColumnIndex' => 0, 154 'totalRows' => 0, 155 'totalColumns' => 0, 156 ]; 157 158 while ($xml->read()) { 159 if (self::matchXml($xml, 'Name')) { 160 $xml->read(); // Move onto the value node 161 $tmpInfo['worksheetName'] = (string) $xml->value; 162 } elseif (self::matchXml($xml, 'MaxCol')) { 163 $xml->read(); // Move onto the value node 164 $tmpInfo['lastColumnIndex'] = (int) $xml->value; 165 $tmpInfo['totalColumns'] = (int) $xml->value + 1; 166 } elseif (self::matchXml($xml, 'MaxRow')) { 167 $xml->read(); // Move onto the value node 168 $tmpInfo['totalRows'] = (int) $xml->value + 1; 169 170 break; 171 } 172 } 173 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); 174 $worksheetInfo[] = $tmpInfo; 175 } 176 } 177 178 return $worksheetInfo; 179 } 180 181 /** 182 * @param string $filename 183 * 184 * @return string 185 */ 186 private function gzfileGetContents($filename) 187 { 188 $file = @gzopen($filename, 'rb'); 189 $data = ''; 190 if ($file !== false) { 191 while (!gzeof($file)) { 192 $data .= gzread($file, 1024); 193 } 194 gzclose($file); 195 } 196 197 return $data; 198 } 199 200 public static function gnumericMappings(): array 201 { 202 return array_merge(self::$mappings, Styles::$mappings); 203 } 204 205 private function processComments(SimpleXMLElement $sheet): void 206 { 207 if ((!$this->readDataOnly) && (isset($sheet->Objects))) { 208 foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) { 209 $commentAttributes = $comment->attributes(); 210 // Only comment objects are handled at the moment 211 if ($commentAttributes && $commentAttributes->Text) { 212 $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound) 213 ->setAuthor((string) $commentAttributes->Author) 214 ->setText($this->parseRichText((string) $commentAttributes->Text)); 215 } 216 } 217 } 218 } 219 220 /** 221 * @param mixed $value 222 */ 223 private static function testSimpleXml($value): SimpleXMLElement 224 { 225 return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>'); 226 } 227 228 /** 229 * Loads Spreadsheet from file. 230 * 231 * @return Spreadsheet 232 */ 233 public function load(string $filename, int $flags = 0) 234 { 235 $this->processFlags($flags); 236 237 // Create new Spreadsheet 238 $spreadsheet = new Spreadsheet(); 239 $spreadsheet->removeSheetByIndex(0); 240 241 // Load into this instance 242 return $this->loadIntoExisting($filename, $spreadsheet); 243 } 244 245 /** 246 * Loads from file into Spreadsheet instance. 247 */ 248 public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet 249 { 250 $this->spreadsheet = $spreadsheet; 251 File::assertFile($filename); 252 253 $gFileData = $this->gzfileGetContents($filename); 254 255 $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); 256 $xml = self::testSimpleXml($xml2); 257 258 $gnmXML = $xml->children(self::NAMESPACE_GNM); 259 (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML); 260 261 $worksheetID = 0; 262 foreach ($gnmXML->Sheets->Sheet as $sheetOrNull) { 263 $sheet = self::testSimpleXml($sheetOrNull); 264 $worksheetName = (string) $sheet->Name; 265 if (is_array($this->loadSheetsOnly) && !in_array($worksheetName, $this->loadSheetsOnly, true)) { 266 continue; 267 } 268 269 $maxRow = $maxCol = 0; 270 271 // Create new Worksheet 272 $this->spreadsheet->createSheet(); 273 $this->spreadsheet->setActiveSheetIndex($worksheetID); 274 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula 275 // cells... during the load, all formulae should be correct, and we're simply bringing the worksheet 276 // name in line with the formula, not the reverse 277 $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); 278 279 if (!$this->readDataOnly) { 280 (new PageSetup($this->spreadsheet)) 281 ->printInformation($sheet) 282 ->sheetMargins($sheet); 283 } 284 285 foreach ($sheet->Cells->Cell as $cellOrNull) { 286 $cell = self::testSimpleXml($cellOrNull); 287 $cellAttributes = self::testSimpleXml($cell->attributes()); 288 $row = (int) $cellAttributes->Row + 1; 289 $column = (int) $cellAttributes->Col; 290 291 if ($row > $maxRow) { 292 $maxRow = $row; 293 } 294 if ($column > $maxCol) { 295 $maxCol = $column; 296 } 297 298 $column = Coordinate::stringFromColumnIndex($column + 1); 299 300 // Read cell? 301 if ($this->getReadFilter() !== null) { 302 if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) { 303 continue; 304 } 305 } 306 307 $ValueType = $cellAttributes->ValueType; 308 $ExprID = (string) $cellAttributes->ExprID; 309 $type = DataType::TYPE_FORMULA; 310 if ($ExprID > '') { 311 if (((string) $cell) > '') { 312 $this->expressions[$ExprID] = [ 313 'column' => $cellAttributes->Col, 314 'row' => $cellAttributes->Row, 315 'formula' => (string) $cell, 316 ]; 317 } else { 318 $expression = $this->expressions[$ExprID]; 319 320 $cell = $this->referenceHelper->updateFormulaReferences( 321 $expression['formula'], 322 'A1', 323 $cellAttributes->Col - $expression['column'], 324 $cellAttributes->Row - $expression['row'], 325 $worksheetName 326 ); 327 } 328 $type = DataType::TYPE_FORMULA; 329 } else { 330 $vtype = (string) $ValueType; 331 if (array_key_exists($vtype, self::$mappings['dataType'])) { 332 $type = self::$mappings['dataType'][$vtype]; 333 } 334 if ($vtype === '20') { // Boolean 335 $cell = $cell == 'TRUE'; 336 } 337 } 338 $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type); 339 } 340 341 if ($sheet->Styles !== null) { 342 (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol); 343 } 344 345 $this->processComments($sheet); 346 $this->processColumnWidths($sheet, $maxCol); 347 $this->processRowHeights($sheet, $maxRow); 348 $this->processMergedCells($sheet); 349 $this->processAutofilter($sheet); 350 351 ++$worksheetID; 352 } 353 354 $this->processDefinedNames($gnmXML); 355 356 // Return 357 return $this->spreadsheet; 358 } 359 360 private function processMergedCells(?SimpleXMLElement $sheet): void 361 { 362 // Handle Merged Cells in this worksheet 363 if ($sheet !== null && isset($sheet->MergedRegions)) { 364 foreach ($sheet->MergedRegions->Merge as $mergeCells) { 365 if (strpos((string) $mergeCells, ':') !== false) { 366 $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells); 367 } 368 } 369 } 370 } 371 372 private function processAutofilter(?SimpleXMLElement $sheet): void 373 { 374 if ($sheet !== null && isset($sheet->Filters)) { 375 foreach ($sheet->Filters->Filter as $autofilter) { 376 if ($autofilter !== null) { 377 $attributes = $autofilter->attributes(); 378 if (isset($attributes['Area'])) { 379 $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']); 380 } 381 } 382 } 383 } 384 } 385 386 private function setColumnWidth(int $whichColumn, float $defaultWidth): void 387 { 388 $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1)); 389 if ($columnDimension !== null) { 390 $columnDimension->setWidth($defaultWidth); 391 } 392 } 393 394 private function setColumnInvisible(int $whichColumn): void 395 { 396 $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1)); 397 if ($columnDimension !== null) { 398 $columnDimension->setVisible(false); 399 } 400 } 401 402 private function processColumnLoop(int $whichColumn, int $maxCol, ?SimpleXMLElement $columnOverride, float $defaultWidth): int 403 { 404 $columnOverride = self::testSimpleXml($columnOverride); 405 $columnAttributes = self::testSimpleXml($columnOverride->attributes()); 406 $column = $columnAttributes['No']; 407 $columnWidth = ((float) $columnAttributes['Unit']) / 5.4; 408 $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1'); 409 $columnCount = (int) ($columnAttributes['Count'] ?? 1); 410 while ($whichColumn < $column) { 411 $this->setColumnWidth($whichColumn, $defaultWidth); 412 ++$whichColumn; 413 } 414 while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) { 415 $this->setColumnWidth($whichColumn, $columnWidth); 416 if ($hidden) { 417 $this->setColumnInvisible($whichColumn); 418 } 419 ++$whichColumn; 420 } 421 422 return $whichColumn; 423 } 424 425 private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void 426 { 427 if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) { 428 // Column Widths 429 $defaultWidth = 0; 430 $columnAttributes = $sheet->Cols->attributes(); 431 if ($columnAttributes !== null) { 432 $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4; 433 } 434 $whichColumn = 0; 435 foreach ($sheet->Cols->ColInfo as $columnOverride) { 436 $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth); 437 } 438 while ($whichColumn <= $maxCol) { 439 $this->setColumnWidth($whichColumn, $defaultWidth); 440 ++$whichColumn; 441 } 442 } 443 } 444 445 private function setRowHeight(int $whichRow, float $defaultHeight): void 446 { 447 $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow); 448 if ($rowDimension !== null) { 449 $rowDimension->setRowHeight($defaultHeight); 450 } 451 } 452 453 private function setRowInvisible(int $whichRow): void 454 { 455 $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow); 456 if ($rowDimension !== null) { 457 $rowDimension->setVisible(false); 458 } 459 } 460 461 private function processRowLoop(int $whichRow, int $maxRow, ?SimpleXMLElement $rowOverride, float $defaultHeight): int 462 { 463 $rowOverride = self::testSimpleXml($rowOverride); 464 $rowAttributes = self::testSimpleXml($rowOverride->attributes()); 465 $row = $rowAttributes['No']; 466 $rowHeight = (float) $rowAttributes['Unit']; 467 $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1'); 468 $rowCount = (int) ($rowAttributes['Count'] ?? 1); 469 while ($whichRow < $row) { 470 ++$whichRow; 471 $this->setRowHeight($whichRow, $defaultHeight); 472 } 473 while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) { 474 ++$whichRow; 475 $this->setRowHeight($whichRow, $rowHeight); 476 if ($hidden) { 477 $this->setRowInvisible($whichRow); 478 } 479 } 480 481 return $whichRow; 482 } 483 484 private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void 485 { 486 if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) { 487 // Row Heights 488 $defaultHeight = 0; 489 $rowAttributes = $sheet->Rows->attributes(); 490 if ($rowAttributes !== null) { 491 $defaultHeight = (float) $rowAttributes['DefaultSizePts']; 492 } 493 $whichRow = 0; 494 495 foreach ($sheet->Rows->RowInfo as $rowOverride) { 496 $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight); 497 } 498 // never executed, I can't figure out any circumstances 499 // under which it would be executed, and, even if 500 // such exist, I'm not convinced this is needed. 501 //while ($whichRow < $maxRow) { 502 // ++$whichRow; 503 // $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight); 504 //} 505 } 506 } 507 508 private function processDefinedNames(?SimpleXMLElement $gnmXML): void 509 { 510 // Loop through definedNames (global named ranges) 511 if ($gnmXML !== null && isset($gnmXML->Names)) { 512 foreach ($gnmXML->Names->Name as $definedName) { 513 $name = (string) $definedName->name; 514 $value = (string) $definedName->value; 515 if (stripos($value, '#REF!') !== false) { 516 continue; 517 } 518 519 [$worksheetName] = Worksheet::extractSheetTitle($value, true); 520 $worksheetName = trim($worksheetName, "'"); 521 $worksheet = $this->spreadsheet->getSheetByName($worksheetName); 522 // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet 523 if ($worksheet !== null) { 524 $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value)); 525 } 526 } 527 } 528 } 529 530 private function parseRichText(string $is): RichText 531 { 532 $value = new RichText(); 533 $value->createText($is); 534 535 return $value; 536 } 537 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body