See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Reader; 4 5 use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 6 use PhpOffice\PhpSpreadsheet\Cell\DataType; 7 use PhpOffice\PhpSpreadsheet\DefinedName; 8 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\PageSetup; 9 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Properties; 10 use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Styles; 11 use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; 12 use PhpOffice\PhpSpreadsheet\ReferenceHelper; 13 use PhpOffice\PhpSpreadsheet\RichText\RichText; 14 use PhpOffice\PhpSpreadsheet\Settings; 15 use PhpOffice\PhpSpreadsheet\Shared\File; 16 use PhpOffice\PhpSpreadsheet\Spreadsheet; 17 use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; 18 use SimpleXMLElement; 19 use XMLReader; 20 21 class Gnumeric extends BaseReader 22 { 23 const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets 24 25 const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance'; 26 27 const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0'; 28 29 const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink'; 30 31 const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/'; 32 33 const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0'; 34 35 const NAMESPACE_OOO = 'http://openoffice.org/2004/office'; 36 37 /** 38 * Shared Expressions. 39 * 40 * @var array 41 */ 42 private $expressions = []; 43 44 /** 45 * Spreadsheet shared across all functions. 46 * 47 * @var Spreadsheet 48 */ 49 private $spreadsheet; 50 51 /** @var ReferenceHelper */ 52 private $referenceHelper; 53 54 /** @var array */ 55 public static $mappings = [ 56 'dataType' => [ 57 '10' => DataType::TYPE_NULL, 58 '20' => DataType::TYPE_BOOL, 59 '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel 60 '40' => DataType::TYPE_NUMERIC, // Float 61 '50' => DataType::TYPE_ERROR, 62 '60' => DataType::TYPE_STRING, 63 //'70': // Cell Range 64 //'80': // Array 65 ], 66 ]; 67 68 /** 69 * Create a new Gnumeric. 70 */ 71 public function __construct() 72 { 73 parent::__construct(); 74 $this->referenceHelper = ReferenceHelper::getInstance(); 75 $this->securityScanner = XmlScanner::getInstance($this); 76 } 77 78 /** 79 * Can the current IReader read the file? 80 */ 81 public function canRead(string $filename): bool 82 { 83 // Check if gzlib functions are available 84 if (File::testFileNoThrow($filename) && function_exists('gzread')) { 85 // Read signature data (first 3 bytes) 86 $fh = fopen($filename, 'rb'); 87 if ($fh !== false) { 88 $data = fread($fh, 2); 89 fclose($fh); 90 } 91 } 92 93 return isset($data) && $data === chr(0x1F) . chr(0x8B); 94 } 95 96 private static function matchXml(XMLReader $xml, string $expectedLocalName): bool 97 { 98 return $xml->namespaceURI === self::NAMESPACE_GNM 99 && $xml->localName === $expectedLocalName 100 && $xml->nodeType === XMLReader::ELEMENT; 101 } 102 103 /** 104 * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. 105 * 106 * @param string $filename 107 * 108 * @return array 109 */ 110 public function listWorksheetNames($filename) 111 { 112 File::assertFile($filename); 113 114 $xml = new XMLReader(); 115 $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions()); 116 $xml->setParserProperty(2, true); 117 118 $worksheetNames = []; 119 while ($xml->read()) { 120 if (self::matchXml($xml, 'SheetName')) { 121 $xml->read(); // Move onto the value node 122 $worksheetNames[] = (string) $xml->value; 123 } elseif (self::matchXml($xml, 'Sheets')) { 124 // break out of the loop once we've got our sheet names rather than parse the entire file 125 break; 126 } 127 } 128 129 return $worksheetNames; 130 } 131 132 /** 133 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 134 * 135 * @param string $filename 136 * 137 * @return array 138 */ 139 public function listWorksheetInfo($filename) 140 { 141 File::assertFile($filename); 142 143 $xml = new XMLReader(); 144 $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions()); 145 $xml->setParserProperty(2, true); 146 147 $worksheetInfo = []; 148 while ($xml->read()) { 149 if (self::matchXml($xml, 'Sheet')) { 150 $tmpInfo = [ 151 'worksheetName' => '', 152 'lastColumnLetter' => 'A', 153 'lastColumnIndex' => 0, 154 'totalRows' => 0, 155 'totalColumns' => 0, 156 ]; 157 158 while ($xml->read()) { 159 if (self::matchXml($xml, 'Name')) { 160 $xml->read(); // Move onto the value node 161 $tmpInfo['worksheetName'] = (string) $xml->value; 162 } elseif (self::matchXml($xml, 'MaxCol')) { 163 $xml->read(); // Move onto the value node 164 $tmpInfo['lastColumnIndex'] = (int) $xml->value; 165 $tmpInfo['totalColumns'] = (int) $xml->value + 1; 166 } elseif (self::matchXml($xml, 'MaxRow')) { 167 $xml->read(); // Move onto the value node 168 $tmpInfo['totalRows'] = (int) $xml->value + 1; 169 170 break; 171 } 172 } 173 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); 174 $worksheetInfo[] = $tmpInfo; 175 } 176 } 177 178 return $worksheetInfo; 179 } 180 181 /** 182 * @param string $filename 183 * 184 * @return string 185 */ 186 private function gzfileGetContents($filename) 187 { 188 $file = @gzopen($filename, 'rb'); 189 $data = ''; 190 if ($file !== false) { 191 while (!gzeof($file)) { 192 $data .= gzread($file, 1024); 193 } 194 gzclose($file); 195 } 196 197 return $data; 198 } 199 200 public static function gnumericMappings(): array 201 { 202 return array_merge(self::$mappings, Styles::$mappings); 203 } 204 205 private function processComments(SimpleXMLElement $sheet): void 206 { 207 if ((!$this->readDataOnly) && (isset($sheet->Objects))) { 208 foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) { 209 $commentAttributes = $comment->attributes(); 210 // Only comment objects are handled at the moment 211 if ($commentAttributes && $commentAttributes->Text) { 212 $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound) 213 ->setAuthor((string) $commentAttributes->Author) 214 ->setText($this->parseRichText((string) $commentAttributes->Text)); 215 } 216 } 217 } 218 } 219 220 /** 221 * @param mixed $value 222 */ 223 private static function testSimpleXml($value): SimpleXMLElement 224 { 225 return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>'); 226 } 227 228 /** 229 * Loads Spreadsheet from file. 230 */ 231 protected function loadSpreadsheetFromFile(string $filename): Spreadsheet 232 { 233 // Create new Spreadsheet 234 $spreadsheet = new Spreadsheet(); 235 $spreadsheet->removeSheetByIndex(0); 236 237 // Load into this instance 238 return $this->loadIntoExisting($filename, $spreadsheet); 239 } 240 241 /** 242 * Loads from file into Spreadsheet instance. 243 */ 244 public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet 245 { 246 $this->spreadsheet = $spreadsheet; 247 File::assertFile($filename); 248 249 $gFileData = $this->gzfileGetContents($filename); 250 251 $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions()); 252 $xml = self::testSimpleXml($xml2); 253 254 $gnmXML = $xml->children(self::NAMESPACE_GNM); 255 (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML); 256 257 $worksheetID = 0; 258 foreach ($gnmXML->Sheets->Sheet as $sheetOrNull) { 259 $sheet = self::testSimpleXml($sheetOrNull); 260 $worksheetName = (string) $sheet->Name; 261 if (is_array($this->loadSheetsOnly) && !in_array($worksheetName, $this->loadSheetsOnly, true)) { 262 continue; 263 } 264 265 $maxRow = $maxCol = 0; 266 267 // Create new Worksheet 268 $this->spreadsheet->createSheet(); 269 $this->spreadsheet->setActiveSheetIndex($worksheetID); 270 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula 271 // cells... during the load, all formulae should be correct, and we're simply bringing the worksheet 272 // name in line with the formula, not the reverse 273 $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); 274 275 $visibility = $sheetOrNull->attributes()['Visibility'] ?? 'GNM_SHEET_VISIBILITY_VISIBLE'; 276 if ((string) $visibility !== 'GNM_SHEET_VISIBILITY_VISIBLE') { 277 $this->spreadsheet->getActiveSheet()->setSheetState(Worksheet::SHEETSTATE_HIDDEN); 278 } 279 280 if (!$this->readDataOnly) { 281 (new PageSetup($this->spreadsheet)) 282 ->printInformation($sheet) 283 ->sheetMargins($sheet); 284 } 285 286 foreach ($sheet->Cells->Cell as $cellOrNull) { 287 $cell = self::testSimpleXml($cellOrNull); 288 $cellAttributes = self::testSimpleXml($cell->attributes()); 289 $row = (int) $cellAttributes->Row + 1; 290 $column = (int) $cellAttributes->Col; 291 292 $maxRow = max($maxRow, $row); 293 $maxCol = max($maxCol, $column); 294 295 $column = Coordinate::stringFromColumnIndex($column + 1); 296 297 // Read cell? 298 if ($this->getReadFilter() !== null) { 299 if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) { 300 continue; 301 } 302 } 303 304 $this->loadCell($cell, $worksheetName, $cellAttributes, $column, $row); 305 } 306 307 if ($sheet->Styles !== null) { 308 (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol); 309 } 310 311 $this->processComments($sheet); 312 $this->processColumnWidths($sheet, $maxCol); 313 $this->processRowHeights($sheet, $maxRow); 314 $this->processMergedCells($sheet); 315 $this->processAutofilter($sheet); 316 317 $this->setSelectedCells($sheet); 318 ++$worksheetID; 319 } 320 321 $this->processDefinedNames($gnmXML); 322 323 $this->setSelectedSheet($gnmXML); 324 325 // Return 326 return $this->spreadsheet; 327 } 328 329 private function setSelectedSheet(SimpleXMLElement $gnmXML): void 330 { 331 if (isset($gnmXML->UIData)) { 332 $attributes = self::testSimpleXml($gnmXML->UIData->attributes()); 333 $selectedSheet = (int) $attributes['SelectedTab']; 334 $this->spreadsheet->setActiveSheetIndex($selectedSheet); 335 } 336 } 337 338 private function setSelectedCells(?SimpleXMLElement $sheet): void 339 { 340 if ($sheet !== null && isset($sheet->Selections)) { 341 foreach ($sheet->Selections as $selection) { 342 $startCol = (int) ($selection->StartCol ?? 0); 343 $startRow = (int) ($selection->StartRow ?? 0) + 1; 344 $endCol = (int) ($selection->EndCol ?? $startCol); 345 $endRow = (int) ($selection->endRow ?? 0) + 1; 346 347 $startColumn = Coordinate::stringFromColumnIndex($startCol + 1); 348 $endColumn = Coordinate::stringFromColumnIndex($endCol + 1); 349 350 $startCell = "{$startColumn}{$startRow}"; 351 $endCell = "{$endColumn}{$endRow}"; 352 $selectedRange = $startCell . (($endCell !== $startCell) ? ':' . $endCell : ''); 353 $this->spreadsheet->getActiveSheet()->setSelectedCell($selectedRange); 354 355 break; 356 } 357 } 358 } 359 360 private function processMergedCells(?SimpleXMLElement $sheet): void 361 { 362 // Handle Merged Cells in this worksheet 363 if ($sheet !== null && isset($sheet->MergedRegions)) { 364 foreach ($sheet->MergedRegions->Merge as $mergeCells) { 365 if (strpos((string) $mergeCells, ':') !== false) { 366 $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells, Worksheet::MERGE_CELL_CONTENT_HIDE); 367 } 368 } 369 } 370 } 371 372 private function processAutofilter(?SimpleXMLElement $sheet): void 373 { 374 if ($sheet !== null && isset($sheet->Filters)) { 375 foreach ($sheet->Filters->Filter as $autofilter) { 376 if ($autofilter !== null) { 377 $attributes = $autofilter->attributes(); 378 if (isset($attributes['Area'])) { 379 $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']); 380 } 381 } 382 } 383 } 384 } 385 386 private function setColumnWidth(int $whichColumn, float $defaultWidth): void 387 { 388 $columnDimension = $this->spreadsheet->getActiveSheet() 389 ->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1)); 390 if ($columnDimension !== null) { 391 $columnDimension->setWidth($defaultWidth); 392 } 393 } 394 395 private function setColumnInvisible(int $whichColumn): void 396 { 397 $columnDimension = $this->spreadsheet->getActiveSheet() 398 ->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1)); 399 if ($columnDimension !== null) { 400 $columnDimension->setVisible(false); 401 } 402 } 403 404 private function processColumnLoop(int $whichColumn, int $maxCol, ?SimpleXMLElement $columnOverride, float $defaultWidth): int 405 { 406 $columnOverride = self::testSimpleXml($columnOverride); 407 $columnAttributes = self::testSimpleXml($columnOverride->attributes()); 408 $column = $columnAttributes['No']; 409 $columnWidth = ((float) $columnAttributes['Unit']) / 5.4; 410 $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1'); 411 $columnCount = (int) ($columnAttributes['Count'] ?? 1); 412 while ($whichColumn < $column) { 413 $this->setColumnWidth($whichColumn, $defaultWidth); 414 ++$whichColumn; 415 } 416 while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) { 417 $this->setColumnWidth($whichColumn, $columnWidth); 418 if ($hidden) { 419 $this->setColumnInvisible($whichColumn); 420 } 421 ++$whichColumn; 422 } 423 424 return $whichColumn; 425 } 426 427 private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void 428 { 429 if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) { 430 // Column Widths 431 $defaultWidth = 0; 432 $columnAttributes = $sheet->Cols->attributes(); 433 if ($columnAttributes !== null) { 434 $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4; 435 } 436 $whichColumn = 0; 437 foreach ($sheet->Cols->ColInfo as $columnOverride) { 438 $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth); 439 } 440 while ($whichColumn <= $maxCol) { 441 $this->setColumnWidth($whichColumn, $defaultWidth); 442 ++$whichColumn; 443 } 444 } 445 } 446 447 private function setRowHeight(int $whichRow, float $defaultHeight): void 448 { 449 $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow); 450 if ($rowDimension !== null) { 451 $rowDimension->setRowHeight($defaultHeight); 452 } 453 } 454 455 private function setRowInvisible(int $whichRow): void 456 { 457 $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow); 458 if ($rowDimension !== null) { 459 $rowDimension->setVisible(false); 460 } 461 } 462 463 private function processRowLoop(int $whichRow, int $maxRow, ?SimpleXMLElement $rowOverride, float $defaultHeight): int 464 { 465 $rowOverride = self::testSimpleXml($rowOverride); 466 $rowAttributes = self::testSimpleXml($rowOverride->attributes()); 467 $row = $rowAttributes['No']; 468 $rowHeight = (float) $rowAttributes['Unit']; 469 $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1'); 470 $rowCount = (int) ($rowAttributes['Count'] ?? 1); 471 while ($whichRow < $row) { 472 ++$whichRow; 473 $this->setRowHeight($whichRow, $defaultHeight); 474 } 475 while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) { 476 ++$whichRow; 477 $this->setRowHeight($whichRow, $rowHeight); 478 if ($hidden) { 479 $this->setRowInvisible($whichRow); 480 } 481 } 482 483 return $whichRow; 484 } 485 486 private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void 487 { 488 if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) { 489 // Row Heights 490 $defaultHeight = 0; 491 $rowAttributes = $sheet->Rows->attributes(); 492 if ($rowAttributes !== null) { 493 $defaultHeight = (float) $rowAttributes['DefaultSizePts']; 494 } 495 $whichRow = 0; 496 497 foreach ($sheet->Rows->RowInfo as $rowOverride) { 498 $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight); 499 } 500 // never executed, I can't figure out any circumstances 501 // under which it would be executed, and, even if 502 // such exist, I'm not convinced this is needed. 503 //while ($whichRow < $maxRow) { 504 // ++$whichRow; 505 // $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight); 506 //} 507 } 508 } 509 510 private function processDefinedNames(?SimpleXMLElement $gnmXML): void 511 { 512 // Loop through definedNames (global named ranges) 513 if ($gnmXML !== null && isset($gnmXML->Names)) { 514 foreach ($gnmXML->Names->Name as $definedName) { 515 $name = (string) $definedName->name; 516 $value = (string) $definedName->value; 517 if (stripos($value, '#REF!') !== false) { 518 continue; 519 } 520 521 [$worksheetName] = Worksheet::extractSheetTitle($value, true); 522 $worksheetName = trim($worksheetName, "'"); 523 $worksheet = $this->spreadsheet->getSheetByName($worksheetName); 524 // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet 525 if ($worksheet !== null) { 526 $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value)); 527 } 528 } 529 } 530 } 531 532 private function parseRichText(string $is): RichText 533 { 534 $value = new RichText(); 535 $value->createText($is); 536 537 return $value; 538 } 539 540 private function loadCell( 541 SimpleXMLElement $cell, 542 string $worksheetName, 543 SimpleXMLElement $cellAttributes, 544 string $column, 545 int $row 546 ): void { 547 $ValueType = $cellAttributes->ValueType; 548 $ExprID = (string) $cellAttributes->ExprID; 549 $type = DataType::TYPE_FORMULA; 550 if ($ExprID > '') { 551 if (((string) $cell) > '') { 552 $this->expressions[$ExprID] = [ 553 'column' => $cellAttributes->Col, 554 'row' => $cellAttributes->Row, 555 'formula' => (string) $cell, 556 ]; 557 } else { 558 $expression = $this->expressions[$ExprID]; 559 560 $cell = $this->referenceHelper->updateFormulaReferences( 561 $expression['formula'], 562 'A1', 563 $cellAttributes->Col - $expression['column'], 564 $cellAttributes->Row - $expression['row'], 565 $worksheetName 566 ); 567 } 568 $type = DataType::TYPE_FORMULA; 569 } else { 570 $vtype = (string) $ValueType; 571 if (array_key_exists($vtype, self::$mappings['dataType'])) { 572 $type = self::$mappings['dataType'][$vtype]; 573 } 574 if ($vtype === '20') { // Boolean 575 $cell = $cell == 'TRUE'; 576 } 577 } 578 579 $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type); 580 if (isset($cellAttributes->ValueFormat)) { 581 $this->spreadsheet->getActiveSheet()->getCell($column . $row) 582 ->getStyle()->getNumberFormat() 583 ->setFormatCode((string) $cellAttributes->ValueFormat); 584 } 585 } 586 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body