Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   6  use PhpOffice\PhpSpreadsheet\Cell\DataType;
   7  use PhpOffice\PhpSpreadsheet\DefinedName;
   8  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\PageSetup;
   9  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Properties;
  10  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Styles;
  11  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  12  use PhpOffice\PhpSpreadsheet\ReferenceHelper;
  13  use PhpOffice\PhpSpreadsheet\RichText\RichText;
  14  use PhpOffice\PhpSpreadsheet\Settings;
  15  use PhpOffice\PhpSpreadsheet\Shared\File;
  16  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  17  use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  18  use SimpleXMLElement;
  19  use XMLReader;
  20  
  21  class Gnumeric extends BaseReader
  22  {
  23      const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets
  24  
  25      const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance';
  26  
  27      const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0';
  28  
  29      const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
  30  
  31      const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
  32  
  33      const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0';
  34  
  35      const NAMESPACE_OOO = 'http://openoffice.org/2004/office';
  36  
  37      /**
  38       * Shared Expressions.
  39       *
  40       * @var array
  41       */
  42      private $expressions = [];
  43  
  44      /**
  45       * Spreadsheet shared across all functions.
  46       *
  47       * @var Spreadsheet
  48       */
  49      private $spreadsheet;
  50  
  51      /** @var ReferenceHelper */
  52      private $referenceHelper;
  53  
  54      /** @var array */
  55      public static $mappings = [
  56          'dataType' => [
  57              '10' => DataType::TYPE_NULL,
  58              '20' => DataType::TYPE_BOOL,
  59              '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel
  60              '40' => DataType::TYPE_NUMERIC, // Float
  61              '50' => DataType::TYPE_ERROR,
  62              '60' => DataType::TYPE_STRING,
  63              //'70':        //    Cell Range
  64              //'80':        //    Array
  65          ],
  66      ];
  67  
  68      /**
  69       * Create a new Gnumeric.
  70       */
  71      public function __construct()
  72      {
  73          parent::__construct();
  74          $this->referenceHelper = ReferenceHelper::getInstance();
  75          $this->securityScanner = XmlScanner::getInstance($this);
  76      }
  77  
  78      /**
  79       * Can the current IReader read the file?
  80       */
  81      public function canRead(string $filename): bool
  82      {
  83          // Check if gzlib functions are available
  84          if (File::testFileNoThrow($filename) && function_exists('gzread')) {
  85              // Read signature data (first 3 bytes)
  86              $fh = fopen($filename, 'rb');
  87              if ($fh !== false) {
  88                  $data = fread($fh, 2);
  89                  fclose($fh);
  90              }
  91          }
  92  
  93          return isset($data) && $data === chr(0x1F) . chr(0x8B);
  94      }
  95  
  96      private static function matchXml(XMLReader $xml, string $expectedLocalName): bool
  97      {
  98          return $xml->namespaceURI === self::NAMESPACE_GNM
  99              && $xml->localName === $expectedLocalName
 100              && $xml->nodeType === XMLReader::ELEMENT;
 101      }
 102  
 103      /**
 104       * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
 105       *
 106       * @param string $filename
 107       *
 108       * @return array
 109       */
 110      public function listWorksheetNames($filename)
 111      {
 112          File::assertFile($filename);
 113  
 114          $xml = new XMLReader();
 115          $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions());
 116          $xml->setParserProperty(2, true);
 117  
 118          $worksheetNames = [];
 119          while ($xml->read()) {
 120              if (self::matchXml($xml, 'SheetName')) {
 121                  $xml->read(); //    Move onto the value node
 122                  $worksheetNames[] = (string) $xml->value;
 123              } elseif (self::matchXml($xml, 'Sheets')) {
 124                  //    break out of the loop once we've got our sheet names rather than parse the entire file
 125                  break;
 126              }
 127          }
 128  
 129          return $worksheetNames;
 130      }
 131  
 132      /**
 133       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 134       *
 135       * @param string $filename
 136       *
 137       * @return array
 138       */
 139      public function listWorksheetInfo($filename)
 140      {
 141          File::assertFile($filename);
 142  
 143          $xml = new XMLReader();
 144          $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions());
 145          $xml->setParserProperty(2, true);
 146  
 147          $worksheetInfo = [];
 148          while ($xml->read()) {
 149              if (self::matchXml($xml, 'Sheet')) {
 150                  $tmpInfo = [
 151                      'worksheetName' => '',
 152                      'lastColumnLetter' => 'A',
 153                      'lastColumnIndex' => 0,
 154                      'totalRows' => 0,
 155                      'totalColumns' => 0,
 156                  ];
 157  
 158                  while ($xml->read()) {
 159                      if (self::matchXml($xml, 'Name')) {
 160                          $xml->read(); //    Move onto the value node
 161                          $tmpInfo['worksheetName'] = (string) $xml->value;
 162                      } elseif (self::matchXml($xml, 'MaxCol')) {
 163                          $xml->read(); //    Move onto the value node
 164                          $tmpInfo['lastColumnIndex'] = (int) $xml->value;
 165                          $tmpInfo['totalColumns'] = (int) $xml->value + 1;
 166                      } elseif (self::matchXml($xml, 'MaxRow')) {
 167                          $xml->read(); //    Move onto the value node
 168                          $tmpInfo['totalRows'] = (int) $xml->value + 1;
 169  
 170                          break;
 171                      }
 172                  }
 173                  $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
 174                  $worksheetInfo[] = $tmpInfo;
 175              }
 176          }
 177  
 178          return $worksheetInfo;
 179      }
 180  
 181      /**
 182       * @param string $filename
 183       *
 184       * @return string
 185       */
 186      private function gzfileGetContents($filename)
 187      {
 188          $file = @gzopen($filename, 'rb');
 189          $data = '';
 190          if ($file !== false) {
 191              while (!gzeof($file)) {
 192                  $data .= gzread($file, 1024);
 193              }
 194              gzclose($file);
 195          }
 196  
 197          return $data;
 198      }
 199  
 200      public static function gnumericMappings(): array
 201      {
 202          return array_merge(self::$mappings, Styles::$mappings);
 203      }
 204  
 205      private function processComments(SimpleXMLElement $sheet): void
 206      {
 207          if ((!$this->readDataOnly) && (isset($sheet->Objects))) {
 208              foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) {
 209                  $commentAttributes = $comment->attributes();
 210                  //    Only comment objects are handled at the moment
 211                  if ($commentAttributes && $commentAttributes->Text) {
 212                      $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound)
 213                          ->setAuthor((string) $commentAttributes->Author)
 214                          ->setText($this->parseRichText((string) $commentAttributes->Text));
 215                  }
 216              }
 217          }
 218      }
 219  
 220      /**
 221       * @param mixed $value
 222       */
 223      private static function testSimpleXml($value): SimpleXMLElement
 224      {
 225          return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>');
 226      }
 227  
 228      /**
 229       * Loads Spreadsheet from file.
 230       */
 231      protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
 232      {
 233          // Create new Spreadsheet
 234          $spreadsheet = new Spreadsheet();
 235          $spreadsheet->removeSheetByIndex(0);
 236  
 237          // Load into this instance
 238          return $this->loadIntoExisting($filename, $spreadsheet);
 239      }
 240  
 241      /**
 242       * Loads from file into Spreadsheet instance.
 243       */
 244      public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
 245      {
 246          $this->spreadsheet = $spreadsheet;
 247          File::assertFile($filename);
 248  
 249          $gFileData = $this->gzfileGetContents($filename);
 250  
 251          $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
 252          $xml = self::testSimpleXml($xml2);
 253  
 254          $gnmXML = $xml->children(self::NAMESPACE_GNM);
 255          (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML);
 256  
 257          $worksheetID = 0;
 258          foreach ($gnmXML->Sheets->Sheet as $sheetOrNull) {
 259              $sheet = self::testSimpleXml($sheetOrNull);
 260              $worksheetName = (string) $sheet->Name;
 261              if (is_array($this->loadSheetsOnly) && !in_array($worksheetName, $this->loadSheetsOnly, true)) {
 262                  continue;
 263              }
 264  
 265              $maxRow = $maxCol = 0;
 266  
 267              // Create new Worksheet
 268              $this->spreadsheet->createSheet();
 269              $this->spreadsheet->setActiveSheetIndex($worksheetID);
 270              //    Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula
 271              //        cells... during the load, all formulae should be correct, and we're simply bringing the worksheet
 272              //        name in line with the formula, not the reverse
 273              $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
 274  
 275              $visibility = $sheetOrNull->attributes()['Visibility'] ?? 'GNM_SHEET_VISIBILITY_VISIBLE';
 276              if ((string) $visibility !== 'GNM_SHEET_VISIBILITY_VISIBLE') {
 277                  $this->spreadsheet->getActiveSheet()->setSheetState(Worksheet::SHEETSTATE_HIDDEN);
 278              }
 279  
 280              if (!$this->readDataOnly) {
 281                  (new PageSetup($this->spreadsheet))
 282                      ->printInformation($sheet)
 283                      ->sheetMargins($sheet);
 284              }
 285  
 286              foreach ($sheet->Cells->Cell as $cellOrNull) {
 287                  $cell = self::testSimpleXml($cellOrNull);
 288                  $cellAttributes = self::testSimpleXml($cell->attributes());
 289                  $row = (int) $cellAttributes->Row + 1;
 290                  $column = (int) $cellAttributes->Col;
 291  
 292                  $maxRow = max($maxRow, $row);
 293                  $maxCol = max($maxCol, $column);
 294  
 295                  $column = Coordinate::stringFromColumnIndex($column + 1);
 296  
 297                  // Read cell?
 298                  if ($this->getReadFilter() !== null) {
 299                      if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) {
 300                          continue;
 301                      }
 302                  }
 303  
 304                  $this->loadCell($cell, $worksheetName, $cellAttributes, $column, $row);
 305              }
 306  
 307              if ($sheet->Styles !== null) {
 308                  (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol);
 309              }
 310  
 311              $this->processComments($sheet);
 312              $this->processColumnWidths($sheet, $maxCol);
 313              $this->processRowHeights($sheet, $maxRow);
 314              $this->processMergedCells($sheet);
 315              $this->processAutofilter($sheet);
 316  
 317              $this->setSelectedCells($sheet);
 318              ++$worksheetID;
 319          }
 320  
 321          $this->processDefinedNames($gnmXML);
 322  
 323          $this->setSelectedSheet($gnmXML);
 324  
 325          // Return
 326          return $this->spreadsheet;
 327      }
 328  
 329      private function setSelectedSheet(SimpleXMLElement $gnmXML): void
 330      {
 331          if (isset($gnmXML->UIData)) {
 332              $attributes = self::testSimpleXml($gnmXML->UIData->attributes());
 333              $selectedSheet = (int) $attributes['SelectedTab'];
 334              $this->spreadsheet->setActiveSheetIndex($selectedSheet);
 335          }
 336      }
 337  
 338      private function setSelectedCells(?SimpleXMLElement $sheet): void
 339      {
 340          if ($sheet !== null && isset($sheet->Selections)) {
 341              foreach ($sheet->Selections as $selection) {
 342                  $startCol = (int) ($selection->StartCol ?? 0);
 343                  $startRow = (int) ($selection->StartRow ?? 0) + 1;
 344                  $endCol = (int) ($selection->EndCol ?? $startCol);
 345                  $endRow = (int) ($selection->endRow ?? 0) + 1;
 346  
 347                  $startColumn = Coordinate::stringFromColumnIndex($startCol + 1);
 348                  $endColumn = Coordinate::stringFromColumnIndex($endCol + 1);
 349  
 350                  $startCell = "{$startColumn}{$startRow}";
 351                  $endCell = "{$endColumn}{$endRow}";
 352                  $selectedRange = $startCell . (($endCell !== $startCell) ? ':' . $endCell : '');
 353                  $this->spreadsheet->getActiveSheet()->setSelectedCell($selectedRange);
 354  
 355                  break;
 356              }
 357          }
 358      }
 359  
 360      private function processMergedCells(?SimpleXMLElement $sheet): void
 361      {
 362          //    Handle Merged Cells in this worksheet
 363          if ($sheet !== null && isset($sheet->MergedRegions)) {
 364              foreach ($sheet->MergedRegions->Merge as $mergeCells) {
 365                  if (strpos((string) $mergeCells, ':') !== false) {
 366                      $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells, Worksheet::MERGE_CELL_CONTENT_HIDE);
 367                  }
 368              }
 369          }
 370      }
 371  
 372      private function processAutofilter(?SimpleXMLElement $sheet): void
 373      {
 374          if ($sheet !== null && isset($sheet->Filters)) {
 375              foreach ($sheet->Filters->Filter as $autofilter) {
 376                  if ($autofilter !== null) {
 377                      $attributes = $autofilter->attributes();
 378                      if (isset($attributes['Area'])) {
 379                          $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']);
 380                      }
 381                  }
 382              }
 383          }
 384      }
 385  
 386      private function setColumnWidth(int $whichColumn, float $defaultWidth): void
 387      {
 388          $columnDimension = $this->spreadsheet->getActiveSheet()
 389              ->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
 390          if ($columnDimension !== null) {
 391              $columnDimension->setWidth($defaultWidth);
 392          }
 393      }
 394  
 395      private function setColumnInvisible(int $whichColumn): void
 396      {
 397          $columnDimension = $this->spreadsheet->getActiveSheet()
 398              ->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
 399          if ($columnDimension !== null) {
 400              $columnDimension->setVisible(false);
 401          }
 402      }
 403  
 404      private function processColumnLoop(int $whichColumn, int $maxCol, ?SimpleXMLElement $columnOverride, float $defaultWidth): int
 405      {
 406          $columnOverride = self::testSimpleXml($columnOverride);
 407          $columnAttributes = self::testSimpleXml($columnOverride->attributes());
 408          $column = $columnAttributes['No'];
 409          $columnWidth = ((float) $columnAttributes['Unit']) / 5.4;
 410          $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1');
 411          $columnCount = (int) ($columnAttributes['Count'] ?? 1);
 412          while ($whichColumn < $column) {
 413              $this->setColumnWidth($whichColumn, $defaultWidth);
 414              ++$whichColumn;
 415          }
 416          while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) {
 417              $this->setColumnWidth($whichColumn, $columnWidth);
 418              if ($hidden) {
 419                  $this->setColumnInvisible($whichColumn);
 420              }
 421              ++$whichColumn;
 422          }
 423  
 424          return $whichColumn;
 425      }
 426  
 427      private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void
 428      {
 429          if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) {
 430              //    Column Widths
 431              $defaultWidth = 0;
 432              $columnAttributes = $sheet->Cols->attributes();
 433              if ($columnAttributes !== null) {
 434                  $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4;
 435              }
 436              $whichColumn = 0;
 437              foreach ($sheet->Cols->ColInfo as $columnOverride) {
 438                  $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth);
 439              }
 440              while ($whichColumn <= $maxCol) {
 441                  $this->setColumnWidth($whichColumn, $defaultWidth);
 442                  ++$whichColumn;
 443              }
 444          }
 445      }
 446  
 447      private function setRowHeight(int $whichRow, float $defaultHeight): void
 448      {
 449          $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
 450          if ($rowDimension !== null) {
 451              $rowDimension->setRowHeight($defaultHeight);
 452          }
 453      }
 454  
 455      private function setRowInvisible(int $whichRow): void
 456      {
 457          $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
 458          if ($rowDimension !== null) {
 459              $rowDimension->setVisible(false);
 460          }
 461      }
 462  
 463      private function processRowLoop(int $whichRow, int $maxRow, ?SimpleXMLElement $rowOverride, float $defaultHeight): int
 464      {
 465          $rowOverride = self::testSimpleXml($rowOverride);
 466          $rowAttributes = self::testSimpleXml($rowOverride->attributes());
 467          $row = $rowAttributes['No'];
 468          $rowHeight = (float) $rowAttributes['Unit'];
 469          $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1');
 470          $rowCount = (int) ($rowAttributes['Count'] ?? 1);
 471          while ($whichRow < $row) {
 472              ++$whichRow;
 473              $this->setRowHeight($whichRow, $defaultHeight);
 474          }
 475          while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) {
 476              ++$whichRow;
 477              $this->setRowHeight($whichRow, $rowHeight);
 478              if ($hidden) {
 479                  $this->setRowInvisible($whichRow);
 480              }
 481          }
 482  
 483          return $whichRow;
 484      }
 485  
 486      private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void
 487      {
 488          if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) {
 489              //    Row Heights
 490              $defaultHeight = 0;
 491              $rowAttributes = $sheet->Rows->attributes();
 492              if ($rowAttributes !== null) {
 493                  $defaultHeight = (float) $rowAttributes['DefaultSizePts'];
 494              }
 495              $whichRow = 0;
 496  
 497              foreach ($sheet->Rows->RowInfo as $rowOverride) {
 498                  $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight);
 499              }
 500              // never executed, I can't figure out any circumstances
 501              // under which it would be executed, and, even if
 502              // such exist, I'm not convinced this is needed.
 503              //while ($whichRow < $maxRow) {
 504              //    ++$whichRow;
 505              //    $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight);
 506              //}
 507          }
 508      }
 509  
 510      private function processDefinedNames(?SimpleXMLElement $gnmXML): void
 511      {
 512          //    Loop through definedNames (global named ranges)
 513          if ($gnmXML !== null && isset($gnmXML->Names)) {
 514              foreach ($gnmXML->Names->Name as $definedName) {
 515                  $name = (string) $definedName->name;
 516                  $value = (string) $definedName->value;
 517                  if (stripos($value, '#REF!') !== false) {
 518                      continue;
 519                  }
 520  
 521                  [$worksheetName] = Worksheet::extractSheetTitle($value, true);
 522                  $worksheetName = trim($worksheetName, "'");
 523                  $worksheet = $this->spreadsheet->getSheetByName($worksheetName);
 524                  // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet
 525                  if ($worksheet !== null) {
 526                      $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value));
 527                  }
 528              }
 529          }
 530      }
 531  
 532      private function parseRichText(string $is): RichText
 533      {
 534          $value = new RichText();
 535          $value->createText($is);
 536  
 537          return $value;
 538      }
 539  
 540      private function loadCell(
 541          SimpleXMLElement $cell,
 542          string $worksheetName,
 543          SimpleXMLElement $cellAttributes,
 544          string $column,
 545          int $row
 546      ): void {
 547          $ValueType = $cellAttributes->ValueType;
 548          $ExprID = (string) $cellAttributes->ExprID;
 549          $type = DataType::TYPE_FORMULA;
 550          if ($ExprID > '') {
 551              if (((string) $cell) > '') {
 552                  $this->expressions[$ExprID] = [
 553                      'column' => $cellAttributes->Col,
 554                      'row' => $cellAttributes->Row,
 555                      'formula' => (string) $cell,
 556                  ];
 557              } else {
 558                  $expression = $this->expressions[$ExprID];
 559  
 560                  $cell = $this->referenceHelper->updateFormulaReferences(
 561                      $expression['formula'],
 562                      'A1',
 563                      $cellAttributes->Col - $expression['column'],
 564                      $cellAttributes->Row - $expression['row'],
 565                      $worksheetName
 566                  );
 567              }
 568              $type = DataType::TYPE_FORMULA;
 569          } else {
 570              $vtype = (string) $ValueType;
 571              if (array_key_exists($vtype, self::$mappings['dataType'])) {
 572                  $type = self::$mappings['dataType'][$vtype];
 573              }
 574              if ($vtype === '20') {        //    Boolean
 575                  $cell = $cell == 'TRUE';
 576              }
 577          }
 578  
 579          $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type);
 580          if (isset($cellAttributes->ValueFormat)) {
 581              $this->spreadsheet->getActiveSheet()->getCell($column . $row)
 582                  ->getStyle()->getNumberFormat()
 583                  ->setFormatCode((string) $cellAttributes->ValueFormat);
 584          }
 585      }
 586  }