Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 311 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   6  use PhpOffice\PhpSpreadsheet\Cell\DataType;
   7  use PhpOffice\PhpSpreadsheet\DefinedName;
   8  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\PageSetup;
   9  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Properties;
  10  use PhpOffice\PhpSpreadsheet\Reader\Gnumeric\Styles;
  11  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  12  use PhpOffice\PhpSpreadsheet\ReferenceHelper;
  13  use PhpOffice\PhpSpreadsheet\RichText\RichText;
  14  use PhpOffice\PhpSpreadsheet\Settings;
  15  use PhpOffice\PhpSpreadsheet\Shared\File;
  16  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  17  use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  18  use SimpleXMLElement;
  19  use XMLReader;
  20  
  21  class Gnumeric extends BaseReader
  22  {
  23      const NAMESPACE_GNM = 'http://www.gnumeric.org/v10.dtd'; // gmr in old sheets
  24  
  25      const NAMESPACE_XSI = 'http://www.w3.org/2001/XMLSchema-instance';
  26  
  27      const NAMESPACE_OFFICE = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0';
  28  
  29      const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
  30  
  31      const NAMESPACE_DC = 'http://purl.org/dc/elements/1.1/';
  32  
  33      const NAMESPACE_META = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0';
  34  
  35      const NAMESPACE_OOO = 'http://openoffice.org/2004/office';
  36  
  37      /**
  38       * Shared Expressions.
  39       *
  40       * @var array
  41       */
  42      private $expressions = [];
  43  
  44      /**
  45       * Spreadsheet shared across all functions.
  46       *
  47       * @var Spreadsheet
  48       */
  49      private $spreadsheet;
  50  
  51      /** @var ReferenceHelper */
  52      private $referenceHelper;
  53  
  54      /** @var array */
  55      public static $mappings = [
  56          'dataType' => [
  57              '10' => DataType::TYPE_NULL,
  58              '20' => DataType::TYPE_BOOL,
  59              '30' => DataType::TYPE_NUMERIC, // Integer doesn't exist in Excel
  60              '40' => DataType::TYPE_NUMERIC, // Float
  61              '50' => DataType::TYPE_ERROR,
  62              '60' => DataType::TYPE_STRING,
  63              //'70':        //    Cell Range
  64              //'80':        //    Array
  65          ],
  66      ];
  67  
  68      /**
  69       * Create a new Gnumeric.
  70       */
  71      public function __construct()
  72      {
  73          parent::__construct();
  74          $this->referenceHelper = ReferenceHelper::getInstance();
  75          $this->securityScanner = XmlScanner::getInstance($this);
  76      }
  77  
  78      /**
  79       * Can the current IReader read the file?
  80       */
  81      public function canRead(string $filename): bool
  82      {
  83          // Check if gzlib functions are available
  84          if (File::testFileNoThrow($filename) && function_exists('gzread')) {
  85              // Read signature data (first 3 bytes)
  86              $fh = fopen($filename, 'rb');
  87              if ($fh !== false) {
  88                  $data = fread($fh, 2);
  89                  fclose($fh);
  90              }
  91          }
  92  
  93          return isset($data) && $data === chr(0x1F) . chr(0x8B);
  94      }
  95  
  96      private static function matchXml(XMLReader $xml, string $expectedLocalName): bool
  97      {
  98          return $xml->namespaceURI === self::NAMESPACE_GNM
  99              && $xml->localName === $expectedLocalName
 100              && $xml->nodeType === XMLReader::ELEMENT;
 101      }
 102  
 103      /**
 104       * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
 105       *
 106       * @param string $filename
 107       *
 108       * @return array
 109       */
 110      public function listWorksheetNames($filename)
 111      {
 112          File::assertFile($filename);
 113  
 114          $xml = new XMLReader();
 115          $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions());
 116          $xml->setParserProperty(2, true);
 117  
 118          $worksheetNames = [];
 119          while ($xml->read()) {
 120              if (self::matchXml($xml, 'SheetName')) {
 121                  $xml->read(); //    Move onto the value node
 122                  $worksheetNames[] = (string) $xml->value;
 123              } elseif (self::matchXml($xml, 'Sheets')) {
 124                  //    break out of the loop once we've got our sheet names rather than parse the entire file
 125                  break;
 126              }
 127          }
 128  
 129          return $worksheetNames;
 130      }
 131  
 132      /**
 133       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 134       *
 135       * @param string $filename
 136       *
 137       * @return array
 138       */
 139      public function listWorksheetInfo($filename)
 140      {
 141          File::assertFile($filename);
 142  
 143          $xml = new XMLReader();
 144          $xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($filename)), null, Settings::getLibXmlLoaderOptions());
 145          $xml->setParserProperty(2, true);
 146  
 147          $worksheetInfo = [];
 148          while ($xml->read()) {
 149              if (self::matchXml($xml, 'Sheet')) {
 150                  $tmpInfo = [
 151                      'worksheetName' => '',
 152                      'lastColumnLetter' => 'A',
 153                      'lastColumnIndex' => 0,
 154                      'totalRows' => 0,
 155                      'totalColumns' => 0,
 156                  ];
 157  
 158                  while ($xml->read()) {
 159                      if (self::matchXml($xml, 'Name')) {
 160                          $xml->read(); //    Move onto the value node
 161                          $tmpInfo['worksheetName'] = (string) $xml->value;
 162                      } elseif (self::matchXml($xml, 'MaxCol')) {
 163                          $xml->read(); //    Move onto the value node
 164                          $tmpInfo['lastColumnIndex'] = (int) $xml->value;
 165                          $tmpInfo['totalColumns'] = (int) $xml->value + 1;
 166                      } elseif (self::matchXml($xml, 'MaxRow')) {
 167                          $xml->read(); //    Move onto the value node
 168                          $tmpInfo['totalRows'] = (int) $xml->value + 1;
 169  
 170                          break;
 171                      }
 172                  }
 173                  $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
 174                  $worksheetInfo[] = $tmpInfo;
 175              }
 176          }
 177  
 178          return $worksheetInfo;
 179      }
 180  
 181      /**
 182       * @param string $filename
 183       *
 184       * @return string
 185       */
 186      private function gzfileGetContents($filename)
 187      {
 188          $file = @gzopen($filename, 'rb');
 189          $data = '';
 190          if ($file !== false) {
 191              while (!gzeof($file)) {
 192                  $data .= gzread($file, 1024);
 193              }
 194              gzclose($file);
 195          }
 196  
 197          return $data;
 198      }
 199  
 200      public static function gnumericMappings(): array
 201      {
 202          return array_merge(self::$mappings, Styles::$mappings);
 203      }
 204  
 205      private function processComments(SimpleXMLElement $sheet): void
 206      {
 207          if ((!$this->readDataOnly) && (isset($sheet->Objects))) {
 208              foreach ($sheet->Objects->children(self::NAMESPACE_GNM) as $key => $comment) {
 209                  $commentAttributes = $comment->attributes();
 210                  //    Only comment objects are handled at the moment
 211                  if ($commentAttributes && $commentAttributes->Text) {
 212                      $this->spreadsheet->getActiveSheet()->getComment((string) $commentAttributes->ObjectBound)
 213                          ->setAuthor((string) $commentAttributes->Author)
 214                          ->setText($this->parseRichText((string) $commentAttributes->Text));
 215                  }
 216              }
 217          }
 218      }
 219  
 220      /**
 221       * @param mixed $value
 222       */
 223      private static function testSimpleXml($value): SimpleXMLElement
 224      {
 225          return ($value instanceof SimpleXMLElement) ? $value : new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><root></root>');
 226      }
 227  
 228      /**
 229       * Loads Spreadsheet from file.
 230       *
 231       * @return Spreadsheet
 232       */
 233      public function load(string $filename, int $flags = 0)
 234      {
 235          $this->processFlags($flags);
 236  
 237          // Create new Spreadsheet
 238          $spreadsheet = new Spreadsheet();
 239          $spreadsheet->removeSheetByIndex(0);
 240  
 241          // Load into this instance
 242          return $this->loadIntoExisting($filename, $spreadsheet);
 243      }
 244  
 245      /**
 246       * Loads from file into Spreadsheet instance.
 247       */
 248      public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
 249      {
 250          $this->spreadsheet = $spreadsheet;
 251          File::assertFile($filename);
 252  
 253          $gFileData = $this->gzfileGetContents($filename);
 254  
 255          $xml2 = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
 256          $xml = self::testSimpleXml($xml2);
 257  
 258          $gnmXML = $xml->children(self::NAMESPACE_GNM);
 259          (new Properties($this->spreadsheet))->readProperties($xml, $gnmXML);
 260  
 261          $worksheetID = 0;
 262          foreach ($gnmXML->Sheets->Sheet as $sheetOrNull) {
 263              $sheet = self::testSimpleXml($sheetOrNull);
 264              $worksheetName = (string) $sheet->Name;
 265              if (is_array($this->loadSheetsOnly) && !in_array($worksheetName, $this->loadSheetsOnly, true)) {
 266                  continue;
 267              }
 268  
 269              $maxRow = $maxCol = 0;
 270  
 271              // Create new Worksheet
 272              $this->spreadsheet->createSheet();
 273              $this->spreadsheet->setActiveSheetIndex($worksheetID);
 274              //    Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in formula
 275              //        cells... during the load, all formulae should be correct, and we're simply bringing the worksheet
 276              //        name in line with the formula, not the reverse
 277              $this->spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
 278  
 279              if (!$this->readDataOnly) {
 280                  (new PageSetup($this->spreadsheet))
 281                      ->printInformation($sheet)
 282                      ->sheetMargins($sheet);
 283              }
 284  
 285              foreach ($sheet->Cells->Cell as $cellOrNull) {
 286                  $cell = self::testSimpleXml($cellOrNull);
 287                  $cellAttributes = self::testSimpleXml($cell->attributes());
 288                  $row = (int) $cellAttributes->Row + 1;
 289                  $column = (int) $cellAttributes->Col;
 290  
 291                  if ($row > $maxRow) {
 292                      $maxRow = $row;
 293                  }
 294                  if ($column > $maxCol) {
 295                      $maxCol = $column;
 296                  }
 297  
 298                  $column = Coordinate::stringFromColumnIndex($column + 1);
 299  
 300                  // Read cell?
 301                  if ($this->getReadFilter() !== null) {
 302                      if (!$this->getReadFilter()->readCell($column, $row, $worksheetName)) {
 303                          continue;
 304                      }
 305                  }
 306  
 307                  $ValueType = $cellAttributes->ValueType;
 308                  $ExprID = (string) $cellAttributes->ExprID;
 309                  $type = DataType::TYPE_FORMULA;
 310                  if ($ExprID > '') {
 311                      if (((string) $cell) > '') {
 312                          $this->expressions[$ExprID] = [
 313                              'column' => $cellAttributes->Col,
 314                              'row' => $cellAttributes->Row,
 315                              'formula' => (string) $cell,
 316                          ];
 317                      } else {
 318                          $expression = $this->expressions[$ExprID];
 319  
 320                          $cell = $this->referenceHelper->updateFormulaReferences(
 321                              $expression['formula'],
 322                              'A1',
 323                              $cellAttributes->Col - $expression['column'],
 324                              $cellAttributes->Row - $expression['row'],
 325                              $worksheetName
 326                          );
 327                      }
 328                      $type = DataType::TYPE_FORMULA;
 329                  } else {
 330                      $vtype = (string) $ValueType;
 331                      if (array_key_exists($vtype, self::$mappings['dataType'])) {
 332                          $type = self::$mappings['dataType'][$vtype];
 333                      }
 334                      if ($vtype === '20') {        //    Boolean
 335                          $cell = $cell == 'TRUE';
 336                      }
 337                  }
 338                  $this->spreadsheet->getActiveSheet()->getCell($column . $row)->setValueExplicit((string) $cell, $type);
 339              }
 340  
 341              if ($sheet->Styles !== null) {
 342                  (new Styles($this->spreadsheet, $this->readDataOnly))->read($sheet, $maxRow, $maxCol);
 343              }
 344  
 345              $this->processComments($sheet);
 346              $this->processColumnWidths($sheet, $maxCol);
 347              $this->processRowHeights($sheet, $maxRow);
 348              $this->processMergedCells($sheet);
 349              $this->processAutofilter($sheet);
 350  
 351              ++$worksheetID;
 352          }
 353  
 354          $this->processDefinedNames($gnmXML);
 355  
 356          // Return
 357          return $this->spreadsheet;
 358      }
 359  
 360      private function processMergedCells(?SimpleXMLElement $sheet): void
 361      {
 362          //    Handle Merged Cells in this worksheet
 363          if ($sheet !== null && isset($sheet->MergedRegions)) {
 364              foreach ($sheet->MergedRegions->Merge as $mergeCells) {
 365                  if (strpos((string) $mergeCells, ':') !== false) {
 366                      $this->spreadsheet->getActiveSheet()->mergeCells($mergeCells);
 367                  }
 368              }
 369          }
 370      }
 371  
 372      private function processAutofilter(?SimpleXMLElement $sheet): void
 373      {
 374          if ($sheet !== null && isset($sheet->Filters)) {
 375              foreach ($sheet->Filters->Filter as $autofilter) {
 376                  if ($autofilter !== null) {
 377                      $attributes = $autofilter->attributes();
 378                      if (isset($attributes['Area'])) {
 379                          $this->spreadsheet->getActiveSheet()->setAutoFilter((string) $attributes['Area']);
 380                      }
 381                  }
 382              }
 383          }
 384      }
 385  
 386      private function setColumnWidth(int $whichColumn, float $defaultWidth): void
 387      {
 388          $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
 389          if ($columnDimension !== null) {
 390              $columnDimension->setWidth($defaultWidth);
 391          }
 392      }
 393  
 394      private function setColumnInvisible(int $whichColumn): void
 395      {
 396          $columnDimension = $this->spreadsheet->getActiveSheet()->getColumnDimension(Coordinate::stringFromColumnIndex($whichColumn + 1));
 397          if ($columnDimension !== null) {
 398              $columnDimension->setVisible(false);
 399          }
 400      }
 401  
 402      private function processColumnLoop(int $whichColumn, int $maxCol, ?SimpleXMLElement $columnOverride, float $defaultWidth): int
 403      {
 404          $columnOverride = self::testSimpleXml($columnOverride);
 405          $columnAttributes = self::testSimpleXml($columnOverride->attributes());
 406          $column = $columnAttributes['No'];
 407          $columnWidth = ((float) $columnAttributes['Unit']) / 5.4;
 408          $hidden = (isset($columnAttributes['Hidden'])) && ((string) $columnAttributes['Hidden'] == '1');
 409          $columnCount = (int) ($columnAttributes['Count'] ?? 1);
 410          while ($whichColumn < $column) {
 411              $this->setColumnWidth($whichColumn, $defaultWidth);
 412              ++$whichColumn;
 413          }
 414          while (($whichColumn < ($column + $columnCount)) && ($whichColumn <= $maxCol)) {
 415              $this->setColumnWidth($whichColumn, $columnWidth);
 416              if ($hidden) {
 417                  $this->setColumnInvisible($whichColumn);
 418              }
 419              ++$whichColumn;
 420          }
 421  
 422          return $whichColumn;
 423      }
 424  
 425      private function processColumnWidths(?SimpleXMLElement $sheet, int $maxCol): void
 426      {
 427          if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Cols))) {
 428              //    Column Widths
 429              $defaultWidth = 0;
 430              $columnAttributes = $sheet->Cols->attributes();
 431              if ($columnAttributes !== null) {
 432                  $defaultWidth = $columnAttributes['DefaultSizePts'] / 5.4;
 433              }
 434              $whichColumn = 0;
 435              foreach ($sheet->Cols->ColInfo as $columnOverride) {
 436                  $whichColumn = $this->processColumnLoop($whichColumn, $maxCol, $columnOverride, $defaultWidth);
 437              }
 438              while ($whichColumn <= $maxCol) {
 439                  $this->setColumnWidth($whichColumn, $defaultWidth);
 440                  ++$whichColumn;
 441              }
 442          }
 443      }
 444  
 445      private function setRowHeight(int $whichRow, float $defaultHeight): void
 446      {
 447          $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
 448          if ($rowDimension !== null) {
 449              $rowDimension->setRowHeight($defaultHeight);
 450          }
 451      }
 452  
 453      private function setRowInvisible(int $whichRow): void
 454      {
 455          $rowDimension = $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow);
 456          if ($rowDimension !== null) {
 457              $rowDimension->setVisible(false);
 458          }
 459      }
 460  
 461      private function processRowLoop(int $whichRow, int $maxRow, ?SimpleXMLElement $rowOverride, float $defaultHeight): int
 462      {
 463          $rowOverride = self::testSimpleXml($rowOverride);
 464          $rowAttributes = self::testSimpleXml($rowOverride->attributes());
 465          $row = $rowAttributes['No'];
 466          $rowHeight = (float) $rowAttributes['Unit'];
 467          $hidden = (isset($rowAttributes['Hidden'])) && ((string) $rowAttributes['Hidden'] == '1');
 468          $rowCount = (int) ($rowAttributes['Count'] ?? 1);
 469          while ($whichRow < $row) {
 470              ++$whichRow;
 471              $this->setRowHeight($whichRow, $defaultHeight);
 472          }
 473          while (($whichRow < ($row + $rowCount)) && ($whichRow < $maxRow)) {
 474              ++$whichRow;
 475              $this->setRowHeight($whichRow, $rowHeight);
 476              if ($hidden) {
 477                  $this->setRowInvisible($whichRow);
 478              }
 479          }
 480  
 481          return $whichRow;
 482      }
 483  
 484      private function processRowHeights(?SimpleXMLElement $sheet, int $maxRow): void
 485      {
 486          if ((!$this->readDataOnly) && $sheet !== null && (isset($sheet->Rows))) {
 487              //    Row Heights
 488              $defaultHeight = 0;
 489              $rowAttributes = $sheet->Rows->attributes();
 490              if ($rowAttributes !== null) {
 491                  $defaultHeight = (float) $rowAttributes['DefaultSizePts'];
 492              }
 493              $whichRow = 0;
 494  
 495              foreach ($sheet->Rows->RowInfo as $rowOverride) {
 496                  $whichRow = $this->processRowLoop($whichRow, $maxRow, $rowOverride, $defaultHeight);
 497              }
 498              // never executed, I can't figure out any circumstances
 499              // under which it would be executed, and, even if
 500              // such exist, I'm not convinced this is needed.
 501              //while ($whichRow < $maxRow) {
 502              //    ++$whichRow;
 503              //    $this->spreadsheet->getActiveSheet()->getRowDimension($whichRow)->setRowHeight($defaultHeight);
 504              //}
 505          }
 506      }
 507  
 508      private function processDefinedNames(?SimpleXMLElement $gnmXML): void
 509      {
 510          //    Loop through definedNames (global named ranges)
 511          if ($gnmXML !== null && isset($gnmXML->Names)) {
 512              foreach ($gnmXML->Names->Name as $definedName) {
 513                  $name = (string) $definedName->name;
 514                  $value = (string) $definedName->value;
 515                  if (stripos($value, '#REF!') !== false) {
 516                      continue;
 517                  }
 518  
 519                  [$worksheetName] = Worksheet::extractSheetTitle($value, true);
 520                  $worksheetName = trim($worksheetName, "'");
 521                  $worksheet = $this->spreadsheet->getSheetByName($worksheetName);
 522                  // Worksheet might still be null if we're only loading selected sheets rather than the full spreadsheet
 523                  if ($worksheet !== null) {
 524                      $this->spreadsheet->addDefinedName(DefinedName::createInstance($name, $worksheet, $value));
 525                  }
 526              }
 527          }
 528      }
 529  
 530      private function parseRichText(string $is): RichText
 531      {
 532          $value = new RichText();
 533          $value->createText($is);
 534  
 535          return $value;
 536      }
 537  }