Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use DateTime;
   6  use DateTimeZone;
   7  use PhpOffice\PhpSpreadsheet\Cell\AddressHelper;
   8  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   9  use PhpOffice\PhpSpreadsheet\Cell\DataType;
  10  use PhpOffice\PhpSpreadsheet\DefinedName;
  11  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  12  use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings;
  13  use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
  14  use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
  15  use PhpOffice\PhpSpreadsheet\RichText\RichText;
  16  use PhpOffice\PhpSpreadsheet\Settings;
  17  use PhpOffice\PhpSpreadsheet\Shared\Date;
  18  use PhpOffice\PhpSpreadsheet\Shared\File;
  19  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  20  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  21  use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  22  use SimpleXMLElement;
  23  
  24  /**
  25   * Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003.
  26   */
  27  class Xml extends BaseReader
  28  {
  29      /**
  30       * Formats.
  31       *
  32       * @var array
  33       */
  34      protected $styles = [];
  35  
  36      /**
  37       * Create a new Excel2003XML Reader instance.
  38       */
  39      public function __construct()
  40      {
  41          parent::__construct();
  42          $this->securityScanner = XmlScanner::getInstance($this);
  43      }
  44  
  45      private $fileContents = '';
  46  
  47      public static function xmlMappings(): array
  48      {
  49          return array_merge(
  50              Style\Fill::FILL_MAPPINGS,
  51              Style\Border::BORDER_MAPPINGS
  52          );
  53      }
  54  
  55      /**
  56       * Can the current IReader read the file?
  57       */
  58      public function canRead(string $filename): bool
  59      {
  60          //    Office                    xmlns:o="urn:schemas-microsoft-com:office:office"
  61          //    Excel                    xmlns:x="urn:schemas-microsoft-com:office:excel"
  62          //    XML Spreadsheet            xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
  63          //    Spreadsheet component    xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet"
  64          //    XML schema                 xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882"
  65          //    XML data type            xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
  66          //    MS-persist recordset    xmlns:rs="urn:schemas-microsoft-com:rowset"
  67          //    Rowset                    xmlns:z="#RowsetSchema"
  68          //
  69  
  70          $signature = [
  71              '<?xml version="1.0"',
  72              'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet',
  73          ];
  74  
  75          // Open file
  76          $data = file_get_contents($filename);
  77  
  78          // Why?
  79          //$data = str_replace("'", '"', $data); // fix headers with single quote
  80  
  81          $valid = true;
  82          foreach ($signature as $match) {
  83              // every part of the signature must be present
  84              if (strpos($data, $match) === false) {
  85                  $valid = false;
  86  
  87                  break;
  88              }
  89          }
  90  
  91          //    Retrieve charset encoding
  92          if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) {
  93              $charSet = strtoupper($matches[1]);
  94              if (preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet) === 1) {
  95                  $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet);
  96                  $data = (string) preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
  97              }
  98          }
  99          $this->fileContents = $data;
 100  
 101          return $valid;
 102      }
 103  
 104      /**
 105       * Check if the file is a valid SimpleXML.
 106       *
 107       * @param string $filename
 108       *
 109       * @return false|SimpleXMLElement
 110       */
 111      public function trySimpleXMLLoadString($filename)
 112      {
 113          try {
 114              $xml = simplexml_load_string(
 115                  $this->securityScanner->scan($this->fileContents ?: file_get_contents($filename)),
 116                  'SimpleXMLElement',
 117                  Settings::getLibXmlLoaderOptions()
 118              );
 119          } catch (\Exception $e) {
 120              throw new Exception('Cannot load invalid XML file: ' . $filename, 0, $e);
 121          }
 122          $this->fileContents = '';
 123  
 124          return $xml;
 125      }
 126  
 127      /**
 128       * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
 129       *
 130       * @param string $filename
 131       *
 132       * @return array
 133       */
 134      public function listWorksheetNames($filename)
 135      {
 136          File::assertFile($filename);
 137          if (!$this->canRead($filename)) {
 138              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 139          }
 140  
 141          $worksheetNames = [];
 142  
 143          $xml = $this->trySimpleXMLLoadString($filename);
 144          if ($xml === false) {
 145              throw new Exception("Problem reading {$filename}");
 146          }
 147  
 148          $namespaces = $xml->getNamespaces(true);
 149  
 150          $xml_ss = $xml->children($namespaces['ss']);
 151          foreach ($xml_ss->Worksheet as $worksheet) {
 152              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 153              $worksheetNames[] = (string) $worksheet_ss['Name'];
 154          }
 155  
 156          return $worksheetNames;
 157      }
 158  
 159      /**
 160       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 161       *
 162       * @param string $filename
 163       *
 164       * @return array
 165       */
 166      public function listWorksheetInfo($filename)
 167      {
 168          File::assertFile($filename);
 169          if (!$this->canRead($filename)) {
 170              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 171          }
 172  
 173          $worksheetInfo = [];
 174  
 175          $xml = $this->trySimpleXMLLoadString($filename);
 176          if ($xml === false) {
 177              throw new Exception("Problem reading {$filename}");
 178          }
 179  
 180          $namespaces = $xml->getNamespaces(true);
 181  
 182          $worksheetID = 1;
 183          $xml_ss = $xml->children($namespaces['ss']);
 184          foreach ($xml_ss->Worksheet as $worksheet) {
 185              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 186  
 187              $tmpInfo = [];
 188              $tmpInfo['worksheetName'] = '';
 189              $tmpInfo['lastColumnLetter'] = 'A';
 190              $tmpInfo['lastColumnIndex'] = 0;
 191              $tmpInfo['totalRows'] = 0;
 192              $tmpInfo['totalColumns'] = 0;
 193  
 194              $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}";
 195              if (isset($worksheet_ss['Name'])) {
 196                  $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name'];
 197              }
 198  
 199              if (isset($worksheet->Table->Row)) {
 200                  $rowIndex = 0;
 201  
 202                  foreach ($worksheet->Table->Row as $rowData) {
 203                      $columnIndex = 0;
 204                      $rowHasData = false;
 205  
 206                      foreach ($rowData->Cell as $cell) {
 207                          if (isset($cell->Data)) {
 208                              $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex);
 209                              $rowHasData = true;
 210                          }
 211  
 212                          ++$columnIndex;
 213                      }
 214  
 215                      ++$rowIndex;
 216  
 217                      if ($rowHasData) {
 218                          $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex);
 219                      }
 220                  }
 221              }
 222  
 223              $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
 224              $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1;
 225  
 226              $worksheetInfo[] = $tmpInfo;
 227              ++$worksheetID;
 228          }
 229  
 230          return $worksheetInfo;
 231      }
 232  
 233      /**
 234       * Loads Spreadsheet from file.
 235       */
 236      protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
 237      {
 238          // Create new Spreadsheet
 239          $spreadsheet = new Spreadsheet();
 240          $spreadsheet->removeSheetByIndex(0);
 241  
 242          // Load into this instance
 243          return $this->loadIntoExisting($filename, $spreadsheet);
 244      }
 245  
 246      /**
 247       * Loads from file into Spreadsheet instance.
 248       *
 249       * @param string $filename
 250       *
 251       * @return Spreadsheet
 252       */
 253      public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
 254      {
 255          File::assertFile($filename);
 256          if (!$this->canRead($filename)) {
 257              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 258          }
 259  
 260          $xml = $this->trySimpleXMLLoadString($filename);
 261          if ($xml === false) {
 262              throw new Exception("Problem reading {$filename}");
 263          }
 264  
 265          $namespaces = $xml->getNamespaces(true);
 266  
 267          (new Properties($spreadsheet))->readProperties($xml, $namespaces);
 268  
 269          $this->styles = (new Style())->parseStyles($xml, $namespaces);
 270  
 271          $worksheetID = 0;
 272          $xml_ss = $xml->children($namespaces['ss']);
 273  
 274          /** @var null|SimpleXMLElement $worksheetx */
 275          foreach ($xml_ss->Worksheet as $worksheetx) {
 276              $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>');
 277              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 278  
 279              if (
 280                  isset($this->loadSheetsOnly, $worksheet_ss['Name']) &&
 281                  (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly))
 282              ) {
 283                  continue;
 284              }
 285  
 286              // Create new Worksheet
 287              $spreadsheet->createSheet();
 288              $spreadsheet->setActiveSheetIndex($worksheetID);
 289              $worksheetName = '';
 290              if (isset($worksheet_ss['Name'])) {
 291                  $worksheetName = (string) $worksheet_ss['Name'];
 292                  //    Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
 293                  //        formula cells... during the load, all formulae should be correct, and we're simply bringing
 294                  //        the worksheet name in line with the formula, not the reverse
 295                  $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
 296              }
 297  
 298              // locally scoped defined names
 299              if (isset($worksheet->Names[0])) {
 300                  foreach ($worksheet->Names[0] as $definedName) {
 301                      $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
 302                      $name = (string) $definedName_ss['Name'];
 303                      $definedValue = (string) $definedName_ss['RefersTo'];
 304                      $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
 305                      if ($convertedValue[0] === '=') {
 306                          $convertedValue = substr($convertedValue, 1);
 307                      }
 308                      $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true));
 309                  }
 310              }
 311  
 312              $columnID = 'A';
 313              if (isset($worksheet->Table->Column)) {
 314                  foreach ($worksheet->Table->Column as $columnData) {
 315                      $columnData_ss = self::getAttributes($columnData, $namespaces['ss']);
 316                      if (isset($columnData_ss['Index'])) {
 317                          $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']);
 318                      }
 319                      if (isset($columnData_ss['Width'])) {
 320                          $columnWidth = $columnData_ss['Width'];
 321                          $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4);
 322                      }
 323                      ++$columnID;
 324                  }
 325              }
 326  
 327              $rowID = 1;
 328              if (isset($worksheet->Table->Row)) {
 329                  $additionalMergedCells = 0;
 330                  foreach ($worksheet->Table->Row as $rowData) {
 331                      $rowHasData = false;
 332                      $row_ss = self::getAttributes($rowData, $namespaces['ss']);
 333                      if (isset($row_ss['Index'])) {
 334                          $rowID = (int) $row_ss['Index'];
 335                      }
 336  
 337                      $columnID = 'A';
 338                      foreach ($rowData->Cell as $cell) {
 339                          $cell_ss = self::getAttributes($cell, $namespaces['ss']);
 340                          if (isset($cell_ss['Index'])) {
 341                              $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']);
 342                          }
 343                          $cellRange = $columnID . $rowID;
 344  
 345                          if ($this->getReadFilter() !== null) {
 346                              if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
 347                                  ++$columnID;
 348  
 349                                  continue;
 350                              }
 351                          }
 352  
 353                          if (isset($cell_ss['HRef'])) {
 354                              $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']);
 355                          }
 356  
 357                          if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) {
 358                              $columnTo = $columnID;
 359                              if (isset($cell_ss['MergeAcross'])) {
 360                                  $additionalMergedCells += (int) $cell_ss['MergeAcross'];
 361                                  $columnTo = Coordinate::stringFromColumnIndex((int) (Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross']));
 362                              }
 363                              $rowTo = $rowID;
 364                              if (isset($cell_ss['MergeDown'])) {
 365                                  $rowTo = $rowTo + $cell_ss['MergeDown'];
 366                              }
 367                              $cellRange .= ':' . $columnTo . $rowTo;
 368                              $spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE);
 369                          }
 370  
 371                          $hasCalculatedValue = false;
 372                          $cellDataFormula = '';
 373                          if (isset($cell_ss['Formula'])) {
 374                              $cellDataFormula = $cell_ss['Formula'];
 375                              $hasCalculatedValue = true;
 376                          }
 377                          if (isset($cell->Data)) {
 378                              $cellData = $cell->Data;
 379                              $cellValue = (string) $cellData;
 380                              $type = DataType::TYPE_NULL;
 381                              $cellData_ss = self::getAttributes($cellData, $namespaces['ss']);
 382                              if (isset($cellData_ss['Type'])) {
 383                                  $cellDataType = $cellData_ss['Type'];
 384                                  switch ($cellDataType) {
 385                                      /*
 386                                      const TYPE_STRING        = 's';
 387                                      const TYPE_FORMULA        = 'f';
 388                                      const TYPE_NUMERIC        = 'n';
 389                                      const TYPE_BOOL            = 'b';
 390                                      const TYPE_NULL            = 'null';
 391                                      const TYPE_INLINE        = 'inlineStr';
 392                                      const TYPE_ERROR        = 'e';
 393                                      */
 394                                      case 'String':
 395                                          $type = DataType::TYPE_STRING;
 396  
 397                                          break;
 398                                      case 'Number':
 399                                          $type = DataType::TYPE_NUMERIC;
 400                                          $cellValue = (float) $cellValue;
 401                                          if (floor($cellValue) == $cellValue) {
 402                                              $cellValue = (int) $cellValue;
 403                                          }
 404  
 405                                          break;
 406                                      case 'Boolean':
 407                                          $type = DataType::TYPE_BOOL;
 408                                          $cellValue = ($cellValue != 0);
 409  
 410                                          break;
 411                                      case 'DateTime':
 412                                          $type = DataType::TYPE_NUMERIC;
 413                                          $dateTime = new DateTime($cellValue, new DateTimeZone('UTC'));
 414                                          $cellValue = Date::PHPToExcel($dateTime);
 415  
 416                                          break;
 417                                      case 'Error':
 418                                          $type = DataType::TYPE_ERROR;
 419                                          $hasCalculatedValue = false;
 420  
 421                                          break;
 422                                  }
 423                              }
 424  
 425                              if ($hasCalculatedValue) {
 426                                  $type = DataType::TYPE_FORMULA;
 427                                  $columnNumber = Coordinate::columnIndexFromString($columnID);
 428                                  $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber);
 429                              }
 430  
 431                              $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type);
 432                              if ($hasCalculatedValue) {
 433                                  $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue);
 434                              }
 435                              $rowHasData = true;
 436                          }
 437  
 438                          if (isset($cell->Comment)) {
 439                              $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID);
 440                          }
 441  
 442                          if (isset($cell_ss['StyleID'])) {
 443                              $style = (string) $cell_ss['StyleID'];
 444                              if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) {
 445                                  //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) {
 446                                  //    $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null);
 447                                  //}
 448                                  $spreadsheet->getActiveSheet()->getStyle($cellRange)
 449                                      ->applyFromArray($this->styles[$style]);
 450                              }
 451                          }
 452                          ++$columnID;
 453                          while ($additionalMergedCells > 0) {
 454                              ++$columnID;
 455                              --$additionalMergedCells;
 456                          }
 457                      }
 458  
 459                      if ($rowHasData) {
 460                          if (isset($row_ss['Height'])) {
 461                              $rowHeight = $row_ss['Height'];
 462                              $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight);
 463                          }
 464                      }
 465  
 466                      ++$rowID;
 467                  }
 468  
 469                  if (isset($namespaces['x'])) {
 470                      $xmlX = $worksheet->children($namespaces['x']);
 471                      if (isset($xmlX->WorksheetOptions)) {
 472                          (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet);
 473                      }
 474                  }
 475              }
 476              ++$worksheetID;
 477          }
 478  
 479          // Globally scoped defined names
 480          $activeWorksheet = $spreadsheet->setActiveSheetIndex(0);
 481          if (isset($xml->Names[0])) {
 482              foreach ($xml->Names[0] as $definedName) {
 483                  $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
 484                  $name = (string) $definedName_ss['Name'];
 485                  $definedValue = (string) $definedName_ss['RefersTo'];
 486                  $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
 487                  if ($convertedValue[0] === '=') {
 488                      $convertedValue = substr($convertedValue, 1);
 489                  }
 490                  $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue));
 491              }
 492          }
 493  
 494          // Return
 495          return $spreadsheet;
 496      }
 497  
 498      protected function parseCellComment(
 499          SimpleXMLElement $comment,
 500          array $namespaces,
 501          Spreadsheet $spreadsheet,
 502          string $columnID,
 503          int $rowID
 504      ): void {
 505          $commentAttributes = $comment->attributes($namespaces['ss']);
 506          $author = 'unknown';
 507          if (isset($commentAttributes->Author)) {
 508              $author = (string) $commentAttributes->Author;
 509          }
 510  
 511          $node = $comment->Data->asXML();
 512          $annotation = strip_tags((string) $node);
 513          $spreadsheet->getActiveSheet()->getComment($columnID . $rowID)
 514              ->setAuthor($author)
 515              ->setText($this->parseRichText($annotation));
 516      }
 517  
 518      protected function parseRichText(string $annotation): RichText
 519      {
 520          $value = new RichText();
 521  
 522          $value->createText($annotation);
 523  
 524          return $value;
 525      }
 526  
 527      private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement
 528      {
 529          return ($simple === null)
 530              ? new SimpleXMLElement('<xml></xml>')
 531              : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>'));
 532      }
 533  }