Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 311 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use DateTime;
   6  use DateTimeZone;
   7  use PhpOffice\PhpSpreadsheet\Cell\AddressHelper;
   8  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
   9  use PhpOffice\PhpSpreadsheet\Cell\DataType;
  10  use PhpOffice\PhpSpreadsheet\DefinedName;
  11  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  12  use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings;
  13  use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
  14  use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
  15  use PhpOffice\PhpSpreadsheet\RichText\RichText;
  16  use PhpOffice\PhpSpreadsheet\Settings;
  17  use PhpOffice\PhpSpreadsheet\Shared\Date;
  18  use PhpOffice\PhpSpreadsheet\Shared\File;
  19  use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
  20  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  21  use SimpleXMLElement;
  22  
  23  /**
  24   * Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003.
  25   */
  26  class Xml extends BaseReader
  27  {
  28      /**
  29       * Formats.
  30       *
  31       * @var array
  32       */
  33      protected $styles = [];
  34  
  35      /**
  36       * Create a new Excel2003XML Reader instance.
  37       */
  38      public function __construct()
  39      {
  40          parent::__construct();
  41          $this->securityScanner = XmlScanner::getInstance($this);
  42      }
  43  
  44      private $fileContents = '';
  45  
  46      public static function xmlMappings(): array
  47      {
  48          return array_merge(
  49              Style\Fill::FILL_MAPPINGS,
  50              Style\Border::BORDER_MAPPINGS
  51          );
  52      }
  53  
  54      /**
  55       * Can the current IReader read the file?
  56       */
  57      public function canRead(string $filename): bool
  58      {
  59          //    Office                    xmlns:o="urn:schemas-microsoft-com:office:office"
  60          //    Excel                    xmlns:x="urn:schemas-microsoft-com:office:excel"
  61          //    XML Spreadsheet            xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
  62          //    Spreadsheet component    xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet"
  63          //    XML schema                 xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882"
  64          //    XML data type            xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882"
  65          //    MS-persist recordset    xmlns:rs="urn:schemas-microsoft-com:rowset"
  66          //    Rowset                    xmlns:z="#RowsetSchema"
  67          //
  68  
  69          $signature = [
  70              '<?xml version="1.0"',
  71              'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet',
  72          ];
  73  
  74          // Open file
  75          $data = file_get_contents($filename);
  76  
  77          // Why?
  78          //$data = str_replace("'", '"', $data); // fix headers with single quote
  79  
  80          $valid = true;
  81          foreach ($signature as $match) {
  82              // every part of the signature must be present
  83              if (strpos($data, $match) === false) {
  84                  $valid = false;
  85  
  86                  break;
  87              }
  88          }
  89  
  90          //    Retrieve charset encoding
  91          if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) {
  92              $charSet = strtoupper($matches[1]);
  93              if (1 == preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet)) {
  94                  $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet);
  95                  $data = preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
  96              }
  97          }
  98          $this->fileContents = $data;
  99  
 100          return $valid;
 101      }
 102  
 103      /**
 104       * Check if the file is a valid SimpleXML.
 105       *
 106       * @param string $filename
 107       *
 108       * @return false|SimpleXMLElement
 109       */
 110      public function trySimpleXMLLoadString($filename)
 111      {
 112          try {
 113              $xml = simplexml_load_string(
 114                  $this->securityScanner->scan($this->fileContents ?: file_get_contents($filename)),
 115                  'SimpleXMLElement',
 116                  Settings::getLibXmlLoaderOptions()
 117              );
 118          } catch (\Exception $e) {
 119              throw new Exception('Cannot load invalid XML file: ' . $filename, 0, $e);
 120          }
 121          $this->fileContents = '';
 122  
 123          return $xml;
 124      }
 125  
 126      /**
 127       * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object.
 128       *
 129       * @param string $filename
 130       *
 131       * @return array
 132       */
 133      public function listWorksheetNames($filename)
 134      {
 135          File::assertFile($filename);
 136          if (!$this->canRead($filename)) {
 137              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 138          }
 139  
 140          $worksheetNames = [];
 141  
 142          $xml = $this->trySimpleXMLLoadString($filename);
 143          if ($xml === false) {
 144              throw new Exception("Problem reading {$filename}");
 145          }
 146  
 147          $namespaces = $xml->getNamespaces(true);
 148  
 149          $xml_ss = $xml->children($namespaces['ss']);
 150          foreach ($xml_ss->Worksheet as $worksheet) {
 151              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 152              $worksheetNames[] = (string) $worksheet_ss['Name'];
 153          }
 154  
 155          return $worksheetNames;
 156      }
 157  
 158      /**
 159       * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 160       *
 161       * @param string $filename
 162       *
 163       * @return array
 164       */
 165      public function listWorksheetInfo($filename)
 166      {
 167          File::assertFile($filename);
 168          if (!$this->canRead($filename)) {
 169              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 170          }
 171  
 172          $worksheetInfo = [];
 173  
 174          $xml = $this->trySimpleXMLLoadString($filename);
 175          if ($xml === false) {
 176              throw new Exception("Problem reading {$filename}");
 177          }
 178  
 179          $namespaces = $xml->getNamespaces(true);
 180  
 181          $worksheetID = 1;
 182          $xml_ss = $xml->children($namespaces['ss']);
 183          foreach ($xml_ss->Worksheet as $worksheet) {
 184              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 185  
 186              $tmpInfo = [];
 187              $tmpInfo['worksheetName'] = '';
 188              $tmpInfo['lastColumnLetter'] = 'A';
 189              $tmpInfo['lastColumnIndex'] = 0;
 190              $tmpInfo['totalRows'] = 0;
 191              $tmpInfo['totalColumns'] = 0;
 192  
 193              $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}";
 194              if (isset($worksheet_ss['Name'])) {
 195                  $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name'];
 196              }
 197  
 198              if (isset($worksheet->Table->Row)) {
 199                  $rowIndex = 0;
 200  
 201                  foreach ($worksheet->Table->Row as $rowData) {
 202                      $columnIndex = 0;
 203                      $rowHasData = false;
 204  
 205                      foreach ($rowData->Cell as $cell) {
 206                          if (isset($cell->Data)) {
 207                              $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex);
 208                              $rowHasData = true;
 209                          }
 210  
 211                          ++$columnIndex;
 212                      }
 213  
 214                      ++$rowIndex;
 215  
 216                      if ($rowHasData) {
 217                          $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex);
 218                      }
 219                  }
 220              }
 221  
 222              $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1);
 223              $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1;
 224  
 225              $worksheetInfo[] = $tmpInfo;
 226              ++$worksheetID;
 227          }
 228  
 229          return $worksheetInfo;
 230      }
 231  
 232      /**
 233       * Loads Spreadsheet from file.
 234       *
 235       * @return Spreadsheet
 236       */
 237      public function load(string $filename, int $flags = 0)
 238      {
 239          $this->processFlags($flags);
 240  
 241          // Create new Spreadsheet
 242          $spreadsheet = new Spreadsheet();
 243          $spreadsheet->removeSheetByIndex(0);
 244  
 245          // Load into this instance
 246          return $this->loadIntoExisting($filename, $spreadsheet);
 247      }
 248  
 249      /**
 250       * Loads from file into Spreadsheet instance.
 251       *
 252       * @param string $filename
 253       *
 254       * @return Spreadsheet
 255       */
 256      public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
 257      {
 258          File::assertFile($filename);
 259          if (!$this->canRead($filename)) {
 260              throw new Exception($filename . ' is an Invalid Spreadsheet file.');
 261          }
 262  
 263          $xml = $this->trySimpleXMLLoadString($filename);
 264          if ($xml === false) {
 265              throw new Exception("Problem reading {$filename}");
 266          }
 267  
 268          $namespaces = $xml->getNamespaces(true);
 269  
 270          (new Properties($spreadsheet))->readProperties($xml, $namespaces);
 271  
 272          $this->styles = (new Style())->parseStyles($xml, $namespaces);
 273  
 274          $worksheetID = 0;
 275          $xml_ss = $xml->children($namespaces['ss']);
 276  
 277          /** @var null|SimpleXMLElement $worksheetx */
 278          foreach ($xml_ss->Worksheet as $worksheetx) {
 279              $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>');
 280              $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']);
 281  
 282              if (
 283                  isset($this->loadSheetsOnly, $worksheet_ss['Name']) &&
 284                  (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly))
 285              ) {
 286                  continue;
 287              }
 288  
 289              // Create new Worksheet
 290              $spreadsheet->createSheet();
 291              $spreadsheet->setActiveSheetIndex($worksheetID);
 292              $worksheetName = '';
 293              if (isset($worksheet_ss['Name'])) {
 294                  $worksheetName = (string) $worksheet_ss['Name'];
 295                  //    Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in
 296                  //        formula cells... during the load, all formulae should be correct, and we're simply bringing
 297                  //        the worksheet name in line with the formula, not the reverse
 298                  $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false);
 299              }
 300  
 301              // locally scoped defined names
 302              if (isset($worksheet->Names[0])) {
 303                  foreach ($worksheet->Names[0] as $definedName) {
 304                      $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
 305                      $name = (string) $definedName_ss['Name'];
 306                      $definedValue = (string) $definedName_ss['RefersTo'];
 307                      $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
 308                      if ($convertedValue[0] === '=') {
 309                          $convertedValue = substr($convertedValue, 1);
 310                      }
 311                      $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true));
 312                  }
 313              }
 314  
 315              $columnID = 'A';
 316              if (isset($worksheet->Table->Column)) {
 317                  foreach ($worksheet->Table->Column as $columnData) {
 318                      $columnData_ss = self::getAttributes($columnData, $namespaces['ss']);
 319                      if (isset($columnData_ss['Index'])) {
 320                          $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']);
 321                      }
 322                      if (isset($columnData_ss['Width'])) {
 323                          $columnWidth = $columnData_ss['Width'];
 324                          $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4);
 325                      }
 326                      ++$columnID;
 327                  }
 328              }
 329  
 330              $rowID = 1;
 331              if (isset($worksheet->Table->Row)) {
 332                  $additionalMergedCells = 0;
 333                  foreach ($worksheet->Table->Row as $rowData) {
 334                      $rowHasData = false;
 335                      $row_ss = self::getAttributes($rowData, $namespaces['ss']);
 336                      if (isset($row_ss['Index'])) {
 337                          $rowID = (int) $row_ss['Index'];
 338                      }
 339  
 340                      $columnID = 'A';
 341                      foreach ($rowData->Cell as $cell) {
 342                          $cell_ss = self::getAttributes($cell, $namespaces['ss']);
 343                          if (isset($cell_ss['Index'])) {
 344                              $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']);
 345                          }
 346                          $cellRange = $columnID . $rowID;
 347  
 348                          if ($this->getReadFilter() !== null) {
 349                              if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) {
 350                                  ++$columnID;
 351  
 352                                  continue;
 353                              }
 354                          }
 355  
 356                          if (isset($cell_ss['HRef'])) {
 357                              $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']);
 358                          }
 359  
 360                          if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) {
 361                              $columnTo = $columnID;
 362                              if (isset($cell_ss['MergeAcross'])) {
 363                                  $additionalMergedCells += (int) $cell_ss['MergeAcross'];
 364                                  $columnTo = Coordinate::stringFromColumnIndex((int) (Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross']));
 365                              }
 366                              $rowTo = $rowID;
 367                              if (isset($cell_ss['MergeDown'])) {
 368                                  $rowTo = $rowTo + $cell_ss['MergeDown'];
 369                              }
 370                              $cellRange .= ':' . $columnTo . $rowTo;
 371                              $spreadsheet->getActiveSheet()->mergeCells($cellRange);
 372                          }
 373  
 374                          $hasCalculatedValue = false;
 375                          $cellDataFormula = '';
 376                          if (isset($cell_ss['Formula'])) {
 377                              $cellDataFormula = $cell_ss['Formula'];
 378                              $hasCalculatedValue = true;
 379                          }
 380                          if (isset($cell->Data)) {
 381                              $cellData = $cell->Data;
 382                              $cellValue = (string) $cellData;
 383                              $type = DataType::TYPE_NULL;
 384                              $cellData_ss = self::getAttributes($cellData, $namespaces['ss']);
 385                              if (isset($cellData_ss['Type'])) {
 386                                  $cellDataType = $cellData_ss['Type'];
 387                                  switch ($cellDataType) {
 388                                      /*
 389                                      const TYPE_STRING        = 's';
 390                                      const TYPE_FORMULA        = 'f';
 391                                      const TYPE_NUMERIC        = 'n';
 392                                      const TYPE_BOOL            = 'b';
 393                                      const TYPE_NULL            = 'null';
 394                                      const TYPE_INLINE        = 'inlineStr';
 395                                      const TYPE_ERROR        = 'e';
 396                                      */
 397                                      case 'String':
 398                                          $type = DataType::TYPE_STRING;
 399  
 400                                          break;
 401                                      case 'Number':
 402                                          $type = DataType::TYPE_NUMERIC;
 403                                          $cellValue = (float) $cellValue;
 404                                          if (floor($cellValue) == $cellValue) {
 405                                              $cellValue = (int) $cellValue;
 406                                          }
 407  
 408                                          break;
 409                                      case 'Boolean':
 410                                          $type = DataType::TYPE_BOOL;
 411                                          $cellValue = ($cellValue != 0);
 412  
 413                                          break;
 414                                      case 'DateTime':
 415                                          $type = DataType::TYPE_NUMERIC;
 416                                          $dateTime = new DateTime($cellValue, new DateTimeZone('UTC'));
 417                                          $cellValue = Date::PHPToExcel($dateTime);
 418  
 419                                          break;
 420                                      case 'Error':
 421                                          $type = DataType::TYPE_ERROR;
 422                                          $hasCalculatedValue = false;
 423  
 424                                          break;
 425                                  }
 426                              }
 427  
 428                              if ($hasCalculatedValue) {
 429                                  $type = DataType::TYPE_FORMULA;
 430                                  $columnNumber = Coordinate::columnIndexFromString($columnID);
 431                                  $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber);
 432                              }
 433  
 434                              $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type);
 435                              if ($hasCalculatedValue) {
 436                                  $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue);
 437                              }
 438                              $rowHasData = true;
 439                          }
 440  
 441                          if (isset($cell->Comment)) {
 442                              $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID);
 443                          }
 444  
 445                          if (isset($cell_ss['StyleID'])) {
 446                              $style = (string) $cell_ss['StyleID'];
 447                              if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) {
 448                                  //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) {
 449                                  //    $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null);
 450                                  //}
 451                                  $spreadsheet->getActiveSheet()->getStyle($cellRange)
 452                                      ->applyFromArray($this->styles[$style]);
 453                              }
 454                          }
 455                          ++$columnID;
 456                          while ($additionalMergedCells > 0) {
 457                              ++$columnID;
 458                              --$additionalMergedCells;
 459                          }
 460                      }
 461  
 462                      if ($rowHasData) {
 463                          if (isset($row_ss['Height'])) {
 464                              $rowHeight = $row_ss['Height'];
 465                              $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight);
 466                          }
 467                      }
 468  
 469                      ++$rowID;
 470                  }
 471  
 472                  if (isset($namespaces['x'])) {
 473                      $xmlX = $worksheet->children($namespaces['x']);
 474                      if (isset($xmlX->WorksheetOptions)) {
 475                          (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet);
 476                      }
 477                  }
 478              }
 479              ++$worksheetID;
 480          }
 481  
 482          // Globally scoped defined names
 483          $activeWorksheet = $spreadsheet->setActiveSheetIndex(0);
 484          if (isset($xml->Names[0])) {
 485              foreach ($xml->Names[0] as $definedName) {
 486                  $definedName_ss = self::getAttributes($definedName, $namespaces['ss']);
 487                  $name = (string) $definedName_ss['Name'];
 488                  $definedValue = (string) $definedName_ss['RefersTo'];
 489                  $convertedValue = AddressHelper::convertFormulaToA1($definedValue);
 490                  if ($convertedValue[0] === '=') {
 491                      $convertedValue = substr($convertedValue, 1);
 492                  }
 493                  $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue));
 494              }
 495          }
 496  
 497          // Return
 498          return $spreadsheet;
 499      }
 500  
 501      protected function parseCellComment(
 502          SimpleXMLElement $comment,
 503          array $namespaces,
 504          Spreadsheet $spreadsheet,
 505          string $columnID,
 506          int $rowID
 507      ): void {
 508          $commentAttributes = $comment->attributes($namespaces['ss']);
 509          $author = 'unknown';
 510          if (isset($commentAttributes->Author)) {
 511              $author = (string) $commentAttributes->Author;
 512          }
 513  
 514          $node = $comment->Data->asXML();
 515          $annotation = strip_tags((string) $node);
 516          $spreadsheet->getActiveSheet()->getComment($columnID . $rowID)
 517              ->setAuthor($author)
 518              ->setText($this->parseRichText($annotation));
 519      }
 520  
 521      protected function parseRichText(string $annotation): RichText
 522      {
 523          $value = new RichText();
 524  
 525          $value->createText($annotation);
 526  
 527          return $value;
 528      }
 529  
 530      private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement
 531      {
 532          return ($simple === null)
 533              ? new SimpleXMLElement('<xml></xml>')
 534              : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>'));
 535      }
 536  }