Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0 Note: minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is supported too.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\XLSX\Manager;
   6  
   7  use OpenSpout\Common\Helper\Escaper\XLSX;
   8  use OpenSpout\Reader\Common\Manager\RowManager;
   9  use OpenSpout\Reader\Common\XMLProcessor;
  10  use OpenSpout\Reader\Wrapper\XMLReader;
  11  use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
  12  use OpenSpout\Reader\XLSX\Options;
  13  use OpenSpout\Reader\XLSX\RowIterator;
  14  use OpenSpout\Reader\XLSX\Sheet;
  15  use OpenSpout\Reader\XLSX\SheetHeaderReader;
  16  
  17  /**
  18   * @internal
  19   */
  20  final class SheetManager
  21  {
  22      /**
  23       * Paths of XML files relative to the XLSX file root.
  24       */
  25      public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
  26      public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
  27  
  28      /**
  29       * Definition of XML node names used to parse data.
  30       */
  31      public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
  32      public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
  33      public const XML_NODE_SHEET = 'sheet';
  34      public const XML_NODE_SHEETS = 'sheets';
  35      public const XML_NODE_RELATIONSHIP = 'Relationship';
  36  
  37      /**
  38       * Definition of XML attributes used to parse data.
  39       */
  40      public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
  41      public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
  42      public const XML_ATTRIBUTE_R_ID = 'r:id';
  43      public const XML_ATTRIBUTE_NAME = 'name';
  44      public const XML_ATTRIBUTE_STATE = 'state';
  45      public const XML_ATTRIBUTE_ID = 'Id';
  46      public const XML_ATTRIBUTE_TARGET = 'Target';
  47  
  48      /**
  49       * State value to represent a hidden sheet.
  50       */
  51      public const SHEET_STATE_HIDDEN = 'hidden';
  52  
  53      /** @var string Path of the XLSX file being read */
  54      private string $filePath;
  55  
  56      private Options $options;
  57  
  58      /** @var SharedStringsManager Manages shared strings */
  59      private SharedStringsManager $sharedStringsManager;
  60  
  61      /** @var XLSX Used to unescape XML data */
  62      private XLSX $escaper;
  63  
  64      /** @var Sheet[] List of sheets */
  65      private array $sheets;
  66  
  67      /** @var int Index of the sheet currently read */
  68      private int $currentSheetIndex;
  69  
  70      /** @var int Index of the active sheet (0 by default) */
  71      private int $activeSheetIndex;
  72  
  73      public function __construct(
  74          string $filePath,
  75          Options $options,
  76          SharedStringsManager $sharedStringsManager,
  77          XLSX $escaper
  78      ) {
  79          $this->filePath = $filePath;
  80          $this->options = $options;
  81          $this->sharedStringsManager = $sharedStringsManager;
  82          $this->escaper = $escaper;
  83      }
  84  
  85      /**
  86       * Returns the sheets metadata of the file located at the previously given file path.
  87       * The paths to the sheets' data are read from the [Content_Types].xml file.
  88       *
  89       * @return Sheet[] Sheets within the XLSX file
  90       */
  91      public function getSheets(): array
  92      {
  93          $this->sheets = [];
  94          $this->currentSheetIndex = 0;
  95          $this->activeSheetIndex = 0; // By default, the first sheet is active
  96  
  97          $xmlReader = new XMLReader();
  98          $xmlProcessor = new XMLProcessor($xmlReader);
  99  
 100          $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
 101          $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
 102          $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
 103          $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);
 104  
 105          if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
 106              $xmlProcessor->readUntilStopped();
 107              $xmlReader->close();
 108          }
 109  
 110          return $this->sheets;
 111      }
 112  
 113      /**
 114       * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
 115       *
 116       * @return int A return code that indicates what action should the processor take next
 117       */
 118      private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
 119      {
 120          // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
 121          // may be the string "false", that is not mapped to the boolean "false" by default...
 122          $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
 123          $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;
 124  
 125          return XMLProcessor::PROCESSING_CONTINUE;
 126      }
 127  
 128      /**
 129       * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
 130       *
 131       * @return int A return code that indicates what action should the processor take next
 132       */
 133      private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
 134      {
 135          // The "workbookView" node is located before "sheet" nodes, ensuring that
 136          // the active sheet is known before parsing sheets data.
 137          $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
 138  
 139          return XMLProcessor::PROCESSING_CONTINUE;
 140      }
 141  
 142      /**
 143       * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
 144       *
 145       * @return int A return code that indicates what action should the processor take next
 146       */
 147      private function processSheetStartingNode(XMLReader $xmlReader): int
 148      {
 149          $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
 150          $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
 151          ++$this->currentSheetIndex;
 152  
 153          return XMLProcessor::PROCESSING_CONTINUE;
 154      }
 155  
 156      /**
 157       * @return int A return code that indicates what action should the processor take next
 158       */
 159      private function processSheetsEndingNode(): int
 160      {
 161          return XMLProcessor::PROCESSING_STOP;
 162      }
 163  
 164      /**
 165       * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
 166       * We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
 167       * ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
 168       *
 169       * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
 170       * @param int                                 $sheetIndexZeroBased  Index of the sheet, based on order of appearance in the workbook (zero-based)
 171       * @param bool                                $isSheetActive        Whether this sheet was defined as active
 172       *
 173       * @return \OpenSpout\Reader\XLSX\Sheet Sheet instance
 174       */
 175      private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
 176      {
 177          $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
 178          \assert(null !== $sheetId);
 179  
 180          $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
 181          $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState);
 182  
 183          $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
 184          \assert(null !== $escapedSheetName);
 185          $sheetName = $this->escaper->unescape($escapedSheetName);
 186  
 187          $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
 188  
 189          return new Sheet(
 190              $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
 191              $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
 192              $sheetIndexZeroBased,
 193              $sheetName,
 194              $isSheetActive,
 195              $isSheetVisible
 196          );
 197      }
 198  
 199      /**
 200       * @param string $sheetId The sheet ID, as defined in "workbook.xml"
 201       *
 202       * @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID
 203       */
 204      private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
 205      {
 206          $sheetDataXMLFilePath = '';
 207  
 208          // find the file path of the sheet, by looking at the "workbook.xml.res" file
 209          $xmlReader = new XMLReader();
 210          if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
 211              while ($xmlReader->read()) {
 212                  if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
 213                      $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
 214  
 215                      if ($relationshipSheetId === $sheetId) {
 216                          // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
 217                          // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
 218                          $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
 219                          \assert(null !== $sheetDataXMLFilePath);
 220  
 221                          // sometimes, the sheet data file path already contains "/xl/"...
 222                          if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
 223                              $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;
 224  
 225                              break;
 226                          }
 227                      }
 228                  }
 229              }
 230  
 231              $xmlReader->close();
 232          }
 233  
 234          return $sheetDataXMLFilePath;
 235      }
 236  
 237      private function createRowIterator(
 238          string $filePath,
 239          string $sheetDataXMLFilePath,
 240          Options $options,
 241          SharedStringsManager $sharedStringsManager
 242      ): RowIterator {
 243          $xmlReader = new XMLReader();
 244  
 245          $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath);
 246          $styleManager = new StyleManager(
 247              $filePath,
 248              $workbookRelationshipsManager->hasStylesXMLFile()
 249                  ? $workbookRelationshipsManager->getStylesXMLFilePath()
 250                  : null
 251          );
 252  
 253          $cellValueFormatter = new CellValueFormatter(
 254              $sharedStringsManager,
 255              $styleManager,
 256              $options->SHOULD_FORMAT_DATES,
 257              $options->SHOULD_USE_1904_DATES,
 258              new XLSX()
 259          );
 260  
 261          return new RowIterator(
 262              $filePath,
 263              $sheetDataXMLFilePath,
 264              $options->SHOULD_PRESERVE_EMPTY_ROWS,
 265              $xmlReader,
 266              new XMLProcessor($xmlReader),
 267              $cellValueFormatter,
 268              new RowManager()
 269          );
 270      }
 271  
 272      private function createSheetHeaderReader(
 273          string $filePath,
 274          string $sheetDataXMLFilePath
 275      ): SheetHeaderReader {
 276          $xmlReader = new XMLReader();
 277  
 278          return new SheetHeaderReader(
 279              $filePath,
 280              $sheetDataXMLFilePath,
 281              $xmlReader,
 282              new XMLProcessor($xmlReader)
 283          );
 284      }
 285  }