1 <?php 2 3 declare(strict_types=1); 4 5 namespace OpenSpout\Reader\XLSX\Manager; 6 7 use OpenSpout\Common\Helper\Escaper\XLSX; 8 use OpenSpout\Reader\Common\Manager\RowManager; 9 use OpenSpout\Reader\Common\XMLProcessor; 10 use OpenSpout\Reader\Wrapper\XMLReader; 11 use OpenSpout\Reader\XLSX\Helper\CellValueFormatter; 12 use OpenSpout\Reader\XLSX\Options; 13 use OpenSpout\Reader\XLSX\RowIterator; 14 use OpenSpout\Reader\XLSX\Sheet; 15 use OpenSpout\Reader\XLSX\SheetHeaderReader; 16 17 /** 18 * @internal 19 */ 20 final class SheetManager 21 { 22 /** 23 * Paths of XML files relative to the XLSX file root. 24 */ 25 public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; 26 public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml'; 27 28 /** 29 * Definition of XML node names used to parse data. 30 */ 31 public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr'; 32 public const XML_NODE_WORKBOOK_VIEW = 'workbookView'; 33 public const XML_NODE_SHEET = 'sheet'; 34 public const XML_NODE_SHEETS = 'sheets'; 35 public const XML_NODE_RELATIONSHIP = 'Relationship'; 36 37 /** 38 * Definition of XML attributes used to parse data. 39 */ 40 public const XML_ATTRIBUTE_DATE_1904 = 'date1904'; 41 public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab'; 42 public const XML_ATTRIBUTE_R_ID = 'r:id'; 43 public const XML_ATTRIBUTE_NAME = 'name'; 44 public const XML_ATTRIBUTE_STATE = 'state'; 45 public const XML_ATTRIBUTE_ID = 'Id'; 46 public const XML_ATTRIBUTE_TARGET = 'Target'; 47 48 /** 49 * State value to represent a hidden sheet. 50 */ 51 public const SHEET_STATE_HIDDEN = 'hidden'; 52 53 /** @var string Path of the XLSX file being read */ 54 private string $filePath; 55 56 private Options $options; 57 58 /** @var SharedStringsManager Manages shared strings */ 59 private SharedStringsManager $sharedStringsManager; 60 61 /** @var XLSX Used to unescape XML data */ 62 private XLSX $escaper; 63 64 /** @var Sheet[] List of sheets */ 65 private array $sheets; 66 67 /** @var int Index of the sheet currently read */ 68 private int $currentSheetIndex; 69 70 /** @var int Index of the active sheet (0 by default) */ 71 private int $activeSheetIndex; 72 73 public function __construct( 74 string $filePath, 75 Options $options, 76 SharedStringsManager $sharedStringsManager, 77 XLSX $escaper 78 ) { 79 $this->filePath = $filePath; 80 $this->options = $options; 81 $this->sharedStringsManager = $sharedStringsManager; 82 $this->escaper = $escaper; 83 } 84 85 /** 86 * Returns the sheets metadata of the file located at the previously given file path. 87 * The paths to the sheets' data are read from the [Content_Types].xml file. 88 * 89 * @return Sheet[] Sheets within the XLSX file 90 */ 91 public function getSheets(): array 92 { 93 $this->sheets = []; 94 $this->currentSheetIndex = 0; 95 $this->activeSheetIndex = 0; // By default, the first sheet is active 96 97 $xmlReader = new XMLReader(); 98 $xmlProcessor = new XMLProcessor($xmlReader); 99 100 $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']); 101 $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']); 102 $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']); 103 $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']); 104 105 if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) { 106 $xmlProcessor->readUntilStopped(); 107 $xmlReader->close(); 108 } 109 110 return $this->sheets; 111 } 112 113 /** 114 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node 115 * 116 * @return int A return code that indicates what action should the processor take next 117 */ 118 private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int 119 { 120 // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute 121 // may be the string "false", that is not mapped to the boolean "false" by default... 122 $shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN); 123 $this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates; 124 125 return XMLProcessor::PROCESSING_CONTINUE; 126 } 127 128 /** 129 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node 130 * 131 * @return int A return code that indicates what action should the processor take next 132 */ 133 private function processWorkbookViewStartingNode(XMLReader $xmlReader): int 134 { 135 // The "workbookView" node is located before "sheet" nodes, ensuring that 136 // the active sheet is known before parsing sheets data. 137 $this->activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB); 138 139 return XMLProcessor::PROCESSING_CONTINUE; 140 } 141 142 /** 143 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node 144 * 145 * @return int A return code that indicates what action should the processor take next 146 */ 147 private function processSheetStartingNode(XMLReader $xmlReader): int 148 { 149 $isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex); 150 $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive); 151 ++$this->currentSheetIndex; 152 153 return XMLProcessor::PROCESSING_CONTINUE; 154 } 155 156 /** 157 * @return int A return code that indicates what action should the processor take next 158 */ 159 private function processSheetsEndingNode(): int 160 { 161 return XMLProcessor::PROCESSING_STOP; 162 } 163 164 /** 165 * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml". 166 * We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID 167 * ("r:id" in "workbook.xml", "Id" in "workbook.xml.res"). 168 * 169 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml" 170 * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based) 171 * @param bool $isSheetActive Whether this sheet was defined as active 172 * 173 * @return \OpenSpout\Reader\XLSX\Sheet Sheet instance 174 */ 175 private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet 176 { 177 $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID); 178 \assert(null !== $sheetId); 179 180 $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE); 181 $isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState); 182 183 $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME); 184 \assert(null !== $escapedSheetName); 185 $sheetName = $this->escaper->unescape($escapedSheetName); 186 187 $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId); 188 189 return new Sheet( 190 $this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager), 191 $this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath), 192 $sheetIndexZeroBased, 193 $sheetName, 194 $isSheetActive, 195 $isSheetVisible 196 ); 197 } 198 199 /** 200 * @param string $sheetId The sheet ID, as defined in "workbook.xml" 201 * 202 * @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID 203 */ 204 private function getSheetDataXMLFilePathForSheetId(string $sheetId): string 205 { 206 $sheetDataXMLFilePath = ''; 207 208 // find the file path of the sheet, by looking at the "workbook.xml.res" file 209 $xmlReader = new XMLReader(); 210 if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) { 211 while ($xmlReader->read()) { 212 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) { 213 $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID); 214 215 if ($relationshipSheetId === $sheetId) { 216 // In workbook.xml.rels, it is only "worksheets/sheet1.xml" 217 // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" 218 $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET); 219 \assert(null !== $sheetDataXMLFilePath); 220 221 // sometimes, the sheet data file path already contains "/xl/"... 222 if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) { 223 $sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath; 224 225 break; 226 } 227 } 228 } 229 } 230 231 $xmlReader->close(); 232 } 233 234 return $sheetDataXMLFilePath; 235 } 236 237 private function createRowIterator( 238 string $filePath, 239 string $sheetDataXMLFilePath, 240 Options $options, 241 SharedStringsManager $sharedStringsManager 242 ): RowIterator { 243 $xmlReader = new XMLReader(); 244 245 $workbookRelationshipsManager = new WorkbookRelationshipsManager($filePath); 246 $styleManager = new StyleManager( 247 $filePath, 248 $workbookRelationshipsManager->hasStylesXMLFile() 249 ? $workbookRelationshipsManager->getStylesXMLFilePath() 250 : null 251 ); 252 253 $cellValueFormatter = new CellValueFormatter( 254 $sharedStringsManager, 255 $styleManager, 256 $options->SHOULD_FORMAT_DATES, 257 $options->SHOULD_USE_1904_DATES, 258 new XLSX() 259 ); 260 261 return new RowIterator( 262 $filePath, 263 $sheetDataXMLFilePath, 264 $options->SHOULD_PRESERVE_EMPTY_ROWS, 265 $xmlReader, 266 new XMLProcessor($xmlReader), 267 $cellValueFormatter, 268 new RowManager() 269 ); 270 } 271 272 private function createSheetHeaderReader( 273 string $filePath, 274 string $sheetDataXMLFilePath 275 ): SheetHeaderReader { 276 $xmlReader = new XMLReader(); 277 278 return new SheetHeaderReader( 279 $filePath, 280 $sheetDataXMLFilePath, 281 $xmlReader, 282 new XMLProcessor($xmlReader) 283 ); 284 } 285 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body