1 <?php 2 3 declare(strict_types=1); 4 5 namespace OpenSpout\Reader\XLSX\Manager; 6 7 use OpenSpout\Reader\Wrapper\XMLReader; 8 9 class StyleManager implements StyleManagerInterface 10 { 11 /** 12 * Nodes used to find relevant information in the styles XML file. 13 */ 14 public const XML_NODE_NUM_FMTS = 'numFmts'; 15 public const XML_NODE_NUM_FMT = 'numFmt'; 16 public const XML_NODE_CELL_XFS = 'cellXfs'; 17 public const XML_NODE_XF = 'xf'; 18 19 /** 20 * Attributes used to find relevant information in the styles XML file. 21 */ 22 public const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId'; 23 public const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode'; 24 public const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat'; 25 public const XML_ATTRIBUTE_COUNT = 'count'; 26 27 /** 28 * By convention, default style ID is 0. 29 */ 30 public const DEFAULT_STYLE_ID = 0; 31 32 public const NUMBER_FORMAT_GENERAL = 'General'; 33 34 /** 35 * Mapping between built-in numFmtId and the associated format - for dates only. 36 * 37 * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx 38 */ 39 private const builtinNumFmtIdToNumFormatMapping = [ 40 14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy' 41 15 => 'd-mmm-yy', 42 16 => 'd-mmm', 43 17 => 'mmm-yy', 44 18 => 'h:mm AM/PM', 45 19 => 'h:mm:ss AM/PM', 46 20 => 'h:mm', 47 21 => 'h:mm:ss', 48 22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm', 49 45 => 'mm:ss', 50 46 => '[h]:mm:ss', 51 47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0', 52 ]; 53 54 /** @var string Path of the XLSX file being read */ 55 private string $filePath; 56 57 /** @var null|string Path of the styles XML file */ 58 private ?string $stylesXMLFilePath; 59 60 /** @var array<int, string> Array containing a mapping NUM_FMT_ID => FORMAT_CODE */ 61 private array $customNumberFormats; 62 63 /** @var array<array-key, array<string, null|bool|int>> Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */ 64 private array $stylesAttributes; 65 66 /** @var array<int, bool> Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */ 67 private array $numFmtIdToIsDateFormatCache = []; 68 69 /** 70 * @param string $filePath Path of the XLSX file being read 71 * @param ?string $stylesXMLFilePath 72 */ 73 public function __construct(string $filePath, ?string $stylesXMLFilePath) 74 { 75 $this->filePath = $filePath; 76 $this->stylesXMLFilePath = $stylesXMLFilePath; 77 } 78 79 public function shouldFormatNumericValueAsDate(int $styleId): bool 80 { 81 if (null === $this->stylesXMLFilePath) { 82 return false; 83 } 84 85 $stylesAttributes = $this->getStylesAttributes(); 86 87 // Default style (0) does not format numeric values as timestamps. Only custom styles do. 88 // Also if the style ID does not exist in the styles.xml file, format as numeric value. 89 // Using isset here because it is way faster than array_key_exists... 90 if (self::DEFAULT_STYLE_ID === $styleId || !isset($stylesAttributes[$styleId])) { 91 return false; 92 } 93 94 $styleAttributes = $stylesAttributes[$styleId]; 95 96 return $this->doesStyleIndicateDate($styleAttributes); 97 } 98 99 public function getNumberFormatCode(int $styleId): string 100 { 101 $stylesAttributes = $this->getStylesAttributes(); 102 $styleAttributes = $stylesAttributes[$styleId]; 103 $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; 104 \assert(\is_int($numFmtId)); 105 106 if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) { 107 $numberFormatCode = self::builtinNumFmtIdToNumFormatMapping[$numFmtId]; 108 } else { 109 $customNumberFormats = $this->getCustomNumberFormats(); 110 $numberFormatCode = $customNumberFormats[$numFmtId]; 111 } 112 113 return $numberFormatCode; 114 } 115 116 /** 117 * @return array<int, string> The custom number formats 118 */ 119 protected function getCustomNumberFormats(): array 120 { 121 if (!isset($this->customNumberFormats)) { 122 $this->extractRelevantInfo(); 123 } 124 125 return $this->customNumberFormats; 126 } 127 128 /** 129 * @return array<array-key, array<string, null|bool|int>> The styles attributes 130 */ 131 protected function getStylesAttributes(): array 132 { 133 if (!isset($this->stylesAttributes)) { 134 $this->extractRelevantInfo(); 135 } 136 137 return $this->stylesAttributes; 138 } 139 140 /** 141 * Reads the styles.xml file and extract the relevant information from the file. 142 */ 143 private function extractRelevantInfo(): void 144 { 145 $this->customNumberFormats = []; 146 $this->stylesAttributes = []; 147 148 $xmlReader = new XMLReader(); 149 150 if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) { 151 while ($xmlReader->read()) { 152 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS) 153 && '0' !== $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT)) { 154 $this->extractNumberFormats($xmlReader); 155 } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) { 156 $this->extractStyleAttributes($xmlReader); 157 } 158 } 159 160 $xmlReader->close(); 161 } 162 } 163 164 /** 165 * Extracts number formats from the "numFmt" nodes. 166 * For simplicity, the styles attributes are kept in memory. This is possible thanks 167 * to the reuse of formats. So 1 million cells should not use 1 million formats. 168 * 169 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node 170 */ 171 private function extractNumberFormats(XMLReader $xmlReader): void 172 { 173 while ($xmlReader->read()) { 174 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) { 175 $numFmtId = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID); 176 $formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE); 177 \assert(null !== $formatCode); 178 $this->customNumberFormats[$numFmtId] = $formatCode; 179 } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) { 180 // Once done reading "numFmts" node's children 181 break; 182 } 183 } 184 } 185 186 /** 187 * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section. 188 * For simplicity, the styles attributes are kept in memory. This is possible thanks 189 * to the reuse of styles. So 1 million cells should not use 1 million styles. 190 * 191 * @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node 192 */ 193 private function extractStyleAttributes(XMLReader $xmlReader): void 194 { 195 while ($xmlReader->read()) { 196 if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) { 197 $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID); 198 $normalizedNumFmtId = (null !== $numFmtId) ? (int) $numFmtId : null; 199 200 $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT); 201 $normalizedApplyNumberFormat = (null !== $applyNumberFormat) ? (bool) $applyNumberFormat : null; 202 203 $this->stylesAttributes[] = [ 204 self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId, 205 self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat, 206 ]; 207 } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) { 208 // Once done reading "cellXfs" node's children 209 break; 210 } 211 } 212 } 213 214 /** 215 * @param array<string, null|bool|int> $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId") 216 * 217 * @return bool Whether the style with the given attributes indicates that the number is a date 218 */ 219 private function doesStyleIndicateDate(array $styleAttributes): bool 220 { 221 $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT]; 222 $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; 223 224 // A style may apply a date format if it has: 225 // - "applyNumberFormat" attribute not set to "false" 226 // - "numFmtId" attribute set 227 // This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format, 228 // but this is not necessarily a date. 229 if (false === $applyNumberFormat || !\is_int($numFmtId)) { 230 return false; 231 } 232 233 return $this->doesNumFmtIdIndicateDate($numFmtId); 234 } 235 236 /** 237 * Returns whether the number format ID indicates that the number is a date. 238 * The result is cached to avoid recomputing the same thing over and over, as 239 * "numFmtId" attributes can be shared between multiple styles. 240 * 241 * @return bool Whether the number format ID indicates that the number is a date 242 */ 243 private function doesNumFmtIdIndicateDate(int $numFmtId): bool 244 { 245 if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) { 246 $formatCode = $this->getFormatCodeForNumFmtId($numFmtId); 247 248 $this->numFmtIdToIsDateFormatCache[$numFmtId] = ( 249 $this->isNumFmtIdBuiltInDateFormat($numFmtId) 250 || $this->isFormatCodeCustomDateFormat($formatCode) 251 ); 252 } 253 254 return $this->numFmtIdToIsDateFormatCache[$numFmtId]; 255 } 256 257 /** 258 * @return null|string The custom number format or NULL if none defined for the given numFmtId 259 */ 260 private function getFormatCodeForNumFmtId(int $numFmtId): ?string 261 { 262 $customNumberFormats = $this->getCustomNumberFormats(); 263 264 // Using isset here because it is way faster than array_key_exists... 265 return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null; 266 } 267 268 /** 269 * @return bool Whether the number format ID indicates that the number is a date 270 */ 271 private function isNumFmtIdBuiltInDateFormat(int $numFmtId): bool 272 { 273 return \array_key_exists($numFmtId, self::builtinNumFmtIdToNumFormatMapping); 274 } 275 276 /** 277 * @return bool Whether the given format code indicates that the number is a date 278 */ 279 private function isFormatCodeCustomDateFormat(?string $formatCode): bool 280 { 281 // if no associated format code or if using the default "General" format 282 if (null === $formatCode || 0 === strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL)) { 283 return false; 284 } 285 286 return $this->isFormatCodeMatchingDateFormatPattern($formatCode); 287 } 288 289 /** 290 * @return bool Whether the given format code matches a date format pattern 291 */ 292 private function isFormatCodeMatchingDateFormatPattern(string $formatCode): bool 293 { 294 // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\") 295 $pattern = '((?<!\\\)\[.+?(?<!\\\)\])'; 296 $formatCode = preg_replace($pattern, '', $formatCode); 297 \assert(null !== $formatCode); 298 299 // custom date formats contain specific characters to represent the date: 300 // e - yy - m - d - h - s 301 // and all of their variants (yyyy - mm - dd...) 302 $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's']; 303 304 $hasFoundDateFormatCharacter = false; 305 foreach ($dateFormatCharacters as $dateFormatCharacter) { 306 // character not preceded by "\" (case insensitive) 307 $pattern = '/(?<!\\\)'.$dateFormatCharacter.'/i'; 308 309 if (1 === preg_match($pattern, $formatCode)) { 310 $hasFoundDateFormatCharacter = true; 311 312 break; 313 } 314 } 315 316 return $hasFoundDateFormatCharacter; 317 } 318 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body