Differences Between: [Versions 402 and 403]
1 <?php 2 3 declare(strict_types=1); 4 5 namespace OpenSpout\Reader\XLSX\Helper; 6 7 use DateTimeImmutable; 8 use DOMElement; 9 use Exception; 10 use OpenSpout\Common\Helper\Escaper\XLSX; 11 use OpenSpout\Reader\Exception\InvalidValueException; 12 use OpenSpout\Reader\XLSX\Manager\SharedStringsManager; 13 use OpenSpout\Reader\XLSX\Manager\StyleManagerInterface; 14 15 /** 16 * This class provides helper functions to format cell values. 17 */ 18 final class CellValueFormatter 19 { 20 /** 21 * Definition of all possible cell types. 22 */ 23 public const CELL_TYPE_INLINE_STRING = 'inlineStr'; 24 public const CELL_TYPE_STR = 'str'; 25 public const CELL_TYPE_SHARED_STRING = 's'; 26 public const CELL_TYPE_BOOLEAN = 'b'; 27 public const CELL_TYPE_NUMERIC = 'n'; 28 public const CELL_TYPE_DATE = 'd'; 29 public const CELL_TYPE_ERROR = 'e'; 30 31 /** 32 * Definition of XML nodes names used to parse data. 33 */ 34 public const XML_NODE_VALUE = 'v'; 35 public const XML_NODE_INLINE_STRING_VALUE = 't'; 36 37 /** 38 * Definition of XML attributes used to parse data. 39 */ 40 public const XML_ATTRIBUTE_TYPE = 't'; 41 public const XML_ATTRIBUTE_STYLE_ID = 's'; 42 43 /** 44 * Constants used for date formatting. 45 */ 46 public const NUM_SECONDS_IN_ONE_DAY = 86400; 47 48 /** @var SharedStringsManager Manages shared strings */ 49 private SharedStringsManager $sharedStringsManager; 50 51 /** @var StyleManagerInterface Manages styles */ 52 private StyleManagerInterface $styleManager; 53 54 /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ 55 private bool $shouldFormatDates; 56 57 /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */ 58 private bool $shouldUse1904Dates; 59 60 /** @var XLSX Used to unescape XML data */ 61 private XLSX $escaper; 62 63 /** 64 * @param SharedStringsManager $sharedStringsManager Manages shared strings 65 * @param StyleManagerInterface $styleManager Manages styles 66 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings 67 * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900 68 * @param XLSX $escaper Used to unescape XML data 69 */ 70 public function __construct( 71 SharedStringsManager $sharedStringsManager, 72 StyleManagerInterface $styleManager, 73 bool $shouldFormatDates, 74 bool $shouldUse1904Dates, 75 XLSX $escaper 76 ) { 77 $this->sharedStringsManager = $sharedStringsManager; 78 $this->styleManager = $styleManager; 79 $this->shouldFormatDates = $shouldFormatDates; 80 $this->shouldUse1904Dates = $shouldUse1904Dates; 81 $this->escaper = $escaper; 82 } 83 84 /** 85 * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. 86 * 87 * @return bool|DateTimeImmutable|float|int|string The value associated with the cell 88 * 89 * @throws InvalidValueException If the value is not valid 90 */ 91 public function extractAndFormatNodeValue(DOMElement $node): bool|DateTimeImmutable|float|int|string 92 { 93 // Default cell type is "n" 94 $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; 95 $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID); 96 $vNodeValue = $this->getVNodeValue($node); 97 98 if (('' === $vNodeValue) && (self::CELL_TYPE_INLINE_STRING !== $cellType)) { 99 return $vNodeValue; 100 } 101 102 return match ($cellType) { 103 self::CELL_TYPE_INLINE_STRING => $this->formatInlineStringCellValue($node), 104 self::CELL_TYPE_SHARED_STRING => $this->formatSharedStringCellValue($vNodeValue), 105 self::CELL_TYPE_STR => $this->formatStrCellValue($vNodeValue), 106 self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($vNodeValue), 107 self::CELL_TYPE_NUMERIC => $this->formatNumericCellValue($vNodeValue, $cellStyleId), 108 self::CELL_TYPE_DATE => $this->formatDateCellValue($vNodeValue), 109 default => throw new InvalidValueException($vNodeValue), 110 }; 111 } 112 113 /** 114 * Returns the cell's string value from a node's nested value node. 115 * 116 * @return string The value associated with the cell 117 */ 118 private function getVNodeValue(DOMElement $node): string 119 { 120 // for cell types having a "v" tag containing the value. 121 // if not, the returned value should be empty string. 122 $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); 123 124 return (string) $vNode?->nodeValue; 125 } 126 127 /** 128 * Returns the cell String value where string is inline. 129 * 130 * @return string The value associated with the cell 131 */ 132 private function formatInlineStringCellValue(DOMElement $node): string 133 { 134 // inline strings are formatted this way (they can contain any number of <t> nodes): 135 // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c> 136 $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE); 137 138 $cellValue = ''; 139 for ($i = 0; $i < $tNodes->count(); ++$i) { 140 $nodeValue = $tNodes->item($i)->nodeValue; 141 \assert(null !== $nodeValue); 142 $cellValue .= $this->escaper->unescape($nodeValue); 143 } 144 145 return $cellValue; 146 } 147 148 /** 149 * Returns the cell String value from shared-strings file using nodeValue index. 150 * 151 * @return string The value associated with the cell 152 */ 153 private function formatSharedStringCellValue(string $nodeValue): string 154 { 155 // shared strings are formatted this way: 156 // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c> 157 $sharedStringIndex = (int) $nodeValue; 158 $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex); 159 160 return $this->escaper->unescape($escapedCellValue); 161 } 162 163 /** 164 * Returns the cell String value, where string is stored in value node. 165 * 166 * @return string The value associated with the cell 167 */ 168 private function formatStrCellValue(string $nodeValue): string 169 { 170 $escapedCellValue = trim($nodeValue); 171 172 return $this->escaper->unescape($escapedCellValue); 173 } 174 175 /** 176 * Returns the cell Numeric value from string of nodeValue. 177 * The value can also represent a timestamp and a DateTime will be returned. 178 * 179 * @param int $cellStyleId 0 being the default style 180 */ 181 private function formatNumericCellValue(int|float|string $nodeValue, int $cellStyleId): DateTimeImmutable|float|int|string 182 { 183 // Numeric values can represent numbers as well as timestamps. 184 // We need to look at the style of the cell to determine whether it is one or the other. 185 $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId); 186 187 if ($shouldFormatAsDate) { 188 $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId); 189 } else { 190 $nodeIntValue = (int) $nodeValue; 191 $nodeFloatValue = (float) $nodeValue; 192 $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue; 193 } 194 195 return $cellValue; 196 } 197 198 /** 199 * Returns a cell's PHP Date value, associated to the given timestamp. 200 * NOTE: The timestamp is a float representing the number of days since the base Excel date: 201 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 202 * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1. 203 * 204 * @param int $cellStyleId 0 being the default style 205 * 206 * @throws InvalidValueException If the value is not a valid timestamp 207 * 208 * @see ECMA-376 Part 1 - §18.17.4 209 */ 210 private function formatExcelTimestampValue(float $nodeValue, int $cellStyleId): string|DateTimeImmutable 211 { 212 if (!$this->isValidTimestampValue($nodeValue)) { 213 throw new InvalidValueException((string) $nodeValue); 214 } 215 216 return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId); 217 } 218 219 /** 220 * Returns whether the given timestamp is supported by SpreadsheetML. 221 * 222 * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries. 223 */ 224 private function isValidTimestampValue(float $timestampValue): bool 225 { 226 // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011) 227 return 228 $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 229 || !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884; 230 } 231 232 /** 233 * Returns a cell's PHP DateTime value, associated to the given timestamp. 234 * Only the time value matters. The date part is set to the base Excel date: 235 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 236 * 237 * @param int $cellStyleId 0 being the default style 238 */ 239 private function formatExcelTimestampValueAsDateTimeValue(float $nodeValue, int $cellStyleId): string|DateTimeImmutable 240 { 241 $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30'; 242 243 $daysSinceBaseDate = (int) $nodeValue; 244 $daysSign = '+'; 245 if ($daysSinceBaseDate < 0) { 246 $daysSinceBaseDate = abs($daysSinceBaseDate); 247 $daysSign = '-'; 248 } 249 $timeRemainder = fmod($nodeValue, 1); 250 $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0); 251 $secondsSign = '+'; 252 if ($secondsRemainder < 0) { 253 $secondsRemainder = abs($secondsRemainder); 254 $secondsSign = '-'; 255 } 256 257 $dateObj = DateTimeImmutable::createFromFormat('|Y-m-d', $baseDate); 258 \assert(false !== $dateObj); 259 $dateObj = $dateObj->modify($daysSign.$daysSinceBaseDate.'days'); 260 \assert(false !== $dateObj); 261 $dateObj = $dateObj->modify($secondsSign.$secondsRemainder.'seconds'); 262 \assert(false !== $dateObj); 263 264 if ($this->shouldFormatDates) { 265 $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId); 266 $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); 267 $cellValue = $dateObj->format($phpDateFormat); 268 } else { 269 $cellValue = $dateObj; 270 } 271 272 return $cellValue; 273 } 274 275 /** 276 * Returns the cell Boolean value from a specific node's Value. 277 * 278 * @return bool The value associated with the cell 279 */ 280 private function formatBooleanCellValue(string $nodeValue): bool 281 { 282 return (bool) $nodeValue; 283 } 284 285 /** 286 * Returns a cell's PHP Date value, associated to the given stored nodeValue. 287 * 288 * @see ECMA-376 Part 1 - §18.17.4 289 * 290 * @param string $nodeValue ISO 8601 Date string 291 * 292 * @throws InvalidValueException If the value is not a valid date 293 */ 294 private function formatDateCellValue(string $nodeValue): string|DateTimeImmutable 295 { 296 // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) 297 try { 298 $cellValue = ($this->shouldFormatDates) ? $nodeValue : new DateTimeImmutable($nodeValue); 299 } catch (Exception $exception) { 300 throw new InvalidValueException($nodeValue, '', 0, $exception); 301 } 302 303 return $cellValue; 304 } 305 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body