See Release Notes
Long Term Support Release
Differences Between: [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401]
1 <?php 2 3 namespace Box\Spout\Reader\XLSX\Helper; 4 5 use Box\Spout\Reader\Exception\InvalidValueException; 6 use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; 7 use Box\Spout\Reader\XLSX\Manager\StyleManager; 8 9 /** 10 * Class CellValueFormatter 11 * This class provides helper functions to format cell values 12 */ 13 class CellValueFormatter 14 { 15 /** Definition of all possible cell types */ 16 const CELL_TYPE_INLINE_STRING = 'inlineStr'; 17 const CELL_TYPE_STR = 'str'; 18 const CELL_TYPE_SHARED_STRING = 's'; 19 const CELL_TYPE_BOOLEAN = 'b'; 20 const CELL_TYPE_NUMERIC = 'n'; 21 const CELL_TYPE_DATE = 'd'; 22 const CELL_TYPE_ERROR = 'e'; 23 24 /** Definition of XML nodes names used to parse data */ 25 const XML_NODE_VALUE = 'v'; 26 const XML_NODE_INLINE_STRING_VALUE = 't'; 27 28 /** Definition of XML attributes used to parse data */ 29 const XML_ATTRIBUTE_TYPE = 't'; 30 const XML_ATTRIBUTE_STYLE_ID = 's'; 31 32 /** Constants used for date formatting */ 33 const NUM_SECONDS_IN_ONE_DAY = 86400; 34 const NUM_SECONDS_IN_ONE_HOUR = 3600; 35 const NUM_SECONDS_IN_ONE_MINUTE = 60; 36 37 /** 38 * February 29th, 1900 is NOT a leap year but Excel thinks it is... 39 * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel 40 */ 41 const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60; 42 43 /** @var SharedStringsManager Manages shared strings */ 44 protected $sharedStringsManager; 45 46 /** @var StyleManager Manages styles */ 47 protected $styleManager; 48 49 /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ 50 protected $shouldFormatDates; 51 52 /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */ 53 protected $shouldUse1904Dates; 54 55 /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */ 56 protected $escaper; 57 58 /** 59 * @param SharedStringsManager $sharedStringsManager Manages shared strings 60 * @param StyleManager $styleManager Manages styles 61 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings 62 * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900 63 * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data 64 */ 65 public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper) 66 { 67 $this->sharedStringsManager = $sharedStringsManager; 68 $this->styleManager = $styleManager; 69 $this->shouldFormatDates = $shouldFormatDates; 70 $this->shouldUse1904Dates = $shouldUse1904Dates; 71 $this->escaper = $escaper; 72 } 73 74 /** 75 * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. 76 * 77 * @param \DOMNode $node 78 * @throws InvalidValueException If the value is not valid 79 * @return string|int|float|bool|\DateTime The value associated with the cell 80 */ 81 public function extractAndFormatNodeValue($node) 82 { 83 // Default cell type is "n" 84 $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; 85 $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID); 86 $vNodeValue = $this->getVNodeValue($node); 87 88 if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { 89 return $vNodeValue; 90 } 91 92 switch ($cellType) { 93 case self::CELL_TYPE_INLINE_STRING: 94 return $this->formatInlineStringCellValue($node); 95 case self::CELL_TYPE_SHARED_STRING: 96 return $this->formatSharedStringCellValue($vNodeValue); 97 case self::CELL_TYPE_STR: 98 return $this->formatStrCellValue($vNodeValue); 99 case self::CELL_TYPE_BOOLEAN: 100 return $this->formatBooleanCellValue($vNodeValue); 101 case self::CELL_TYPE_NUMERIC: 102 return $this->formatNumericCellValue($vNodeValue, $cellStyleId); 103 case self::CELL_TYPE_DATE: 104 return $this->formatDateCellValue($vNodeValue); 105 default: 106 throw new InvalidValueException($vNodeValue); 107 } 108 } 109 110 /** 111 * Returns the cell's string value from a node's nested value node 112 * 113 * @param \DOMNode $node 114 * @return string The value associated with the cell 115 */ 116 protected function getVNodeValue($node) 117 { 118 // for cell types having a "v" tag containing the value. 119 // if not, the returned value should be empty string. 120 $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); 121 122 return ($vNode !== null) ? $vNode->nodeValue : ''; 123 } 124 125 /** 126 * Returns the cell String value where string is inline. 127 * 128 * @param \DOMNode $node 129 * @return string The value associated with the cell 130 */ 131 protected function formatInlineStringCellValue($node) 132 { 133 // inline strings are formatted this way: 134 // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c> 135 $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0); 136 $cellValue = $this->escaper->unescape($tNode->nodeValue); 137 138 return $cellValue; 139 } 140 141 /** 142 * Returns the cell String value from shared-strings file using nodeValue index. 143 * 144 * @param string $nodeValue 145 * @return string The value associated with the cell 146 */ 147 protected function formatSharedStringCellValue($nodeValue) 148 { 149 // shared strings are formatted this way: 150 // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c> 151 $sharedStringIndex = (int) $nodeValue; 152 $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex); 153 $cellValue = $this->escaper->unescape($escapedCellValue); 154 155 return $cellValue; 156 } 157 158 /** 159 * Returns the cell String value, where string is stored in value node. 160 * 161 * @param string $nodeValue 162 * @return string The value associated with the cell 163 */ 164 protected function formatStrCellValue($nodeValue) 165 { 166 $escapedCellValue = trim($nodeValue); 167 $cellValue = $this->escaper->unescape($escapedCellValue); 168 169 return $cellValue; 170 } 171 172 /** 173 * Returns the cell Numeric value from string of nodeValue. 174 * The value can also represent a timestamp and a DateTime will be returned. 175 * 176 * @param string $nodeValue 177 * @param int $cellStyleId 0 being the default style 178 * @return int|float|\DateTime The value associated with the cell 179 */ 180 protected function formatNumericCellValue($nodeValue, $cellStyleId) 181 { 182 // Numeric values can represent numbers as well as timestamps. 183 // We need to look at the style of the cell to determine whether it is one or the other. 184 $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId); 185 186 if ($shouldFormatAsDate) { 187 $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId); 188 } else { 189 $nodeIntValue = (int) $nodeValue; 190 $nodeFloatValue = (float) $nodeValue; 191 $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue; 192 } 193 194 return $cellValue; 195 } 196 197 /** 198 * Returns a cell's PHP Date value, associated to the given timestamp. 199 * NOTE: The timestamp is a float representing the number of days since the base Excel date: 200 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 201 * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1. 202 * 203 * @see ECMA-376 Part 1 - §18.17.4 204 * 205 * @param float $nodeValue 206 * @param int $cellStyleId 0 being the default style 207 * @throws InvalidValueException If the value is not a valid timestamp 208 * @return \DateTime The value associated with the cell 209 */ 210 protected function formatExcelTimestampValue($nodeValue, $cellStyleId) 211 { 212 if ($this->isValidTimestampValue($nodeValue)) { 213 $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId); 214 } else { 215 throw new InvalidValueException($nodeValue); 216 } 217 218 return $cellValue; 219 } 220 221 /** 222 * Returns whether the given timestamp is supported by SpreadsheetML 223 * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries. 224 * 225 * @param float $timestampValue 226 * @return bool 227 */ 228 protected function isValidTimestampValue($timestampValue) 229 { 230 // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011) 231 return ( 232 $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 || 233 !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884 234 ); 235 } 236 237 /** 238 * Returns a cell's PHP DateTime value, associated to the given timestamp. 239 * Only the time value matters. The date part is set to the base Excel date: 240 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 241 * 242 * @param float $nodeValue 243 * @param int $cellStyleId 0 being the default style 244 * @return \DateTime|string The value associated with the cell 245 */ 246 protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId) 247 { 248 $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30'; 249 250 $daysSinceBaseDate = (int) $nodeValue; 251 $timeRemainder = fmod($nodeValue, 1); 252 $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0); 253 254 $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate); 255 $dateObj->modify('+' . $daysSinceBaseDate . 'days'); 256 $dateObj->modify('+' . $secondsRemainder . 'seconds'); 257 258 if ($this->shouldFormatDates) { 259 $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId); 260 $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); 261 $cellValue = $dateObj->format($phpDateFormat); 262 } else { 263 $cellValue = $dateObj; 264 } 265 266 return $cellValue; 267 } 268 269 /** 270 * Returns the cell Boolean value from a specific node's Value. 271 * 272 * @param string $nodeValue 273 * @return bool The value associated with the cell 274 */ 275 protected function formatBooleanCellValue($nodeValue) 276 { 277 return (bool) $nodeValue; 278 } 279 280 /** 281 * Returns a cell's PHP Date value, associated to the given stored nodeValue. 282 * @see ECMA-376 Part 1 - §18.17.4 283 * 284 * @param string $nodeValue ISO 8601 Date string 285 * @throws InvalidValueException If the value is not a valid date 286 * @return \DateTime|string The value associated with the cell 287 */ 288 protected function formatDateCellValue($nodeValue) 289 { 290 // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) 291 try { 292 $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue); 293 } catch (\Exception $e) { 294 throw new InvalidValueException($nodeValue); 295 } 296 297 return $cellValue; 298 } 299 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body