See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 39 and 401]
1 <?php 2 3 namespace Box\Spout\Reader\XLSX\Helper; 4 5 use Box\Spout\Reader\Exception\InvalidValueException; 6 use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; 7 use Box\Spout\Reader\XLSX\Manager\StyleManager; 8 9 /** 10 * Class CellValueFormatter 11 * This class provides helper functions to format cell values 12 */ 13 class CellValueFormatter 14 { 15 /** Definition of all possible cell types */ 16 const CELL_TYPE_INLINE_STRING = 'inlineStr'; 17 const CELL_TYPE_STR = 'str'; 18 const CELL_TYPE_SHARED_STRING = 's'; 19 const CELL_TYPE_BOOLEAN = 'b'; 20 const CELL_TYPE_NUMERIC = 'n'; 21 const CELL_TYPE_DATE = 'd'; 22 const CELL_TYPE_ERROR = 'e'; 23 24 /** Definition of XML nodes names used to parse data */ 25 const XML_NODE_VALUE = 'v'; 26 const XML_NODE_INLINE_STRING_VALUE = 't'; 27 28 /** Definition of XML attributes used to parse data */ 29 const XML_ATTRIBUTE_TYPE = 't'; 30 const XML_ATTRIBUTE_STYLE_ID = 's'; 31 32 /** Constants used for date formatting */ 33 const NUM_SECONDS_IN_ONE_DAY = 86400; 34 35 /** @var SharedStringsManager Manages shared strings */ 36 protected $sharedStringsManager; 37 38 /** @var StyleManager Manages styles */ 39 protected $styleManager; 40 41 /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ 42 protected $shouldFormatDates; 43 44 /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */ 45 protected $shouldUse1904Dates; 46 47 /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */ 48 protected $escaper; 49 50 /** 51 * @param SharedStringsManager $sharedStringsManager Manages shared strings 52 * @param StyleManager $styleManager Manages styles 53 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings 54 * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900 55 * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data 56 */ 57 public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper) 58 { 59 $this->sharedStringsManager = $sharedStringsManager; 60 $this->styleManager = $styleManager; 61 $this->shouldFormatDates = $shouldFormatDates; 62 $this->shouldUse1904Dates = $shouldUse1904Dates; 63 $this->escaper = $escaper; 64 } 65 66 /** 67 * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. 68 * 69 * @param \DOMNode $node 70 * @throws InvalidValueException If the value is not valid 71 * @return string|int|float|bool|\DateTime The value associated with the cell 72 */ 73 public function extractAndFormatNodeValue($node) 74 { 75 // Default cell type is "n" 76 $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; 77 $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID); 78 $vNodeValue = $this->getVNodeValue($node); 79 80 if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { 81 return $vNodeValue; 82 } 83 84 switch ($cellType) { 85 case self::CELL_TYPE_INLINE_STRING: 86 return $this->formatInlineStringCellValue($node); 87 case self::CELL_TYPE_SHARED_STRING: 88 return $this->formatSharedStringCellValue($vNodeValue); 89 case self::CELL_TYPE_STR: 90 return $this->formatStrCellValue($vNodeValue); 91 case self::CELL_TYPE_BOOLEAN: 92 return $this->formatBooleanCellValue($vNodeValue); 93 case self::CELL_TYPE_NUMERIC: 94 return $this->formatNumericCellValue($vNodeValue, $cellStyleId); 95 case self::CELL_TYPE_DATE: 96 return $this->formatDateCellValue($vNodeValue); 97 default: 98 throw new InvalidValueException($vNodeValue); 99 } 100 } 101 102 /** 103 * Returns the cell's string value from a node's nested value node 104 * 105 * @param \DOMNode $node 106 * @return string The value associated with the cell 107 */ 108 protected function getVNodeValue($node) 109 { 110 // for cell types having a "v" tag containing the value. 111 // if not, the returned value should be empty string. 112 $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); 113 114 return ($vNode !== null) ? $vNode->nodeValue : ''; 115 } 116 117 /** 118 * Returns the cell String value where string is inline. 119 * 120 * @param \DOMNode $node 121 * @return string The value associated with the cell 122 */ 123 protected function formatInlineStringCellValue($node) 124 { 125 // inline strings are formatted this way (they can contain any number of <t> nodes): 126 // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c> 127 $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE); 128 129 $cellValue = ''; 130 for ($i = 0; $i < $tNodes->count(); $i++) { 131 $tNode = $tNodes->item($i); 132 $cellValue .= $this->escaper->unescape($tNode->nodeValue); 133 } 134 135 return $cellValue; 136 } 137 138 /** 139 * Returns the cell String value from shared-strings file using nodeValue index. 140 * 141 * @param string $nodeValue 142 * @return string The value associated with the cell 143 */ 144 protected function formatSharedStringCellValue($nodeValue) 145 { 146 // shared strings are formatted this way: 147 // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c> 148 $sharedStringIndex = (int) $nodeValue; 149 $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex); 150 $cellValue = $this->escaper->unescape($escapedCellValue); 151 152 return $cellValue; 153 } 154 155 /** 156 * Returns the cell String value, where string is stored in value node. 157 * 158 * @param string $nodeValue 159 * @return string The value associated with the cell 160 */ 161 protected function formatStrCellValue($nodeValue) 162 { 163 $escapedCellValue = \trim($nodeValue); 164 $cellValue = $this->escaper->unescape($escapedCellValue); 165 166 return $cellValue; 167 } 168 169 /** 170 * Returns the cell Numeric value from string of nodeValue. 171 * The value can also represent a timestamp and a DateTime will be returned. 172 * 173 * @param string $nodeValue 174 * @param int $cellStyleId 0 being the default style 175 * @return int|float|\DateTime The value associated with the cell 176 */ 177 protected function formatNumericCellValue($nodeValue, $cellStyleId) 178 { 179 // Numeric values can represent numbers as well as timestamps. 180 // We need to look at the style of the cell to determine whether it is one or the other. 181 $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId); 182 183 if ($shouldFormatAsDate) { 184 $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId); 185 } else { 186 $nodeIntValue = (int) $nodeValue; 187 $nodeFloatValue = (float) $nodeValue; 188 $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue; 189 } 190 191 return $cellValue; 192 } 193 194 /** 195 * Returns a cell's PHP Date value, associated to the given timestamp. 196 * NOTE: The timestamp is a float representing the number of days since the base Excel date: 197 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 198 * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1. 199 * 200 * @see ECMA-376 Part 1 - §18.17.4 201 * 202 * @param float $nodeValue 203 * @param int $cellStyleId 0 being the default style 204 * @throws InvalidValueException If the value is not a valid timestamp 205 * @return \DateTime The value associated with the cell 206 */ 207 protected function formatExcelTimestampValue($nodeValue, $cellStyleId) 208 { 209 if ($this->isValidTimestampValue($nodeValue)) { 210 $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId); 211 } else { 212 throw new InvalidValueException($nodeValue); 213 } 214 215 return $cellValue; 216 } 217 218 /** 219 * Returns whether the given timestamp is supported by SpreadsheetML 220 * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries. 221 * 222 * @param float $timestampValue 223 * @return bool 224 */ 225 protected function isValidTimestampValue($timestampValue) 226 { 227 // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011) 228 return ( 229 $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 || 230 !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884 231 ); 232 } 233 234 /** 235 * Returns a cell's PHP DateTime value, associated to the given timestamp. 236 * Only the time value matters. The date part is set to the base Excel date: 237 * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. 238 * 239 * @param float $nodeValue 240 * @param int $cellStyleId 0 being the default style 241 * @return \DateTime|string The value associated with the cell 242 */ 243 protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId) 244 { 245 $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30'; 246 247 $daysSinceBaseDate = (int) $nodeValue; 248 $timeRemainder = \fmod($nodeValue, 1); 249 $secondsRemainder = \round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0); 250 251 $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate); 252 $dateObj->modify('+' . $daysSinceBaseDate . 'days'); 253 $dateObj->modify('+' . $secondsRemainder . 'seconds'); 254 255 if ($this->shouldFormatDates) { 256 $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId); 257 $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); 258 $cellValue = $dateObj->format($phpDateFormat); 259 } else { 260 $cellValue = $dateObj; 261 } 262 263 return $cellValue; 264 } 265 266 /** 267 * Returns the cell Boolean value from a specific node's Value. 268 * 269 * @param string $nodeValue 270 * @return bool The value associated with the cell 271 */ 272 protected function formatBooleanCellValue($nodeValue) 273 { 274 return (bool) $nodeValue; 275 } 276 277 /** 278 * Returns a cell's PHP Date value, associated to the given stored nodeValue. 279 * @see ECMA-376 Part 1 - §18.17.4 280 * 281 * @param string $nodeValue ISO 8601 Date string 282 * @throws InvalidValueException If the value is not a valid date 283 * @return \DateTime|string The value associated with the cell 284 */ 285 protected function formatDateCellValue($nodeValue) 286 { 287 // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php) 288 try { 289 $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue); 290 } catch (\Exception $e) { 291 throw new InvalidValueException($nodeValue); 292 } 293 294 return $cellValue; 295 } 296 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body