See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 39 and 401]
1 <?php 2 3 namespace Box\Spout\Reader\ODS\Helper; 4 5 use Box\Spout\Reader\Exception\InvalidValueException; 6 7 /** 8 * Class CellValueFormatter 9 * This class provides helper functions to format cell values 10 */ 11 class CellValueFormatter 12 { 13 /** Definition of all possible cell types */ 14 const CELL_TYPE_STRING = 'string'; 15 const CELL_TYPE_FLOAT = 'float'; 16 const CELL_TYPE_BOOLEAN = 'boolean'; 17 const CELL_TYPE_DATE = 'date'; 18 const CELL_TYPE_TIME = 'time'; 19 const CELL_TYPE_CURRENCY = 'currency'; 20 const CELL_TYPE_PERCENTAGE = 'percentage'; 21 const CELL_TYPE_VOID = 'void'; 22 23 /** Definition of XML nodes names used to parse data */ 24 const XML_NODE_P = 'p'; 25 const XML_NODE_TEXT_A = 'text:a'; 26 const XML_NODE_TEXT_SPAN = 'text:span'; 27 const XML_NODE_TEXT_S = 'text:s'; 28 const XML_NODE_TEXT_TAB = 'text:tab'; 29 const XML_NODE_TEXT_LINE_BREAK = 'text:line-break'; 30 31 /** Definition of XML attributes used to parse data */ 32 const XML_ATTRIBUTE_TYPE = 'office:value-type'; 33 const XML_ATTRIBUTE_VALUE = 'office:value'; 34 const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value'; 35 const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value'; 36 const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value'; 37 const XML_ATTRIBUTE_CURRENCY = 'office:currency'; 38 const XML_ATTRIBUTE_C = 'text:c'; 39 40 /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ 41 protected $shouldFormatDates; 42 43 /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */ 44 protected $escaper; 45 46 /** @var array List of XML nodes representing whitespaces and their corresponding value */ 47 private static $WHITESPACE_XML_NODES = [ 48 self::XML_NODE_TEXT_S => ' ', 49 self::XML_NODE_TEXT_TAB => "\t", 50 self::XML_NODE_TEXT_LINE_BREAK => "\n", 51 ]; 52 53 /** 54 * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings 55 * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data 56 */ 57 public function __construct($shouldFormatDates, $escaper) 58 { 59 $this->shouldFormatDates = $shouldFormatDates; 60 $this->escaper = $escaper; 61 } 62 63 /** 64 * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. 65 * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13 66 * 67 * @param \DOMNode $node 68 * @throws InvalidValueException If the node value is not valid 69 * @return string|int|float|bool|\DateTime|\DateInterval The value associated with the cell, empty string if cell's type is void/undefined 70 */ 71 public function extractAndFormatNodeValue($node) 72 { 73 $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE); 74 75 switch ($cellType) { 76 case self::CELL_TYPE_STRING: 77 return $this->formatStringCellValue($node); 78 case self::CELL_TYPE_FLOAT: 79 return $this->formatFloatCellValue($node); 80 case self::CELL_TYPE_BOOLEAN: 81 return $this->formatBooleanCellValue($node); 82 case self::CELL_TYPE_DATE: 83 return $this->formatDateCellValue($node); 84 case self::CELL_TYPE_TIME: 85 return $this->formatTimeCellValue($node); 86 case self::CELL_TYPE_CURRENCY: 87 return $this->formatCurrencyCellValue($node); 88 case self::CELL_TYPE_PERCENTAGE: 89 return $this->formatPercentageCellValue($node); 90 case self::CELL_TYPE_VOID: 91 default: 92 return ''; 93 } 94 } 95 96 /** 97 * Returns the cell String value. 98 * 99 * @param \DOMNode $node 100 * @return string The value associated with the cell 101 */ 102 protected function formatStringCellValue($node) 103 { 104 $pNodeValues = []; 105 $pNodes = $node->getElementsByTagName(self::XML_NODE_P); 106 107 foreach ($pNodes as $pNode) { 108 $pNodeValues[] = $this->extractTextValueFromNode($pNode); 109 } 110 111 $escapedCellValue = \implode("\n", $pNodeValues); 112 $cellValue = $this->escaper->unescape($escapedCellValue); 113 114 return $cellValue; 115 } 116 117 /** 118 * @param $pNode 119 * @return string 120 */ 121 private function extractTextValueFromNode($pNode) 122 { 123 $textValue = ''; 124 125 foreach ($pNode->childNodes as $childNode) { 126 if ($childNode instanceof \DOMText) { 127 $textValue .= $childNode->nodeValue; 128 } elseif ($this->isWhitespaceNode($childNode->nodeName)) { 129 $textValue .= $this->transformWhitespaceNode($childNode); 130 } elseif ($childNode->nodeName === self::XML_NODE_TEXT_A || $childNode->nodeName === self::XML_NODE_TEXT_SPAN) { 131 $textValue .= $this->extractTextValueFromNode($childNode); 132 } 133 } 134 135 return $textValue; 136 } 137 138 /** 139 * Returns whether the given node is a whitespace node. It must be one of these: 140 * - <text:s /> 141 * - <text:tab /> 142 * - <text:line-break /> 143 * 144 * @param string $nodeName 145 * @return bool 146 */ 147 private function isWhitespaceNode($nodeName) 148 { 149 return isset(self::$WHITESPACE_XML_NODES[$nodeName]); 150 } 151 152 /** 153 * The "<text:p>" node can contain the string value directly 154 * or contain child elements. In this case, whitespaces contain in 155 * the child elements should be replaced by their XML equivalent: 156 * - space => <text:s /> 157 * - tab => <text:tab /> 158 * - line break => <text:line-break /> 159 * 160 * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949 161 * 162 * @param \DOMNode $node The XML node representing a whitespace 163 * @return string The corresponding whitespace value 164 */ 165 private function transformWhitespaceNode($node) 166 { 167 $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>" 168 $numWhitespaces = (!empty($countAttribute)) ? (int) $countAttribute : 1; 169 170 return \str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces); 171 } 172 173 /** 174 * Returns the cell Numeric value from the given node. 175 * 176 * @param \DOMNode $node 177 * @return int|float The value associated with the cell 178 */ 179 protected function formatFloatCellValue($node) 180 { 181 $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE); 182 183 $nodeIntValue = (int) $nodeValue; 184 $nodeFloatValue = (float) $nodeValue; 185 $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue; 186 187 return $cellValue; 188 } 189 190 /** 191 * Returns the cell Boolean value from the given node. 192 * 193 * @param \DOMNode $node 194 * @return bool The value associated with the cell 195 */ 196 protected function formatBooleanCellValue($node) 197 { 198 $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE); 199 200 return (bool) $nodeValue; 201 } 202 203 /** 204 * Returns the cell Date value from the given node. 205 * 206 * @param \DOMNode $node 207 * @throws InvalidValueException If the value is not a valid date 208 * @return \DateTime|string The value associated with the cell 209 */ 210 protected function formatDateCellValue($node) 211 { 212 // The XML node looks like this: 213 // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date"> 214 // <text:p>05/19/16 04:39 PM</text:p> 215 // </table:table-cell> 216 217 if ($this->shouldFormatDates) { 218 // The date is already formatted in the "p" tag 219 $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0); 220 $cellValue = $nodeWithValueAlreadyFormatted->nodeValue; 221 } else { 222 // otherwise, get it from the "date-value" attribute 223 $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE); 224 try { 225 $cellValue = new \DateTime($nodeValue); 226 } catch (\Exception $e) { 227 throw new InvalidValueException($nodeValue); 228 } 229 } 230 231 return $cellValue; 232 } 233 234 /** 235 * Returns the cell Time value from the given node. 236 * 237 * @param \DOMNode $node 238 * @throws InvalidValueException If the value is not a valid time 239 * @return \DateInterval|string The value associated with the cell 240 */ 241 protected function formatTimeCellValue($node) 242 { 243 // The XML node looks like this: 244 // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time"> 245 // <text:p>01:24:00 PM</text:p> 246 // </table:table-cell> 247 248 if ($this->shouldFormatDates) { 249 // The date is already formatted in the "p" tag 250 $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0); 251 $cellValue = $nodeWithValueAlreadyFormatted->nodeValue; 252 } else { 253 // otherwise, get it from the "time-value" attribute 254 $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE); 255 try { 256 $cellValue = new \DateInterval($nodeValue); 257 } catch (\Exception $e) { 258 throw new InvalidValueException($nodeValue); 259 } 260 } 261 262 return $cellValue; 263 } 264 265 /** 266 * Returns the cell Currency value from the given node. 267 * 268 * @param \DOMNode $node 269 * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR") 270 */ 271 protected function formatCurrencyCellValue($node) 272 { 273 $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE); 274 $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY); 275 276 return "$value $currency"; 277 } 278 279 /** 280 * Returns the cell Percentage value from the given node. 281 * 282 * @param \DOMNode $node 283 * @return int|float The value associated with the cell 284 */ 285 protected function formatPercentageCellValue($node) 286 { 287 // percentages are formatted like floats 288 return $this->formatFloatCellValue($node); 289 } 290 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body