Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\ODS\Helper;
   4  
   5  use Box\Spout\Reader\Exception\InvalidValueException;
   6  
   7  /**
   8   * Class CellValueFormatter
   9   * This class provides helper functions to format cell values
  10   */
  11  class CellValueFormatter
  12  {
  13      /** Definition of all possible cell types */
  14      const CELL_TYPE_STRING = 'string';
  15      const CELL_TYPE_FLOAT = 'float';
  16      const CELL_TYPE_BOOLEAN = 'boolean';
  17      const CELL_TYPE_DATE = 'date';
  18      const CELL_TYPE_TIME = 'time';
  19      const CELL_TYPE_CURRENCY = 'currency';
  20      const CELL_TYPE_PERCENTAGE = 'percentage';
  21      const CELL_TYPE_VOID = 'void';
  22  
  23      /** Definition of XML nodes names used to parse data */
  24      const XML_NODE_P = 'p';
  25      const XML_NODE_TEXT_A = 'text:a';
  26      const XML_NODE_TEXT_SPAN = 'text:span';
  27      const XML_NODE_TEXT_S = 'text:s';
  28      const XML_NODE_TEXT_TAB = 'text:tab';
  29      const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';
  30  
  31      /** Definition of XML attributes used to parse data */
  32      const XML_ATTRIBUTE_TYPE = 'office:value-type';
  33      const XML_ATTRIBUTE_VALUE = 'office:value';
  34      const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
  35      const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value';
  36      const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value';
  37      const XML_ATTRIBUTE_CURRENCY = 'office:currency';
  38      const XML_ATTRIBUTE_C = 'text:c';
  39  
  40      /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
  41      protected $shouldFormatDates;
  42  
  43      /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */
  44      protected $escaper;
  45  
  46      /** @var array List of XML nodes representing whitespaces and their corresponding value */
  47      private static $WHITESPACE_XML_NODES = [
  48          self::XML_NODE_TEXT_S => ' ',
  49          self::XML_NODE_TEXT_TAB => "\t",
  50          self::XML_NODE_TEXT_LINE_BREAK => "\n",
  51      ];
  52  
  53      /**
  54       * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
  55       * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
  56       */
  57      public function __construct($shouldFormatDates, $escaper)
  58      {
  59          $this->shouldFormatDates = $shouldFormatDates;
  60          $this->escaper = $escaper;
  61      }
  62  
  63      /**
  64       * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
  65       * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
  66       *
  67       * @param \DOMNode $node
  68       * @throws InvalidValueException If the node value is not valid
  69       * @return string|int|float|bool|\DateTime|\DateInterval The value associated with the cell, empty string if cell's type is void/undefined
  70       */
  71      public function extractAndFormatNodeValue($node)
  72      {
  73          $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
  74  
  75          switch ($cellType) {
  76              case self::CELL_TYPE_STRING:
  77                  return $this->formatStringCellValue($node);
  78              case self::CELL_TYPE_FLOAT:
  79                  return $this->formatFloatCellValue($node);
  80              case self::CELL_TYPE_BOOLEAN:
  81                  return $this->formatBooleanCellValue($node);
  82              case self::CELL_TYPE_DATE:
  83                  return $this->formatDateCellValue($node);
  84              case self::CELL_TYPE_TIME:
  85                  return $this->formatTimeCellValue($node);
  86              case self::CELL_TYPE_CURRENCY:
  87                  return $this->formatCurrencyCellValue($node);
  88              case self::CELL_TYPE_PERCENTAGE:
  89                  return $this->formatPercentageCellValue($node);
  90              case self::CELL_TYPE_VOID:
  91              default:
  92                  return '';
  93          }
  94      }
  95  
  96      /**
  97       * Returns the cell String value.
  98       *
  99       * @param \DOMNode $node
 100       * @return string The value associated with the cell
 101       */
 102      protected function formatStringCellValue($node)
 103      {
 104          $pNodeValues = [];
 105          $pNodes = $node->getElementsByTagName(self::XML_NODE_P);
 106  
 107          foreach ($pNodes as $pNode) {
 108              $pNodeValues[] = $this->extractTextValueFromNode($pNode);
 109          }
 110  
 111          $escapedCellValue = \implode("\n", $pNodeValues);
 112          $cellValue = $this->escaper->unescape($escapedCellValue);
 113  
 114          return $cellValue;
 115      }
 116  
 117      /**
 118       * @param $pNode
 119       * @return string
 120       */
 121      private function extractTextValueFromNode($pNode)
 122      {
 123          $textValue = '';
 124  
 125          foreach ($pNode->childNodes as $childNode) {
 126              if ($childNode instanceof \DOMText) {
 127                  $textValue .= $childNode->nodeValue;
 128              } elseif ($this->isWhitespaceNode($childNode->nodeName)) {
 129                  $textValue .= $this->transformWhitespaceNode($childNode);
 130              } elseif ($childNode->nodeName === self::XML_NODE_TEXT_A || $childNode->nodeName === self::XML_NODE_TEXT_SPAN) {
 131                  $textValue .= $this->extractTextValueFromNode($childNode);
 132              }
 133          }
 134  
 135          return $textValue;
 136      }
 137  
 138      /**
 139       * Returns whether the given node is a whitespace node. It must be one of these:
 140       *  - <text:s />
 141       *  - <text:tab />
 142       *  - <text:line-break />
 143       *
 144       * @param string $nodeName
 145       * @return bool
 146       */
 147      private function isWhitespaceNode($nodeName)
 148      {
 149          return isset(self::$WHITESPACE_XML_NODES[$nodeName]);
 150      }
 151  
 152      /**
 153       * The "<text:p>" node can contain the string value directly
 154       * or contain child elements. In this case, whitespaces contain in
 155       * the child elements should be replaced by their XML equivalent:
 156       *  - space => <text:s />
 157       *  - tab => <text:tab />
 158       *  - line break => <text:line-break />
 159       *
 160       * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
 161       *
 162       * @param \DOMNode $node The XML node representing a whitespace
 163       * @return string The corresponding whitespace value
 164       */
 165      private function transformWhitespaceNode($node)
 166      {
 167          $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"
 168          $numWhitespaces = (!empty($countAttribute)) ? (int) $countAttribute : 1;
 169  
 170          return \str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
 171      }
 172  
 173      /**
 174       * Returns the cell Numeric value from the given node.
 175       *
 176       * @param \DOMNode $node
 177       * @return int|float The value associated with the cell
 178       */
 179      protected function formatFloatCellValue($node)
 180      {
 181          $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
 182  
 183          $nodeIntValue = (int) $nodeValue;
 184          $nodeFloatValue = (float) $nodeValue;
 185          $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
 186  
 187          return $cellValue;
 188      }
 189  
 190      /**
 191       * Returns the cell Boolean value from the given node.
 192       *
 193       * @param \DOMNode $node
 194       * @return bool The value associated with the cell
 195       */
 196      protected function formatBooleanCellValue($node)
 197      {
 198          $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);
 199  
 200          return (bool) $nodeValue;
 201      }
 202  
 203      /**
 204       * Returns the cell Date value from the given node.
 205       *
 206       * @param \DOMNode $node
 207       * @throws InvalidValueException If the value is not a valid date
 208       * @return \DateTime|string The value associated with the cell
 209       */
 210      protected function formatDateCellValue($node)
 211      {
 212          // The XML node looks like this:
 213          // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
 214          //   <text:p>05/19/16 04:39 PM</text:p>
 215          // </table:table-cell>
 216  
 217          if ($this->shouldFormatDates) {
 218              // The date is already formatted in the "p" tag
 219              $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
 220              $cellValue = $nodeWithValueAlreadyFormatted->nodeValue;
 221          } else {
 222              // otherwise, get it from the "date-value" attribute
 223              $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
 224              try {
 225                  $cellValue = new \DateTime($nodeValue);
 226              } catch (\Exception $e) {
 227                  throw new InvalidValueException($nodeValue);
 228              }
 229          }
 230  
 231          return $cellValue;
 232      }
 233  
 234      /**
 235       * Returns the cell Time value from the given node.
 236       *
 237       * @param \DOMNode $node
 238       * @throws InvalidValueException If the value is not a valid time
 239       * @return \DateInterval|string The value associated with the cell
 240       */
 241      protected function formatTimeCellValue($node)
 242      {
 243          // The XML node looks like this:
 244          // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
 245          //   <text:p>01:24:00 PM</text:p>
 246          // </table:table-cell>
 247  
 248          if ($this->shouldFormatDates) {
 249              // The date is already formatted in the "p" tag
 250              $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
 251              $cellValue = $nodeWithValueAlreadyFormatted->nodeValue;
 252          } else {
 253              // otherwise, get it from the "time-value" attribute
 254              $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
 255              try {
 256                  $cellValue = new \DateInterval($nodeValue);
 257              } catch (\Exception $e) {
 258                  throw new InvalidValueException($nodeValue);
 259              }
 260          }
 261  
 262          return $cellValue;
 263      }
 264  
 265      /**
 266       * Returns the cell Currency value from the given node.
 267       *
 268       * @param \DOMNode $node
 269       * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
 270       */
 271      protected function formatCurrencyCellValue($node)
 272      {
 273          $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
 274          $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);
 275  
 276          return "$value $currency";
 277      }
 278  
 279      /**
 280       * Returns the cell Percentage value from the given node.
 281       *
 282       * @param \DOMNode $node
 283       * @return int|float The value associated with the cell
 284       */
 285      protected function formatPercentageCellValue($node)
 286      {
 287          // percentages are formatted like floats
 288          return $this->formatFloatCellValue($node);
 289      }
 290  }