Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\ODS\Helper;
   6  
   7  use DateInterval;
   8  use DateTimeImmutable;
   9  use DOMElement;
  10  use DOMNode;
  11  use DOMText;
  12  use Exception;
  13  use OpenSpout\Common\Helper\Escaper\ODS;
  14  use OpenSpout\Reader\Exception\InvalidValueException;
  15  
  16  /**
  17   * @internal
  18   */
  19  final class CellValueFormatter
  20  {
  21      /**
  22       * Definition of all possible cell types.
  23       */
  24      public const CELL_TYPE_STRING = 'string';
  25      public const CELL_TYPE_FLOAT = 'float';
  26      public const CELL_TYPE_BOOLEAN = 'boolean';
  27      public const CELL_TYPE_DATE = 'date';
  28      public const CELL_TYPE_TIME = 'time';
  29      public const CELL_TYPE_CURRENCY = 'currency';
  30      public const CELL_TYPE_PERCENTAGE = 'percentage';
  31      public const CELL_TYPE_VOID = 'void';
  32  
  33      /**
  34       * Definition of XML nodes names used to parse data.
  35       */
  36      public const XML_NODE_P = 'p';
  37      public const XML_NODE_TEXT_A = 'text:a';
  38      public const XML_NODE_TEXT_SPAN = 'text:span';
  39      public const XML_NODE_TEXT_S = 'text:s';
  40      public const XML_NODE_TEXT_TAB = 'text:tab';
  41      public const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';
  42  
  43      /**
  44       * Definition of XML attributes used to parse data.
  45       */
  46      public const XML_ATTRIBUTE_TYPE = 'office:value-type';
  47      public const XML_ATTRIBUTE_VALUE = 'office:value';
  48      public const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
  49      public const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value';
  50      public const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value';
  51      public const XML_ATTRIBUTE_CURRENCY = 'office:currency';
  52      public const XML_ATTRIBUTE_C = 'text:c';
  53  
  54      /**
  55       * List of XML nodes representing whitespaces and their corresponding value.
  56       */
  57      private const WHITESPACE_XML_NODES = [
  58          self::XML_NODE_TEXT_S => ' ',
  59          self::XML_NODE_TEXT_TAB => "\t",
  60          self::XML_NODE_TEXT_LINE_BREAK => "\n",
  61      ];
  62  
  63      /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
  64      private bool $shouldFormatDates;
  65  
  66      /** @var ODS Used to unescape XML data */
  67      private ODS $escaper;
  68  
  69      /**
  70       * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
  71       * @param ODS  $escaper           Used to unescape XML data
  72       */
  73      public function __construct(bool $shouldFormatDates, ODS $escaper)
  74      {
  75          $this->shouldFormatDates = $shouldFormatDates;
  76          $this->escaper = $escaper;
  77      }
  78  
  79      /**
  80       * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
  81       *
  82       * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
  83       *
  84       * @return bool|DateInterval|DateTimeImmutable|float|int|string The value associated with the cell, empty string if cell's type is void/undefined
  85       *
  86       * @throws InvalidValueException If the node value is not valid
  87       */
  88      public function extractAndFormatNodeValue(DOMElement $node): bool|DateInterval|DateTimeImmutable|float|int|string
  89      {
  90          $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
  91  
  92          return match ($cellType) {
  93              self::CELL_TYPE_STRING => $this->formatStringCellValue($node),
  94              self::CELL_TYPE_FLOAT => $this->formatFloatCellValue($node),
  95              self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($node),
  96              self::CELL_TYPE_DATE => $this->formatDateCellValue($node),
  97              self::CELL_TYPE_TIME => $this->formatTimeCellValue($node),
  98              self::CELL_TYPE_CURRENCY => $this->formatCurrencyCellValue($node),
  99              self::CELL_TYPE_PERCENTAGE => $this->formatPercentageCellValue($node),
 100              default => '',
 101          };
 102      }
 103  
 104      /**
 105       * Returns the cell String value.
 106       *
 107       * @return string The value associated with the cell
 108       */
 109      private function formatStringCellValue(DOMElement $node): string
 110      {
 111          $pNodeValues = [];
 112          $pNodes = $node->getElementsByTagName(self::XML_NODE_P);
 113  
 114          foreach ($pNodes as $pNode) {
 115              $pNodeValues[] = $this->extractTextValueFromNode($pNode);
 116          }
 117  
 118          $escapedCellValue = implode("\n", $pNodeValues);
 119  
 120          return $this->escaper->unescape($escapedCellValue);
 121      }
 122  
 123      /**
 124       * Returns the cell Numeric value from the given node.
 125       *
 126       * @return float|int The value associated with the cell
 127       */
 128      private function formatFloatCellValue(DOMElement $node): float|int
 129      {
 130          $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
 131  
 132          $nodeIntValue = (int) $nodeValue;
 133          $nodeFloatValue = (float) $nodeValue;
 134  
 135          return ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
 136      }
 137  
 138      /**
 139       * Returns the cell Boolean value from the given node.
 140       *
 141       * @return bool The value associated with the cell
 142       */
 143      private function formatBooleanCellValue(DOMElement $node): bool
 144      {
 145          return (bool) $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);
 146      }
 147  
 148      /**
 149       * Returns the cell Date value from the given node.
 150       *
 151       * @throws InvalidValueException If the value is not a valid date
 152       */
 153      private function formatDateCellValue(DOMElement $node): string|DateTimeImmutable
 154      {
 155          // The XML node looks like this:
 156          // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
 157          //   <text:p>05/19/16 04:39 PM</text:p>
 158          // </table:table-cell>
 159  
 160          if ($this->shouldFormatDates) {
 161              // The date is already formatted in the "p" tag
 162              $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
 163              $cellValue = $nodeWithValueAlreadyFormatted->nodeValue;
 164          } else {
 165              // otherwise, get it from the "date-value" attribute
 166              $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
 167  
 168              try {
 169                  $cellValue = new DateTimeImmutable($nodeValue);
 170              } catch (Exception $previous) {
 171                  throw new InvalidValueException($nodeValue, '', 0, $previous);
 172              }
 173          }
 174  
 175          return $cellValue;
 176      }
 177  
 178      /**
 179       * Returns the cell Time value from the given node.
 180       *
 181       * @return DateInterval|string The value associated with the cell
 182       *
 183       * @throws InvalidValueException If the value is not a valid time
 184       */
 185      private function formatTimeCellValue(DOMElement $node): DateInterval|string
 186      {
 187          // The XML node looks like this:
 188          // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
 189          //   <text:p>01:24:00 PM</text:p>
 190          // </table:table-cell>
 191  
 192          if ($this->shouldFormatDates) {
 193              // The date is already formatted in the "p" tag
 194              $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
 195              $cellValue = $nodeWithValueAlreadyFormatted->nodeValue;
 196          } else {
 197              // otherwise, get it from the "time-value" attribute
 198              $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
 199  
 200              try {
 201                  $cellValue = new DateInterval($nodeValue);
 202              } catch (Exception $previous) {
 203                  throw new InvalidValueException($nodeValue, '', 0, $previous);
 204              }
 205          }
 206  
 207          return $cellValue;
 208      }
 209  
 210      /**
 211       * Returns the cell Currency value from the given node.
 212       *
 213       * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
 214       */
 215      private function formatCurrencyCellValue(DOMElement $node): string
 216      {
 217          $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
 218          $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);
 219  
 220          return "{$value} {$currency}";
 221      }
 222  
 223      /**
 224       * Returns the cell Percentage value from the given node.
 225       *
 226       * @return float|int The value associated with the cell
 227       */
 228      private function formatPercentageCellValue(DOMElement $node): float|int
 229      {
 230          // percentages are formatted like floats
 231          return $this->formatFloatCellValue($node);
 232      }
 233  
 234      private function extractTextValueFromNode(DOMNode $pNode): string
 235      {
 236          $textValue = '';
 237  
 238          foreach ($pNode->childNodes as $childNode) {
 239              if ($childNode instanceof DOMText) {
 240                  $textValue .= $childNode->nodeValue;
 241              } elseif ($this->isWhitespaceNode($childNode->nodeName) && $childNode instanceof DOMElement) {
 242                  $textValue .= $this->transformWhitespaceNode($childNode);
 243              } elseif (self::XML_NODE_TEXT_A === $childNode->nodeName || self::XML_NODE_TEXT_SPAN === $childNode->nodeName) {
 244                  $textValue .= $this->extractTextValueFromNode($childNode);
 245              }
 246          }
 247  
 248          return $textValue;
 249      }
 250  
 251      /**
 252       * Returns whether the given node is a whitespace node. It must be one of these:
 253       *  - <text:s />
 254       *  - <text:tab />
 255       *  - <text:line-break />.
 256       */
 257      private function isWhitespaceNode(string $nodeName): bool
 258      {
 259          return isset(self::WHITESPACE_XML_NODES[$nodeName]);
 260      }
 261  
 262      /**
 263       * The "<text:p>" node can contain the string value directly
 264       * or contain child elements. In this case, whitespaces contain in
 265       * the child elements should be replaced by their XML equivalent:
 266       *  - space => <text:s />
 267       *  - tab => <text:tab />
 268       *  - line break => <text:line-break />.
 269       *
 270       * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
 271       *
 272       * @param DOMElement $node The XML node representing a whitespace
 273       *
 274       * @return string The corresponding whitespace value
 275       */
 276      private function transformWhitespaceNode(DOMElement $node): string
 277      {
 278          $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"
 279          $numWhitespaces = '' !== $countAttribute ? (int) $countAttribute : 1;
 280  
 281          return str_repeat(self::WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
 282      }
 283  }