Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0 Note: minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is supported too.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Reader\XLSX\Helper;
   6  
   7  use DateTimeImmutable;
   8  use DOMElement;
   9  use Exception;
  10  use OpenSpout\Common\Helper\Escaper\XLSX;
  11  use OpenSpout\Reader\Exception\InvalidValueException;
  12  use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
  13  use OpenSpout\Reader\XLSX\Manager\StyleManagerInterface;
  14  
  15  /**
  16   * This class provides helper functions to format cell values.
  17   */
  18  final class CellValueFormatter
  19  {
  20      /**
  21       * Definition of all possible cell types.
  22       */
  23      public const CELL_TYPE_INLINE_STRING = 'inlineStr';
  24      public const CELL_TYPE_STR = 'str';
  25      public const CELL_TYPE_SHARED_STRING = 's';
  26      public const CELL_TYPE_BOOLEAN = 'b';
  27      public const CELL_TYPE_NUMERIC = 'n';
  28      public const CELL_TYPE_DATE = 'd';
  29      public const CELL_TYPE_ERROR = 'e';
  30  
  31      /**
  32       * Definition of XML nodes names used to parse data.
  33       */
  34      public const XML_NODE_VALUE = 'v';
  35      public const XML_NODE_INLINE_STRING_VALUE = 't';
  36  
  37      /**
  38       * Definition of XML attributes used to parse data.
  39       */
  40      public const XML_ATTRIBUTE_TYPE = 't';
  41      public const XML_ATTRIBUTE_STYLE_ID = 's';
  42  
  43      /**
  44       * Constants used for date formatting.
  45       */
  46      public const NUM_SECONDS_IN_ONE_DAY = 86400;
  47  
  48      /** @var SharedStringsManager Manages shared strings */
  49      private SharedStringsManager $sharedStringsManager;
  50  
  51      /** @var StyleManagerInterface Manages styles */
  52      private StyleManagerInterface $styleManager;
  53  
  54      /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
  55      private bool $shouldFormatDates;
  56  
  57      /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
  58      private bool $shouldUse1904Dates;
  59  
  60      /** @var XLSX Used to unescape XML data */
  61      private XLSX $escaper;
  62  
  63      /**
  64       * @param SharedStringsManager  $sharedStringsManager Manages shared strings
  65       * @param StyleManagerInterface $styleManager         Manages styles
  66       * @param bool                  $shouldFormatDates    Whether date/time values should be returned as PHP objects or be formatted as strings
  67       * @param bool                  $shouldUse1904Dates   Whether date/time values should use a calendar starting in 1904 instead of 1900
  68       * @param XLSX                  $escaper              Used to unescape XML data
  69       */
  70      public function __construct(
  71          SharedStringsManager $sharedStringsManager,
  72          StyleManagerInterface $styleManager,
  73          bool $shouldFormatDates,
  74          bool $shouldUse1904Dates,
  75          XLSX $escaper
  76      ) {
  77          $this->sharedStringsManager = $sharedStringsManager;
  78          $this->styleManager = $styleManager;
  79          $this->shouldFormatDates = $shouldFormatDates;
  80          $this->shouldUse1904Dates = $shouldUse1904Dates;
  81          $this->escaper = $escaper;
  82      }
  83  
  84      /**
  85       * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
  86       *
  87       * @return bool|DateTimeImmutable|float|int|string The value associated with the cell
  88       *
  89       * @throws InvalidValueException If the value is not valid
  90       */
  91      public function extractAndFormatNodeValue(DOMElement $node): bool|DateTimeImmutable|float|int|string
  92      {
  93          // Default cell type is "n"
  94          $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
  95          $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
  96          $vNodeValue = $this->getVNodeValue($node);
  97  
  98          if (('' === $vNodeValue) && (self::CELL_TYPE_INLINE_STRING !== $cellType)) {
  99              return $vNodeValue;
 100          }
 101  
 102          return match ($cellType) {
 103              self::CELL_TYPE_INLINE_STRING => $this->formatInlineStringCellValue($node),
 104              self::CELL_TYPE_SHARED_STRING => $this->formatSharedStringCellValue($vNodeValue),
 105              self::CELL_TYPE_STR => $this->formatStrCellValue($vNodeValue),
 106              self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($vNodeValue),
 107              self::CELL_TYPE_NUMERIC => $this->formatNumericCellValue($vNodeValue, $cellStyleId),
 108              self::CELL_TYPE_DATE => $this->formatDateCellValue($vNodeValue),
 109              default => throw new InvalidValueException($vNodeValue),
 110          };
 111      }
 112  
 113      /**
 114       * Returns the cell's string value from a node's nested value node.
 115       *
 116       * @return string The value associated with the cell
 117       */
 118      private function getVNodeValue(DOMElement $node): string
 119      {
 120          // for cell types having a "v" tag containing the value.
 121          // if not, the returned value should be empty string.
 122          $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
 123  
 124          return (string) $vNode?->nodeValue;
 125      }
 126  
 127      /**
 128       * Returns the cell String value where string is inline.
 129       *
 130       * @return string The value associated with the cell
 131       */
 132      private function formatInlineStringCellValue(DOMElement $node): string
 133      {
 134          // inline strings are formatted this way (they can contain any number of <t> nodes):
 135          // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
 136          $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
 137  
 138          $cellValue = '';
 139          for ($i = 0; $i < $tNodes->count(); ++$i) {
 140              $nodeValue = $tNodes->item($i)->nodeValue;
 141              \assert(null !== $nodeValue);
 142              $cellValue .= $this->escaper->unescape($nodeValue);
 143          }
 144  
 145          return $cellValue;
 146      }
 147  
 148      /**
 149       * Returns the cell String value from shared-strings file using nodeValue index.
 150       *
 151       * @return string The value associated with the cell
 152       */
 153      private function formatSharedStringCellValue(string $nodeValue): string
 154      {
 155          // shared strings are formatted this way:
 156          // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
 157          $sharedStringIndex = (int) $nodeValue;
 158          $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
 159  
 160          return $this->escaper->unescape($escapedCellValue);
 161      }
 162  
 163      /**
 164       * Returns the cell String value, where string is stored in value node.
 165       *
 166       * @return string The value associated with the cell
 167       */
 168      private function formatStrCellValue(string $nodeValue): string
 169      {
 170          $escapedCellValue = trim($nodeValue);
 171  
 172          return $this->escaper->unescape($escapedCellValue);
 173      }
 174  
 175      /**
 176       * Returns the cell Numeric value from string of nodeValue.
 177       * The value can also represent a timestamp and a DateTime will be returned.
 178       *
 179       * @param int $cellStyleId 0 being the default style
 180       */
 181      private function formatNumericCellValue(int|float|string $nodeValue, int $cellStyleId): DateTimeImmutable|float|int|string
 182      {
 183          // Numeric values can represent numbers as well as timestamps.
 184          // We need to look at the style of the cell to determine whether it is one or the other.
 185          $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
 186  
 187          if ($shouldFormatAsDate) {
 188              $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
 189          } else {
 190              $nodeIntValue = (int) $nodeValue;
 191              $nodeFloatValue = (float) $nodeValue;
 192              $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
 193          }
 194  
 195          return $cellValue;
 196      }
 197  
 198      /**
 199       * Returns a cell's PHP Date value, associated to the given timestamp.
 200       * NOTE: The timestamp is a float representing the number of days since the base Excel date:
 201       *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 202       * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
 203       *
 204       * @param int $cellStyleId 0 being the default style
 205       *
 206       * @throws InvalidValueException If the value is not a valid timestamp
 207       *
 208       * @see ECMA-376 Part 1 - §18.17.4
 209       */
 210      private function formatExcelTimestampValue(float $nodeValue, int $cellStyleId): string|DateTimeImmutable
 211      {
 212          if (!$this->isValidTimestampValue($nodeValue)) {
 213              throw new InvalidValueException((string) $nodeValue);
 214          }
 215  
 216          return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
 217      }
 218  
 219      /**
 220       * Returns whether the given timestamp is supported by SpreadsheetML.
 221       *
 222       * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
 223       */
 224      private function isValidTimestampValue(float $timestampValue): bool
 225      {
 226          // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
 227          return
 228              $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884
 229              || !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884;
 230      }
 231  
 232      /**
 233       * Returns a cell's PHP DateTime value, associated to the given timestamp.
 234       * Only the time value matters. The date part is set to the base Excel date:
 235       * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 236       *
 237       * @param int $cellStyleId 0 being the default style
 238       */
 239      private function formatExcelTimestampValueAsDateTimeValue(float $nodeValue, int $cellStyleId): string|DateTimeImmutable
 240      {
 241          $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
 242  
 243          $daysSinceBaseDate = (int) $nodeValue;
 244          $daysSign = '+';
 245          if ($daysSinceBaseDate < 0) {
 246              $daysSinceBaseDate = abs($daysSinceBaseDate);
 247              $daysSign = '-';
 248          }
 249          $timeRemainder = fmod($nodeValue, 1);
 250          $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
 251          $secondsSign = '+';
 252          if ($secondsRemainder < 0) {
 253              $secondsRemainder = abs($secondsRemainder);
 254              $secondsSign = '-';
 255          }
 256  
 257          $dateObj = DateTimeImmutable::createFromFormat('|Y-m-d', $baseDate);
 258          \assert(false !== $dateObj);
 259          $dateObj = $dateObj->modify($daysSign.$daysSinceBaseDate.'days');
 260          \assert(false !== $dateObj);
 261          $dateObj = $dateObj->modify($secondsSign.$secondsRemainder.'seconds');
 262          \assert(false !== $dateObj);
 263  
 264          if ($this->shouldFormatDates) {
 265              $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
 266              $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
 267              $cellValue = $dateObj->format($phpDateFormat);
 268          } else {
 269              $cellValue = $dateObj;
 270          }
 271  
 272          return $cellValue;
 273      }
 274  
 275      /**
 276       * Returns the cell Boolean value from a specific node's Value.
 277       *
 278       * @return bool The value associated with the cell
 279       */
 280      private function formatBooleanCellValue(string $nodeValue): bool
 281      {
 282          return (bool) $nodeValue;
 283      }
 284  
 285      /**
 286       * Returns a cell's PHP Date value, associated to the given stored nodeValue.
 287       *
 288       * @see ECMA-376 Part 1 - §18.17.4
 289       *
 290       * @param string $nodeValue ISO 8601 Date string
 291       *
 292       * @throws InvalidValueException If the value is not a valid date
 293       */
 294      private function formatDateCellValue(string $nodeValue): string|DateTimeImmutable
 295      {
 296          // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
 297          try {
 298              $cellValue = ($this->shouldFormatDates) ? $nodeValue : new DateTimeImmutable($nodeValue);
 299          } catch (Exception $exception) {
 300              throw new InvalidValueException($nodeValue, '', 0, $exception);
 301          }
 302  
 303          return $cellValue;
 304      }
 305  }