Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX\Helper;
   4  
   5  use Box\Spout\Reader\Exception\InvalidValueException;
   6  use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
   7  use Box\Spout\Reader\XLSX\Manager\StyleManager;
   8  
   9  /**
  10   * Class CellValueFormatter
  11   * This class provides helper functions to format cell values
  12   */
  13  class CellValueFormatter
  14  {
  15      /** Definition of all possible cell types */
  16      const CELL_TYPE_INLINE_STRING = 'inlineStr';
  17      const CELL_TYPE_STR = 'str';
  18      const CELL_TYPE_SHARED_STRING = 's';
  19      const CELL_TYPE_BOOLEAN = 'b';
  20      const CELL_TYPE_NUMERIC = 'n';
  21      const CELL_TYPE_DATE = 'd';
  22      const CELL_TYPE_ERROR = 'e';
  23  
  24      /** Definition of XML nodes names used to parse data */
  25      const XML_NODE_VALUE = 'v';
  26      const XML_NODE_INLINE_STRING_VALUE = 't';
  27  
  28      /** Definition of XML attributes used to parse data */
  29      const XML_ATTRIBUTE_TYPE = 't';
  30      const XML_ATTRIBUTE_STYLE_ID = 's';
  31  
  32      /** Constants used for date formatting */
  33      const NUM_SECONDS_IN_ONE_DAY = 86400;
  34  
  35      /** @var SharedStringsManager Manages shared strings */
  36      protected $sharedStringsManager;
  37  
  38      /** @var StyleManager Manages styles */
  39      protected $styleManager;
  40  
  41      /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
  42      protected $shouldFormatDates;
  43  
  44      /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
  45      protected $shouldUse1904Dates;
  46  
  47      /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
  48      protected $escaper;
  49  
  50      /**
  51       * @param SharedStringsManager $sharedStringsManager Manages shared strings
  52       * @param StyleManager $styleManager Manages styles
  53       * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
  54       * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
  55       * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
  56       */
  57      public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
  58      {
  59          $this->sharedStringsManager = $sharedStringsManager;
  60          $this->styleManager = $styleManager;
  61          $this->shouldFormatDates = $shouldFormatDates;
  62          $this->shouldUse1904Dates = $shouldUse1904Dates;
  63          $this->escaper = $escaper;
  64      }
  65  
  66      /**
  67       * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
  68       *
  69       * @param \DOMNode $node
  70       * @throws InvalidValueException If the value is not valid
  71       * @return string|int|float|bool|\DateTime The value associated with the cell
  72       */
  73      public function extractAndFormatNodeValue($node)
  74      {
  75          // Default cell type is "n"
  76          $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
  77          $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
  78          $vNodeValue = $this->getVNodeValue($node);
  79  
  80          if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
  81              return $vNodeValue;
  82          }
  83  
  84          switch ($cellType) {
  85              case self::CELL_TYPE_INLINE_STRING:
  86                  return $this->formatInlineStringCellValue($node);
  87              case self::CELL_TYPE_SHARED_STRING:
  88                  return $this->formatSharedStringCellValue($vNodeValue);
  89              case self::CELL_TYPE_STR:
  90                  return $this->formatStrCellValue($vNodeValue);
  91              case self::CELL_TYPE_BOOLEAN:
  92                  return $this->formatBooleanCellValue($vNodeValue);
  93              case self::CELL_TYPE_NUMERIC:
  94                  return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
  95              case self::CELL_TYPE_DATE:
  96                  return $this->formatDateCellValue($vNodeValue);
  97              default:
  98                  throw new InvalidValueException($vNodeValue);
  99          }
 100      }
 101  
 102      /**
 103       * Returns the cell's string value from a node's nested value node
 104       *
 105       * @param \DOMNode $node
 106       * @return string The value associated with the cell
 107       */
 108      protected function getVNodeValue($node)
 109      {
 110          // for cell types having a "v" tag containing the value.
 111          // if not, the returned value should be empty string.
 112          $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
 113  
 114          return ($vNode !== null) ? $vNode->nodeValue : '';
 115      }
 116  
 117      /**
 118       * Returns the cell String value where string is inline.
 119       *
 120       * @param \DOMNode $node
 121       * @return string The value associated with the cell
 122       */
 123      protected function formatInlineStringCellValue($node)
 124      {
 125          // inline strings are formatted this way (they can contain any number of <t> nodes):
 126          // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
 127          $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
 128  
 129          $cellValue = '';
 130          for ($i = 0; $i < $tNodes->count(); $i++) {
 131              $tNode = $tNodes->item($i);
 132              $cellValue .= $this->escaper->unescape($tNode->nodeValue);
 133          }
 134  
 135          return $cellValue;
 136      }
 137  
 138      /**
 139       * Returns the cell String value from shared-strings file using nodeValue index.
 140       *
 141       * @param string $nodeValue
 142       * @return string The value associated with the cell
 143       */
 144      protected function formatSharedStringCellValue($nodeValue)
 145      {
 146          // shared strings are formatted this way:
 147          // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
 148          $sharedStringIndex = (int) $nodeValue;
 149          $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
 150          $cellValue = $this->escaper->unescape($escapedCellValue);
 151  
 152          return $cellValue;
 153      }
 154  
 155      /**
 156       * Returns the cell String value, where string is stored in value node.
 157       *
 158       * @param string $nodeValue
 159       * @return string The value associated with the cell
 160       */
 161      protected function formatStrCellValue($nodeValue)
 162      {
 163          $escapedCellValue = \trim($nodeValue);
 164          $cellValue = $this->escaper->unescape($escapedCellValue);
 165  
 166          return $cellValue;
 167      }
 168  
 169      /**
 170       * Returns the cell Numeric value from string of nodeValue.
 171       * The value can also represent a timestamp and a DateTime will be returned.
 172       *
 173       * @param string $nodeValue
 174       * @param int $cellStyleId 0 being the default style
 175       * @return int|float|\DateTime The value associated with the cell
 176       */
 177      protected function formatNumericCellValue($nodeValue, $cellStyleId)
 178      {
 179          // Numeric values can represent numbers as well as timestamps.
 180          // We need to look at the style of the cell to determine whether it is one or the other.
 181          $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
 182  
 183          if ($shouldFormatAsDate) {
 184              $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
 185          } else {
 186              $nodeIntValue = (int) $nodeValue;
 187              $nodeFloatValue = (float) $nodeValue;
 188              $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
 189          }
 190  
 191          return $cellValue;
 192      }
 193  
 194      /**
 195       * Returns a cell's PHP Date value, associated to the given timestamp.
 196       * NOTE: The timestamp is a float representing the number of days since the base Excel date:
 197       *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 198       * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
 199       *
 200       * @see ECMA-376 Part 1 - §18.17.4
 201       *
 202       * @param float $nodeValue
 203       * @param int $cellStyleId 0 being the default style
 204       * @throws InvalidValueException If the value is not a valid timestamp
 205       * @return \DateTime The value associated with the cell
 206       */
 207      protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
 208      {
 209          if ($this->isValidTimestampValue($nodeValue)) {
 210              $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
 211          } else {
 212              throw new InvalidValueException($nodeValue);
 213          }
 214  
 215          return $cellValue;
 216      }
 217  
 218      /**
 219       * Returns whether the given timestamp is supported by SpreadsheetML
 220       * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
 221       *
 222       * @param float $timestampValue
 223       * @return bool
 224       */
 225      protected function isValidTimestampValue($timestampValue)
 226      {
 227          // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
 228          return (
 229              $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 ||
 230              !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
 231          );
 232      }
 233  
 234      /**
 235       * Returns a cell's PHP DateTime value, associated to the given timestamp.
 236       * Only the time value matters. The date part is set to the base Excel date:
 237       * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 238       *
 239       * @param float $nodeValue
 240       * @param int $cellStyleId 0 being the default style
 241       * @return \DateTime|string The value associated with the cell
 242       */
 243      protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
 244      {
 245          $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
 246  
 247          $daysSinceBaseDate = (int) $nodeValue;
 248          $timeRemainder = \fmod($nodeValue, 1);
 249          $secondsRemainder = \round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
 250  
 251          $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
 252          $dateObj->modify('+' . $daysSinceBaseDate . 'days');
 253          $dateObj->modify('+' . $secondsRemainder . 'seconds');
 254  
 255          if ($this->shouldFormatDates) {
 256              $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
 257              $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
 258              $cellValue = $dateObj->format($phpDateFormat);
 259          } else {
 260              $cellValue = $dateObj;
 261          }
 262  
 263          return $cellValue;
 264      }
 265  
 266      /**
 267       * Returns the cell Boolean value from a specific node's Value.
 268       *
 269       * @param string $nodeValue
 270       * @return bool The value associated with the cell
 271       */
 272      protected function formatBooleanCellValue($nodeValue)
 273      {
 274          return (bool) $nodeValue;
 275      }
 276  
 277      /**
 278       * Returns a cell's PHP Date value, associated to the given stored nodeValue.
 279       * @see ECMA-376 Part 1 - §18.17.4
 280       *
 281       * @param string $nodeValue ISO 8601 Date string
 282       * @throws InvalidValueException If the value is not a valid date
 283       * @return \DateTime|string The value associated with the cell
 284       */
 285      protected function formatDateCellValue($nodeValue)
 286      {
 287          // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
 288          try {
 289              $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
 290          } catch (\Exception $e) {
 291              throw new InvalidValueException($nodeValue);
 292          }
 293  
 294          return $cellValue;
 295      }
 296  }