Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0 Note: minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are supported too.

Differences Between: [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX\Helper;
   4  
   5  use Box\Spout\Reader\Exception\InvalidValueException;
   6  use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
   7  use Box\Spout\Reader\XLSX\Manager\StyleManager;
   8  
   9  /**
  10   * Class CellValueFormatter
  11   * This class provides helper functions to format cell values
  12   */
  13  class CellValueFormatter
  14  {
  15      /** Definition of all possible cell types */
  16      const CELL_TYPE_INLINE_STRING = 'inlineStr';
  17      const CELL_TYPE_STR = 'str';
  18      const CELL_TYPE_SHARED_STRING = 's';
  19      const CELL_TYPE_BOOLEAN = 'b';
  20      const CELL_TYPE_NUMERIC = 'n';
  21      const CELL_TYPE_DATE = 'd';
  22      const CELL_TYPE_ERROR = 'e';
  23  
  24      /** Definition of XML nodes names used to parse data */
  25      const XML_NODE_VALUE = 'v';
  26      const XML_NODE_INLINE_STRING_VALUE = 't';
  27  
  28      /** Definition of XML attributes used to parse data */
  29      const XML_ATTRIBUTE_TYPE = 't';
  30      const XML_ATTRIBUTE_STYLE_ID = 's';
  31  
  32      /** Constants used for date formatting */
  33      const NUM_SECONDS_IN_ONE_DAY = 86400;
  34      const NUM_SECONDS_IN_ONE_HOUR = 3600;
  35      const NUM_SECONDS_IN_ONE_MINUTE = 60;
  36  
  37      /**
  38       * February 29th, 1900 is NOT a leap year but Excel thinks it is...
  39       * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
  40       */
  41      const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
  42  
  43      /** @var SharedStringsManager Manages shared strings */
  44      protected $sharedStringsManager;
  45  
  46      /** @var StyleManager Manages styles */
  47      protected $styleManager;
  48  
  49      /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
  50      protected $shouldFormatDates;
  51  
  52      /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
  53      protected $shouldUse1904Dates;
  54  
  55      /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
  56      protected $escaper;
  57  
  58      /**
  59       * @param SharedStringsManager $sharedStringsManager Manages shared strings
  60       * @param StyleManager $styleManager Manages styles
  61       * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
  62       * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
  63       * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
  64       */
  65      public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
  66      {
  67          $this->sharedStringsManager = $sharedStringsManager;
  68          $this->styleManager = $styleManager;
  69          $this->shouldFormatDates = $shouldFormatDates;
  70          $this->shouldUse1904Dates = $shouldUse1904Dates;
  71          $this->escaper = $escaper;
  72      }
  73  
  74      /**
  75       * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
  76       *
  77       * @param \DOMNode $node
  78       * @throws InvalidValueException If the value is not valid
  79       * @return string|int|float|bool|\DateTime The value associated with the cell
  80       */
  81      public function extractAndFormatNodeValue($node)
  82      {
  83          // Default cell type is "n"
  84          $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
  85          $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
  86          $vNodeValue = $this->getVNodeValue($node);
  87  
  88          if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
  89              return $vNodeValue;
  90          }
  91  
  92          switch ($cellType) {
  93              case self::CELL_TYPE_INLINE_STRING:
  94                  return $this->formatInlineStringCellValue($node);
  95              case self::CELL_TYPE_SHARED_STRING:
  96                  return $this->formatSharedStringCellValue($vNodeValue);
  97              case self::CELL_TYPE_STR:
  98                  return $this->formatStrCellValue($vNodeValue);
  99              case self::CELL_TYPE_BOOLEAN:
 100                  return $this->formatBooleanCellValue($vNodeValue);
 101              case self::CELL_TYPE_NUMERIC:
 102                  return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
 103              case self::CELL_TYPE_DATE:
 104                  return $this->formatDateCellValue($vNodeValue);
 105              default:
 106                  throw new InvalidValueException($vNodeValue);
 107          }
 108      }
 109  
 110      /**
 111       * Returns the cell's string value from a node's nested value node
 112       *
 113       * @param \DOMNode $node
 114       * @return string The value associated with the cell
 115       */
 116      protected function getVNodeValue($node)
 117      {
 118          // for cell types having a "v" tag containing the value.
 119          // if not, the returned value should be empty string.
 120          $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
 121  
 122          return ($vNode !== null) ? $vNode->nodeValue : '';
 123      }
 124  
 125      /**
 126       * Returns the cell String value where string is inline.
 127       *
 128       * @param \DOMNode $node
 129       * @return string The value associated with the cell
 130       */
 131      protected function formatInlineStringCellValue($node)
 132      {
 133          // inline strings are formatted this way:
 134          // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
 135          $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
 136          $cellValue = $this->escaper->unescape($tNode->nodeValue);
 137  
 138          return $cellValue;
 139      }
 140  
 141      /**
 142       * Returns the cell String value from shared-strings file using nodeValue index.
 143       *
 144       * @param string $nodeValue
 145       * @return string The value associated with the cell
 146       */
 147      protected function formatSharedStringCellValue($nodeValue)
 148      {
 149          // shared strings are formatted this way:
 150          // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
 151          $sharedStringIndex = (int) $nodeValue;
 152          $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
 153          $cellValue = $this->escaper->unescape($escapedCellValue);
 154  
 155          return $cellValue;
 156      }
 157  
 158      /**
 159       * Returns the cell String value, where string is stored in value node.
 160       *
 161       * @param string $nodeValue
 162       * @return string The value associated with the cell
 163       */
 164      protected function formatStrCellValue($nodeValue)
 165      {
 166          $escapedCellValue = trim($nodeValue);
 167          $cellValue = $this->escaper->unescape($escapedCellValue);
 168  
 169          return $cellValue;
 170      }
 171  
 172      /**
 173       * Returns the cell Numeric value from string of nodeValue.
 174       * The value can also represent a timestamp and a DateTime will be returned.
 175       *
 176       * @param string $nodeValue
 177       * @param int $cellStyleId 0 being the default style
 178       * @return int|float|\DateTime The value associated with the cell
 179       */
 180      protected function formatNumericCellValue($nodeValue, $cellStyleId)
 181      {
 182          // Numeric values can represent numbers as well as timestamps.
 183          // We need to look at the style of the cell to determine whether it is one or the other.
 184          $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
 185  
 186          if ($shouldFormatAsDate) {
 187              $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
 188          } else {
 189              $nodeIntValue = (int) $nodeValue;
 190              $nodeFloatValue = (float) $nodeValue;
 191              $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
 192          }
 193  
 194          return $cellValue;
 195      }
 196  
 197      /**
 198       * Returns a cell's PHP Date value, associated to the given timestamp.
 199       * NOTE: The timestamp is a float representing the number of days since the base Excel date:
 200       *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 201       * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
 202       *
 203       * @see ECMA-376 Part 1 - §18.17.4
 204       *
 205       * @param float $nodeValue
 206       * @param int $cellStyleId 0 being the default style
 207       * @throws InvalidValueException If the value is not a valid timestamp
 208       * @return \DateTime The value associated with the cell
 209       */
 210      protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
 211      {
 212          if ($this->isValidTimestampValue($nodeValue)) {
 213              $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
 214          } else {
 215              throw new InvalidValueException($nodeValue);
 216          }
 217  
 218          return $cellValue;
 219      }
 220  
 221      /**
 222       * Returns whether the given timestamp is supported by SpreadsheetML
 223       * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
 224       *
 225       * @param float $timestampValue
 226       * @return bool
 227       */
 228      protected function isValidTimestampValue($timestampValue)
 229      {
 230          // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
 231          return (
 232              $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 ||
 233              !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
 234          );
 235      }
 236  
 237      /**
 238       * Returns a cell's PHP DateTime value, associated to the given timestamp.
 239       * Only the time value matters. The date part is set to the base Excel date:
 240       * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
 241       *
 242       * @param float $nodeValue
 243       * @param int $cellStyleId 0 being the default style
 244       * @return \DateTime|string The value associated with the cell
 245       */
 246      protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
 247      {
 248          $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
 249  
 250          $daysSinceBaseDate = (int) $nodeValue;
 251          $timeRemainder = fmod($nodeValue, 1);
 252          $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
 253  
 254          $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
 255          $dateObj->modify('+' . $daysSinceBaseDate . 'days');
 256          $dateObj->modify('+' . $secondsRemainder . 'seconds');
 257  
 258          if ($this->shouldFormatDates) {
 259              $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
 260              $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
 261              $cellValue = $dateObj->format($phpDateFormat);
 262          } else {
 263              $cellValue = $dateObj;
 264          }
 265  
 266          return $cellValue;
 267      }
 268  
 269      /**
 270       * Returns the cell Boolean value from a specific node's Value.
 271       *
 272       * @param string $nodeValue
 273       * @return bool The value associated with the cell
 274       */
 275      protected function formatBooleanCellValue($nodeValue)
 276      {
 277          return (bool) $nodeValue;
 278      }
 279  
 280      /**
 281       * Returns a cell's PHP Date value, associated to the given stored nodeValue.
 282       * @see ECMA-376 Part 1 - §18.17.4
 283       *
 284       * @param string $nodeValue ISO 8601 Date string
 285       * @throws InvalidValueException If the value is not a valid date
 286       * @return \DateTime|string The value associated with the cell
 287       */
 288      protected function formatDateCellValue($nodeValue)
 289      {
 290          // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
 291          try {
 292              $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
 293          } catch (\Exception $e) {
 294              throw new InvalidValueException($nodeValue);
 295          }
 296  
 297          return $cellValue;
 298      }
 299  }