Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.

Differences Between: [Versions 310 and 400] [Versions 39 and 400]

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX\Manager;
   4  
   5  use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
   6  
   7  /**
   8   * Class StyleManager
   9   * This class manages XLSX styles
  10   */
  11  class StyleManager
  12  {
  13      /** Nodes used to find relevant information in the styles XML file */
  14      const XML_NODE_NUM_FMTS = 'numFmts';
  15      const XML_NODE_NUM_FMT = 'numFmt';
  16      const XML_NODE_CELL_XFS = 'cellXfs';
  17      const XML_NODE_XF = 'xf';
  18  
  19      /** Attributes used to find relevant information in the styles XML file */
  20      const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
  21      const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
  22      const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
  23  
  24      /** By convention, default style ID is 0 */
  25      const DEFAULT_STYLE_ID = 0;
  26  
  27      const NUMBER_FORMAT_GENERAL = 'General';
  28  
  29      /**
  30       * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
  31       * @var array Mapping between built-in numFmtId and the associated format - for dates only
  32       */
  33      protected static $builtinNumFmtIdToNumFormatMapping = [
  34          14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
  35          15 => 'd-mmm-yy',
  36          16 => 'd-mmm',
  37          17 => 'mmm-yy',
  38          18 => 'h:mm AM/PM',
  39          19 => 'h:mm:ss AM/PM',
  40          20 => 'h:mm',
  41          21 => 'h:mm:ss',
  42          22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
  43          45 => 'mm:ss',
  44          46 => '[h]:mm:ss',
  45          47 => 'mm:ss.0',  // @NOTE: ECMA spec is 'mmss.0',
  46      ];
  47  
  48      /** @var string Path of the XLSX file being read */
  49      protected $filePath;
  50  
  51      /** @var bool Whether the XLSX file contains a styles XML file */
  52      protected $hasStylesXMLFile;
  53  
  54      /** @var string|null Path of the styles XML file */
  55      protected $stylesXMLFilePath;
  56  
  57      /** @var InternalEntityFactory Factory to create entities */
  58      protected $entityFactory;
  59  
  60      /** @var array Array containing the IDs of built-in number formats indicating a date */
  61      protected $builtinNumFmtIdIndicatingDates;
  62  
  63      /** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
  64      protected $customNumberFormats;
  65  
  66      /** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
  67      protected $stylesAttributes;
  68  
  69      /** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
  70      protected $numFmtIdToIsDateFormatCache = [];
  71  
  72      /**
  73       * @param string $filePath Path of the XLSX file being read
  74       * @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
  75       * @param InternalEntityFactory $entityFactory Factory to create entities
  76       */
  77      public function __construct($filePath, $workbookRelationshipsManager, $entityFactory)
  78      {
  79          $this->filePath = $filePath;
  80          $this->entityFactory = $entityFactory;
  81          $this->builtinNumFmtIdIndicatingDates = \array_keys(self::$builtinNumFmtIdToNumFormatMapping);
  82          $this->hasStylesXMLFile = $workbookRelationshipsManager->hasStylesXMLFile();
  83          if ($this->hasStylesXMLFile) {
  84              $this->stylesXMLFilePath = $workbookRelationshipsManager->getStylesXMLFilePath();
  85          }
  86      }
  87  
  88      /**
  89       * Returns whether the style with the given ID should consider
  90       * numeric values as timestamps and format the cell as a date.
  91       *
  92       * @param int $styleId Zero-based style ID
  93       * @return bool Whether the cell with the given cell should display a date instead of a numeric value
  94       */
  95      public function shouldFormatNumericValueAsDate($styleId)
  96      {
  97          if (!$this->hasStylesXMLFile) {
  98              return false;
  99          }
 100  
 101          $stylesAttributes = $this->getStylesAttributes();
 102  
 103          // Default style (0) does not format numeric values as timestamps. Only custom styles do.
 104          // Also if the style ID does not exist in the styles.xml file, format as numeric value.
 105          // Using isset here because it is way faster than array_key_exists...
 106          if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
 107              return false;
 108          }
 109  
 110          $styleAttributes = $stylesAttributes[$styleId];
 111  
 112          return $this->doesStyleIndicateDate($styleAttributes);
 113      }
 114  
 115      /**
 116       * Reads the styles.xml file and extract the relevant information from the file.
 117       *
 118       * @return void
 119       */
 120      protected function extractRelevantInfo()
 121      {
 122          $this->customNumberFormats = [];
 123          $this->stylesAttributes = [];
 124  
 125          $xmlReader = $this->entityFactory->createXMLReader();
 126  
 127          if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
 128              while ($xmlReader->read()) {
 129                  if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
 130                      $this->extractNumberFormats($xmlReader);
 131                  } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
 132                      $this->extractStyleAttributes($xmlReader);
 133                  }
 134              }
 135  
 136              $xmlReader->close();
 137          }
 138      }
 139  
 140      /**
 141       * Extracts number formats from the "numFmt" nodes.
 142       * For simplicity, the styles attributes are kept in memory. This is possible thanks
 143       * to the reuse of formats. So 1 million cells should not use 1 million formats.
 144       *
 145       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
 146       * @return void
 147       */
 148      protected function extractNumberFormats($xmlReader)
 149      {
 150          while ($xmlReader->read()) {
 151              if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
 152                  $numFmtId = (int) ($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
 153                  $formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
 154                  $this->customNumberFormats[$numFmtId] = $formatCode;
 155              } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
 156                  // Once done reading "numFmts" node's children
 157                  break;
 158              }
 159          }
 160      }
 161  
 162      /**
 163       * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
 164       * For simplicity, the styles attributes are kept in memory. This is possible thanks
 165       * to the reuse of styles. So 1 million cells should not use 1 million styles.
 166       *
 167       * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
 168       * @return void
 169       */
 170      protected function extractStyleAttributes($xmlReader)
 171      {
 172          while ($xmlReader->read()) {
 173              if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
 174                  $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
 175                  $normalizedNumFmtId = ($numFmtId !== null) ? (int) $numFmtId : null;
 176  
 177                  $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
 178                  $normalizedApplyNumberFormat = ($applyNumberFormat !== null) ? (bool) $applyNumberFormat : null;
 179  
 180                  $this->stylesAttributes[] = [
 181                      self::XML_ATTRIBUTE_NUM_FMT_ID          => $normalizedNumFmtId,
 182                      self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
 183                  ];
 184              } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
 185                  // Once done reading "cellXfs" node's children
 186                  break;
 187              }
 188          }
 189      }
 190  
 191      /**
 192       * @return array The custom number formats
 193       */
 194      protected function getCustomNumberFormats()
 195      {
 196          if (!isset($this->customNumberFormats)) {
 197              $this->extractRelevantInfo();
 198          }
 199  
 200          return $this->customNumberFormats;
 201      }
 202  
 203      /**
 204       * @return array The styles attributes
 205       */
 206      protected function getStylesAttributes()
 207      {
 208          if (!isset($this->stylesAttributes)) {
 209              $this->extractRelevantInfo();
 210          }
 211  
 212          return $this->stylesAttributes;
 213      }
 214  
 215      /**
 216       * @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
 217       * @return bool Whether the style with the given attributes indicates that the number is a date
 218       */
 219      protected function doesStyleIndicateDate($styleAttributes)
 220      {
 221          $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
 222          $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
 223  
 224          // A style may apply a date format if it has:
 225          //  - "applyNumberFormat" attribute not set to "false"
 226          //  - "numFmtId" attribute set
 227          // This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
 228          // but this is not necessarily a date.
 229          if ($applyNumberFormat === false || $numFmtId === null) {
 230              return false;
 231          }
 232  
 233          return $this->doesNumFmtIdIndicateDate($numFmtId);
 234      }
 235  
 236      /**
 237       * Returns whether the number format ID indicates that the number is a date.
 238       * The result is cached to avoid recomputing the same thing over and over, as
 239       * "numFmtId" attributes can be shared between multiple styles.
 240       *
 241       * @param int $numFmtId
 242       * @return bool Whether the number format ID indicates that the number is a date
 243       */
 244      protected function doesNumFmtIdIndicateDate($numFmtId)
 245      {
 246          if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
 247              $formatCode = $this->getFormatCodeForNumFmtId($numFmtId);
 248  
 249              $this->numFmtIdToIsDateFormatCache[$numFmtId] = (
 250                  $this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
 251                  $this->isFormatCodeCustomDateFormat($formatCode)
 252              );
 253          }
 254  
 255          return $this->numFmtIdToIsDateFormatCache[$numFmtId];
 256      }
 257  
 258      /**
 259       * @param int $numFmtId
 260       * @return string|null The custom number format or NULL if none defined for the given numFmtId
 261       */
 262      protected function getFormatCodeForNumFmtId($numFmtId)
 263      {
 264          $customNumberFormats = $this->getCustomNumberFormats();
 265  
 266          // Using isset here because it is way faster than array_key_exists...
 267          return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null;
 268      }
 269  
 270      /**
 271       * @param int $numFmtId
 272       * @return bool Whether the number format ID indicates that the number is a date
 273       */
 274      protected function isNumFmtIdBuiltInDateFormat($numFmtId)
 275      {
 276          return \in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates);
 277      }
 278  
 279      /**
 280       * @param string|null $formatCode
 281       * @return bool Whether the given format code indicates that the number is a date
 282       */
 283      protected function isFormatCodeCustomDateFormat($formatCode)
 284      {
 285          // if no associated format code or if using the default "General" format
 286          if ($formatCode === null || \strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) === 0) {
 287              return false;
 288          }
 289  
 290          return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
 291      }
 292  
 293      /**
 294       * @param string $formatCode
 295       * @return bool Whether the given format code matches a date format pattern
 296       */
 297      protected function isFormatCodeMatchingDateFormatPattern($formatCode)
 298      {
 299          // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
 300          $pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
 301          $formatCode = \preg_replace($pattern, '', $formatCode);
 302  
 303          // custom date formats contain specific characters to represent the date:
 304          // e - yy - m - d - h - s
 305          // and all of their variants (yyyy - mm - dd...)
 306          $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's'];
 307  
 308          $hasFoundDateFormatCharacter = false;
 309          foreach ($dateFormatCharacters as $dateFormatCharacter) {
 310              // character not preceded by "\" (case insensitive)
 311              $pattern = '/(?<!\\\)' . $dateFormatCharacter . '/i';
 312  
 313              if (\preg_match($pattern, $formatCode)) {
 314                  $hasFoundDateFormatCharacter = true;
 315                  break;
 316              }
 317          }
 318  
 319          return $hasFoundDateFormatCharacter;
 320      }
 321  
 322      /**
 323       * Returns the format as defined in "styles.xml" of the given style.
 324       * NOTE: It is assumed that the style DOES have a number format associated to it.
 325       *
 326       * @param int $styleId Zero-based style ID
 327       * @return string The number format code associated with the given style
 328       */
 329      public function getNumberFormatCode($styleId)
 330      {
 331          $stylesAttributes = $this->getStylesAttributes();
 332          $styleAttributes = $stylesAttributes[$styleId];
 333          $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
 334  
 335          if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
 336              $numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
 337          } else {
 338              $customNumberFormats = $this->getCustomNumberFormats();
 339              $numberFormatCode = $customNumberFormats[$numFmtId];
 340          }
 341  
 342          return $numberFormatCode;
 343      }
 344  }