Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Common\Helper\Escaper;
   4  
   5  /**
   6   * Class XLSX
   7   * Provides functions to escape and unescape data for XLSX files
   8   */
   9  class XLSX implements EscaperInterface
  10  {
  11      /** @var bool Whether the escaper has already been initialized */
  12      private $isAlreadyInitialized = false;
  13  
  14      /** @var string Regex pattern to detect control characters that need to be escaped */
  15      private $escapableControlCharactersPattern;
  16  
  17      /** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
  18      private $controlCharactersEscapingMap;
  19  
  20      /** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */
  21      private $controlCharactersEscapingReverseMap;
  22  
  23      /**
  24       * Initializes the control characters if not already done
  25       */
  26      protected function initIfNeeded()
  27      {
  28          if (!$this->isAlreadyInitialized) {
  29              $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
  30              $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
  31              $this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap);
  32  
  33              $this->isAlreadyInitialized = true;
  34          }
  35      }
  36  
  37      /**
  38       * Escapes the given string to make it compatible with XLSX
  39       *
  40       * @param string $string The string to escape
  41       * @return string The escaped string
  42       */
  43      public function escape($string)
  44      {
  45          $this->initIfNeeded();
  46  
  47          $escapedString = $this->escapeControlCharacters($string);
  48          // @NOTE: Using ENT_QUOTES as XML entities ('<', '>', '&') as well as
  49          //        single/double quotes (for XML attributes) need to be encoded.
  50          $escapedString = htmlspecialchars($escapedString, ENT_QUOTES, 'UTF-8');
  51  
  52          return $escapedString;
  53      }
  54  
  55      /**
  56       * Unescapes the given string to make it compatible with XLSX
  57       *
  58       * @param string $string The string to unescape
  59       * @return string The unescaped string
  60       */
  61      public function unescape($string)
  62      {
  63          $this->initIfNeeded();
  64  
  65          // ==============
  66          // =   WARNING  =
  67          // ==============
  68          // It is assumed that the given string has already had its XML entities decoded.
  69          // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation).
  70          // Therefore there is no need to call "htmlspecialchars_decode()".
  71          $unescapedString = $this->unescapeControlCharacters($string);
  72  
  73          return $unescapedString;
  74      }
  75  
  76      /**
  77       * @return string Regex pattern containing all escapable control characters
  78       */
  79      protected function getEscapableControlCharactersPattern()
  80      {
  81          // control characters values are from 0 to 1F (hex values) in the ASCII table
  82          // some characters should not be escaped though: "\t", "\r" and "\n".
  83          return '[\x00-\x08' .
  84                  // skipping "\t" (0x9) and "\n" (0xA)
  85                  '\x0B-\x0C' .
  86                  // skipping "\r" (0xD)
  87                  '\x0E-\x1F]';
  88      }
  89  
  90      /**
  91       * Builds the map containing control characters to be escaped
  92       * mapped to their escaped values.
  93       * "\t", "\r" and "\n" don't need to be escaped.
  94       *
  95       * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
  96       * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
  97       *
  98       * @return string[]
  99       */
 100      protected function getControlCharactersEscapingMap()
 101      {
 102          $controlCharactersEscapingMap = [];
 103  
 104          // control characters values are from 0 to 1F (hex values) in the ASCII table
 105          for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
 106              $character = chr($charValue);
 107              if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
 108                  $charHexValue = dechex($charValue);
 109                  $escapedChar = '_x' . sprintf('%04s', strtoupper($charHexValue)) . '_';
 110                  $controlCharactersEscapingMap[$escapedChar] = $character;
 111              }
 112          }
 113  
 114          return $controlCharactersEscapingMap;
 115      }
 116  
 117      /**
 118       * Converts PHP control characters from the given string to OpenXML escaped control characters
 119       *
 120       * Excel escapes control characters with _xHHHH_ and also escapes any
 121       * literal strings of that type by encoding the leading underscore.
 122       * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
 123       *
 124       * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
 125       * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
 126       *
 127       * @param string $string String to escape
 128       * @return string
 129       */
 130      protected function escapeControlCharacters($string)
 131      {
 132          $escapedString = $this->escapeEscapeCharacter($string);
 133  
 134          // if no control characters
 135          if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
 136              return $escapedString;
 137          }
 138  
 139          return preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function ($matches) {
 140              return $this->controlCharactersEscapingReverseMap[$matches[0]];
 141          }, $escapedString);
 142      }
 143  
 144      /**
 145       * Escapes the escape character: "_x0000_" -> "_x005F_x0000_"
 146       *
 147       * @param string $string String to escape
 148       * @return string The escaped string
 149       */
 150      protected function escapeEscapeCharacter($string)
 151      {
 152          return preg_replace('/_(x[\dA-F]{4})_/', '_x005F_$1_', $string);
 153      }
 154  
 155      /**
 156       * Converts OpenXML escaped control characters from the given string to PHP control characters
 157       *
 158       * Excel escapes control characters with _xHHHH_ and also escapes any
 159       * literal strings of that type by encoding the leading underscore.
 160       * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
 161       *
 162       * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
 163       * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
 164       *
 165       * @param string $string String to unescape
 166       * @return string
 167       */
 168      protected function unescapeControlCharacters($string)
 169      {
 170          $unescapedString = $string;
 171  
 172          foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
 173              // only unescape characters that don't contain the escaped escape character for now
 174              $unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
 175          }
 176  
 177          return $this->unescapeEscapeCharacter($unescapedString);
 178      }
 179  
 180      /**
 181       * Unecapes the escape character: "_x005F_x0000_" => "_x0000_"
 182       *
 183       * @param string $string String to unescape
 184       * @return string The unescaped string
 185       */
 186      protected function unescapeEscapeCharacter($string)
 187      {
 188          return preg_replace('/_x005F(_x[\dA-F]{4}_)/', '$1', $string);
 189      }
 190  }