Differences Between: [Versions 310 and 311] [Versions 39 and 311]
<?php

namespace Box\Spout\Common\Helper\Escaper;

/**
 * Class XLSX
 * Provides functions to escape and unescape data for XLSX files.
 *
 * Two layers of escaping are involved when writing:
 *  1. OpenXML control-character escaping: a control character becomes "_xHHHH_"
 *     and a literal "_xHHHH_" gets its leading underscore encoded as "_x005F_".
 *  2. Regular XML entity escaping ("<", ">", "&" and quotes).
 *
 * Known limitation: a literal "_x005F" immediately followed by a real control
 * character produces the same escaped output as an escaped literal "_xHHHH_"
 * and therefore cannot be round-tripped.
 */
class XLSX implements EscaperInterface
{
    /** @var bool Whether the escaper has already been initialized */
    private $isAlreadyInitialized = false;

    /** @var string Regex character class (without delimiters) matching the control characters that need to be escaped */
    private $escapableControlCharactersPattern;

    /** @var string[] Map from escaped value "_xHHHH_" (key) to the control character it stands for (value) */
    private $controlCharactersEscapingMap;

    /** @var string[] Map from control character (key) to its escaped value "_xHHHH_" (value) */
    private $controlCharactersEscapingReverseMap;

    /**
     * Lazily builds the regex pattern and both escaping maps.
     * The pattern must be set first because the map builder reads it.
     *
     * @return void
     */
    protected function initIfNeeded()
    {
        if (!$this->isAlreadyInitialized) {
            $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
            $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
            $this->controlCharactersEscapingReverseMap = \array_flip($this->controlCharactersEscapingMap);

            $this->isAlreadyInitialized = true;
        }
    }

    /**
     * Escapes the given string to make it compatible with XLSX
     *
     * @param string $string The string to escape
     * @return string The escaped string
     */
    public function escape($string)
    {
        $this->initIfNeeded();

        $escapedString = $this->escapeControlCharacters($string);

        // ENT_QUOTES: XML entities ('<', '>', '&') as well as single/double quotes
        // (for XML attributes) need to be encoded.
        // ENT_SUBSTITUTE: without it, htmlspecialchars() returns an empty string as soon
        // as the input contains one invalid UTF-8 sequence, silently dropping the whole
        // value. With it, only the invalid sequence is replaced by U+FFFD.
        return \htmlspecialchars($escapedString, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Unescapes the given string that was read from an XLSX file
     *
     * @param string $string The string to unescape
     * @return string The unescaped string
     */
    public function unescape($string)
    {
        $this->initIfNeeded();

        // ==============
        // =   WARNING  =
        // ==============
        // It is assumed that the given string has already had its XML entities decoded.
        // This is true if the string is coming from a DOMNode (as DOMNode already decodes XML entities on creation).
        // Therefore there is no need to call "htmlspecialchars_decode()".
        return $this->unescapeControlCharacters($string);
    }

    /**
     * @return string Regex character class (without delimiters) containing all escapable control characters
     */
    protected function getEscapableControlCharactersPattern()
    {
        // control characters values are from 0 to 1F (hex values) in the ASCII table
        // some characters should not be escaped though: "\t", "\r" and "\n".
        return '[\x00-\x08' .
            // skipping "\t" (0x9) and "\n" (0xA)
            '\x0B-\x0C' .
            // skipping "\r" (0xD)
            '\x0E-\x1F]';
    }

    /**
     * Builds the map whose keys are the escaped values ("_xHHHH_") and whose
     * values are the control characters they stand for.
     * "\t", "\r" and "\n" don't need to be escaped and are not part of the map.
     *
     * Reads $this->escapableControlCharactersPattern, so it must be called after
     * the pattern has been set (see initIfNeeded()).
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @return string[]
     */
    protected function getControlCharactersEscapingMap()
    {
        $controlCharactersEscapingMap = [];

        // control characters values are from 0 to 1F (hex values) in the ASCII table
        for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
            $character = \chr($charValue);
            if (\preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
                // "%04X" => upper-case hexadecimal, zero-padded to 4 digits (e.g. 0x1B => "_x001B_")
                $escapedChar = \sprintf('_x%04X_', $charValue);
                $controlCharactersEscapingMap[$escapedChar] = $character;
            }
        }

        return $controlCharactersEscapingMap;
    }

    /**
     * Converts PHP control characters from the given string to OpenXML escaped control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to escape
     * @return string
     */
    protected function escapeControlCharacters($string)
    {
        // Literal "_xHHHH_" sequences must be protected BEFORE control characters are
        // replaced, otherwise the freshly generated "_xHHHH_" would be escaped again.
        $escapedString = $this->escapeEscapeCharacter($string);

        // fast path: nothing to do when the string contains no control characters
        if (!\preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
            return $escapedString;
        }

        return \preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function ($matches) {
            return $this->controlCharactersEscapingReverseMap[$matches[0]];
        }, $escapedString);
    }

    /**
     * Escapes the escape character: "_x0000_" -> "_x005F_x0000_"
     *
     * Every underscore that starts an "_xHHHH_" sequence is encoded. A look-ahead is
     * used so that the closing underscore is not consumed: in "_x0000_x0001_" that
     * closing underscore also opens the second sequence and must be encoded too
     * ("_x005F_x0000_x005F_x0001_").
     *
     * @param string $string String to escape
     * @return string The escaped string
     */
    protected function escapeEscapeCharacter($string)
    {
        return \preg_replace('/_(?=x[\dA-F]{4}_)/', '_x005F_', $string);
    }

    /**
     * Converts OpenXML escaped control characters from the given string to PHP control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to unescape
     * @return string
     */
    protected function unescapeControlCharacters($string)
    {
        $unescapedString = $string;

        foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
            // only unescape sequences whose leading underscore is not itself escaped;
            // "_x005F_xHHHH_" is a literal "_xHHHH_" and is handled afterwards
            $unescapedString = \preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
        }

        return $this->unescapeEscapeCharacter($unescapedString);
    }

    /**
     * Unescapes the escape character: "_x005F_x0000_" => "_x0000_"
     *
     * "_x005F_" is turned back into "_" when it is followed by "xHHHH_". The following
     * sequence is only looked at, not consumed, so that consecutive escaped sequences
     * such as "_x005F_x0000_x005F_x0001_" are all decoded ("_x0000_x0001_").
     *
     * @param string $string String to unescape
     * @return string The unescaped string
     */
    protected function unescapeEscapeCharacter($string)
    {
        return \preg_replace('/_x005F_(?=x[\dA-F]{4}_)/', '_', $string);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body