Differences Between: [Versions 310 and 311] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]
<?php

/*
 * This file is part of Mustache.php.
 *
 * (c) 2010-2017 Justin Hileman
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Mustache Tokenizer class.
 *
 * This class is responsible for turning raw template source into a set of Mustache tokens.
 *
 * The scanner is a small finite state machine that walks the template one byte
 * at a time: it is either collecting plain text (IN_TEXT), has just found an
 * opening delimiter and is deciding which kind of tag follows (IN_TAG_TYPE), or
 * is collecting the contents of a tag up to the closing delimiter (IN_TAG).
 */
class Mustache_Tokenizer
{
    // Finite state machine states
    const IN_TEXT     = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG      = 2;

    // Token types
    const T_SECTION      = '#';
    const T_INVERTED     = '^';
    const T_END_SECTION  = '/';
    const T_COMMENT      = '!';
    const T_PARTIAL      = '>';
    const T_PARENT       = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED      = '_v';
    const T_UNESCAPED    = '{';
    const T_UNESCAPED_2  = '&';
    const T_TEXT         = '_t';
    const T_PRAGMA       = '%';
    const T_BLOCK_VAR    = '$';
    const T_BLOCK_ARG    = '$arg';

    // Valid token types, keyed by the sigil that follows the opening delimiter
    private static $tagTypes = array(
        self::T_SECTION      => true,
        self::T_INVERTED     => true,
        self::T_END_SECTION  => true,
        self::T_COMMENT      => true,
        self::T_PARTIAL      => true,
        self::T_PARENT       => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED      => true,
        self::T_UNESCAPED    => true,
        self::T_UNESCAPED_2  => true,
        self::T_PRAGMA       => true,
        self::T_BLOCK_VAR    => true,
    );

    // Token properties
    const TYPE    = 'type';
    const NAME    = 'name';
    const OTAG    = 'otag';
    const CTAG    = 'ctag';
    const LINE    = 'line';
    const INDEX   = 'index';
    const END     = 'end';
    const INDENT  = 'indent';
    const NODES   = 'nodes';
    const VALUE   = 'value';
    const FILTERS = 'filters';

    private $state;
    private $tagType;
    private $buffer;
    private $tokens;
    private $seenTag;
    private $line;

    private $otag;
    private $otagChar;
    private $otagLen;

    private $ctag;
    private $ctagChar;
    private $ctagLen;

    /**
     * Scan and tokenize template source.
     *
     * @throws Mustache_Exception_SyntaxException when mismatched section tags are encountered, or
     *                                            when a pragma or delimiter change tag is never closed
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: empty string)
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = '')
    {
        // Setting mbstring.func_overload makes things *really* slow.
        // Let's do everyone a favor and scan this string as ASCII instead.
        //
        // The INI directive was removed in PHP 8.0 so we don't need to check there (and can drop it
        // when we remove support for older versions of PHP).
        //
        // @codeCoverageIgnoreStart
        $encoding = null;
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            if (function_exists('mb_internal_encoding') && ini_get('mbstring.func_overload') & 2) {
                $encoding = mb_internal_encoding();
                mb_internal_encoding('ASCII');
            }
        }
        // @codeCoverageIgnoreEnd

        try {
            $tokens = $this->tokenize($text, $delimiters);
        } catch (Exception $e) {
            // Restore the user's encoding even when tokenizing fails, so a
            // template syntax error does not leave the process stuck in ASCII.
            // (Catch-and-rethrow rather than `finally`, which needs PHP 5.5+.)
            // @codeCoverageIgnoreStart
            if ($encoding) {
                mb_internal_encoding($encoding);
            }
            // @codeCoverageIgnoreEnd

            throw $e;
        }

        // Restore the user's encoding...
        // @codeCoverageIgnoreStart
        if ($encoding) {
            mb_internal_encoding($encoding);
        }
        // @codeCoverageIgnoreEnd

        return $tokens;
    }

    /**
     * Run the tokenizer state machine over the template source.
     *
     * @throws Mustache_Exception_SyntaxException when mismatched or unclosed tags are encountered
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Initial opening and closing delimiters, or an empty string for the defaults
     *
     * @return array Set of Mustache tokens
     */
    private function tokenize($text, $delimiters)
    {
        $this->reset();

        // Non-string and empty (falsy after trimming) values keep the default delimiters.
        if (is_string($delimiters) && $delimiters = trim($delimiters)) {
            $this->setDelimiters($delimiters);
        }

        $len = strlen($text);
        for ($i = 0; $i < $len; $i++) {
            switch ($this->state) {
                case self::IN_TEXT:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
                        // Step back so the next iteration revisits the first
                        // delimiter character in the IN_TAG_TYPE state.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        $this->buffer .= $char;
                        if ($char === "\n") {
                            // Text tokens never span lines.
                            $this->flushBuffer();
                            $this->line++;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:
                    // Move to the last character of the opening delimiter.
                    $i += $this->otagLen - 1;

                    // The template may end right after the opening delimiter;
                    // guard the lookahead so that does not read past the end.
                    $char = ($i + 1 < $len) ? $text[$i + 1] : '';
                    if (isset(self::$tagTypes[$char])) {
                        $tag = $char;
                        $this->tagType = $tag;
                    } else {
                        // No sigil: a plain (escaped) variable tag.
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $this->state = self::IN_TEXT;
                    } elseif ($this->tagType === self::T_PRAGMA) {
                        $i = $this->addPragma($text, $i);
                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Skip over the sigil character.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
                        $token = array(
                            self::TYPE  => $this->tagType,
                            self::NAME  => trim($this->buffer),
                            self::OTAG  => $this->otag,
                            self::CTAG  => $this->ctag,
                            self::LINE  => $this->line,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                        );

                        if ($this->tagType === self::T_UNESCAPED) {
                            // Clean up `{{{ tripleStache }}}` style tokens.
                            if ($this->ctag === '}}') {
                                if (($i + 2 < $len) && $text[$i + 2] === '}') {
                                    // Consume the third closing brace.
                                    $i++;
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            } else {
                                // With custom delimiters the extra `}` ends up
                                // at the end of the buffered tag name.
                                $lastName = $token[self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $token[self::NAME] = trim(substr($lastName, 0, -1));
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            }
                        }

                        $this->buffer = '';
                        $i += $this->ctagLen - 1;
                        $this->state = self::IN_TEXT;
                        $this->tokens[] = $token;
                    } else {
                        $this->buffer .= $char;
                    }
                    break;
            }
        }

        $this->flushBuffer();

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     */
    private function reset()
    {
        $this->state   = self::IN_TEXT;
        $this->tagType = null;
        $this->buffer  = '';
        $this->tokens  = array();
        $this->seenTag = false;
        $this->line    = 0;

        $this->otag     = '{{';
        $this->otagChar = '{';
        $this->otagLen  = 2;

        $this->ctag     = '}}';
        $this->ctagChar = '}';
        $this->ctagLen  = 2;
    }

    /**
     * Flush the current buffer to a token.
     *
     * Does nothing when the buffer is empty.
     */
    private function flushBuffer()
    {
        if (strlen($this->buffer) > 0) {
            $this->tokens[] = array(
                self::TYPE  => self::T_TEXT,
                self::LINE  => $this->line,
                self::VALUE => $this->buffer,
            );
            $this->buffer = '';
        }
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * @throws Mustache_Exception_SyntaxException when delimiter string is invalid or the tag is never closed
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value
     */
    private function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close      = '=' . $this->ctag;

        // Search from just past the opening `=` so that character can never
        // be mistaken for the start of the closing `=` + ctag sequence.
        $closeIndex = strpos($text, $close, $startIndex);

        $token = array(
            self::TYPE => self::T_DELIM_CHANGE,
            self::LINE => $this->line,
        );

        if ($closeIndex === false) {
            // Without this check the `false` would be used as offset 0, which
            // both mangles the delimiter string and rewinds the scan index.
            $msg = sprintf('Unclosed delimiter change tag on line %d', $this->line);

            throw new Mustache_Exception_SyntaxException($msg, $token);
        }

        try {
            $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
        } catch (Mustache_Exception_InvalidArgumentException $e) {
            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
        }

        $this->tokens[] = $token;

        // Index of the last character of the closing sequence.
        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Set the current Mustache `otag` and `ctag` delimiters.
     *
     * The delimiter string must consist of exactly two whitespace-separated
     * chunks: the opening delimiter followed by the closing delimiter.
     *
     * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
     *
     * @param string $delimiters
     */
    private function setDelimiters($delimiters)
    {
        if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
        }

        list($_, $otag, $ctag) = $matches;

        $this->otag     = $otag;
        $this->otagChar = $otag[0];
        $this->otagLen  = strlen($otag);

        $this->ctag     = $ctag;
        $this->ctagChar = $ctag[0];
        $this->ctagLen  = strlen($ctag);
    }

    /**
     * Add pragma token.
     *
     * Pragmas are hoisted to the front of the template, so all pragma tokens
     * will appear at the front of the token list.
     *
     * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed
     *
     * @param string $text
     * @param int    $index
     *
     * @return int New index value
     */
    private function addPragma($text, $index)
    {
        $end = strpos($text, $this->ctag, $index);

        if ($end === false) {
            // An unclosed pragma used to rewind the scan index to the start of
            // the template, re-adding the same pragma token forever.
            $token = array(
                self::TYPE => self::T_PRAGMA,
                self::LINE => $this->line,
            );
            $msg = sprintf('Unclosed pragma tag on line %d', $this->line);

            throw new Mustache_Exception_SyntaxException($msg, $token);
        }

        // $index is the last character of the opening delimiter and $index + 1
        // is the `%` sigil, so the pragma name starts at $index + 2.
        $pragma = trim(substr($text, $index + 2, $end - $index - 2));

        // Pragmas are hoisted to the front of the template.
        array_unshift($this->tokens, array(
            self::TYPE => self::T_PRAGMA,
            self::NAME => $pragma,
            self::LINE => 0,
        ));

        return $end + $this->ctagLen - 1;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body