Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403]
<?php

namespace Sabberworm\CSS\Parsing;

use Sabberworm\CSS\Comment\Comment;
use Sabberworm\CSS\Settings;

/**
 * Cursor over a CSS source text.
 *
 * The text is split into an array of single characters (charset-aware when
 * multibyte support is enabled in the settings); all positions and lengths
 * handled by this class are indexes into that array. The class also keeps a
 * running line number that is advanced whenever a "\n" is consumed.
 */
class ParserState
{
    /**
     * Marker that can be passed in the stop list of {@see consumeUntil()} to
     * allow the end of the input to terminate the search.
     *
     * @var null
     */
    const EOF = null;

    /**
     * @var Settings
     */
    private $oParserSettings;

    /**
     * The unsplit source text.
     *
     * @var string
     */
    private $sText;

    /**
     * The source text split into single characters.
     *
     * @var array<int, string>
     */
    private $aText;

    /**
     * Index of the next character to read in `$aText`.
     *
     * @var int
     */
    private $iCurrentPosition;

    /**
     * @var string
     */
    private $sCharset;

    /**
     * Number of characters in `$aText`.
     *
     * @var int
     */
    private $iLength;

    /**
     * @var int
     */
    private $iLineNo;

    /**
     * @param string $sText
     * @param int $iLineNo line number assigned to the first line of `$sText`
     */
    public function __construct($sText, Settings $oParserSettings, $iLineNo = 1)
    {
        $this->oParserSettings = $oParserSettings;
        $this->sText = $sText;
        $this->iCurrentPosition = 0;
        $this->iLineNo = $iLineNo;
        $this->setCharset($this->oParserSettings->sDefaultCharset);
    }

    /**
     * Sets the charset and re-splits the source text into characters accordingly.
     *
     * @param string $sCharset
     *
     * @return void
     */
    public function setCharset($sCharset)
    {
        $this->sCharset = $sCharset;
        $aCharacters = $this->strsplit($this->sText);
        // preg_split() (used for UTF-8) returns false on failure, e.g. for malformed input.
        // Fall back to an empty text so that the character array and the length never
        // get out of sync (previously the length was left stale/unset in that case).
        $this->aText = is_array($aCharacters) ? $aCharacters : [];
        $this->iLength = count($this->aText);
    }

    /**
     * @return string
     */
    public function getCharset()
    {
        return $this->sCharset;
    }

    /**
     * @return int
     */
    public function currentLine()
    {
        return $this->iLineNo;
    }

    /**
     * Returns the current read position.
     *
     * Note that this is the character offset from the start of the text; it is not
     * reset when a line break is consumed.
     *
     * @return int
     */
    public function currentColumn()
    {
        return $this->iCurrentPosition;
    }

    /**
     * @return Settings
     */
    public function getSettings()
    {
        return $this->oParserSettings;
    }

    /**
     * Consumes an identifier: one identifier character followed by as many further
     * identifier characters as are available.
     *
     * @param bool $bIgnoreCase whether to lower-case the result
     *
     * @return string
     *
     * @throws UnexpectedTokenException if no identifier character is found at the current position
     */
    public function parseIdentifier($bIgnoreCase = true)
    {
        $sResult = $this->parseCharacter(true);
        if ($sResult === null) {
            throw new UnexpectedTokenException($sResult, $this->peek(5), 'identifier', $this->iLineNo);
        }
        while (($sCharacter = $this->parseCharacter(true)) !== null) {
            if (preg_match('/[a-zA-Z0-9\x{00A0}-\x{FFFF}_-]/Sux', $sCharacter)) {
                $sResult .= $sCharacter;
            } else {
                // The character came from an escape sequence and is not a plain
                // identifier character, so it is kept in escaped form.
                $sResult .= '\\' . $sCharacter;
            }
        }
        if ($bIgnoreCase) {
            $sResult = $this->strtolower($sResult);
        }
        return $sResult;
    }

    /**
     * Consumes a single (possibly backslash-escaped) character.
     *
     * @param bool $bIsForIdentifier
     *        if true, only characters allowed in identifiers are consumed;
     *        if false, any single character is consumed
     *
     * @return string|null
     *         the (unescaped) character, or null if `$bIsForIdentifier` is set and
     *         the next character is not an identifier character
     *
     * @throws UnexpectedEOFException
     * @throws UnexpectedTokenException
     */
    public function parseCharacter($bIsForIdentifier)
    {
        if ($this->peek() === '\\') {
            if (
                $bIsForIdentifier && $this->oParserSettings->bLenientParsing
                && ($this->comes('\0') || $this->comes('\9'))
            ) {
                // Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing.
                return null;
            }
            $this->consume('\\');
            // NOTE(review): these literals are single-quoted, so they test for a backslash
            // followed by the letter "n"/"r", not for an actual line break. Left unchanged;
            // confirm whether a line-continuation check ("\n"/"\r") was intended.
            if ($this->comes('\n') || $this->comes('\r')) {
                return '';
            }
            if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
                // Not a hex digit: the escape stands for the character itself.
                return $this->consume(1);
            }
            $sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u', 6);
            if ($this->strlen($sUnicode) < 6) {
                // Consume whitespace after incomplete unicode escape
                if (preg_match('/\\s/isSu', $this->peek())) {
                    // NOTE(review): single-quoted as well, so this compares against the four
                    // characters backslash-r-backslash-n rather than CR LF. Left unchanged.
                    if ($this->comes('\r\n')) {
                        $this->consume(2);
                    } else {
                        $this->consume(1);
                    }
                }
            }
            // Encode the code point as four little-endian bytes and convert to the target charset.
            $iUnicode = intval($sUnicode, 16);
            $sUtf32 = "";
            for ($i = 0; $i < 4; ++$i) {
                $sUtf32 .= chr($iUnicode & 0xff);
                $iUnicode = $iUnicode >> 8;
            }
            return iconv('utf-32le', $this->sCharset, $sUtf32);
        }
        if ($bIsForIdentifier) {
            $peek = ord($this->peek());
            // Ranges: a-z A-Z 0-9 - _
            if (
                ($peek >= 97 && $peek <= 122)
                || ($peek >= 65 && $peek <= 90)
                || ($peek >= 48 && $peek <= 57)
                || ($peek === 45)
                || ($peek === 95)
                || ($peek > 0xa1)
            ) {
                return $this->consume(1);
            }
        } else {
            return $this->consume(1);
        }
        return null;
    }

    /**
     * Consumes any run of whitespace and comments at the current position.
     *
     * In lenient parsing mode, an unterminated comment swallows the rest of the
     * input instead of raising an exception.
     *
     * @return array<int, Comment> the comments that were consumed
     *
     * @throws UnexpectedEOFException
     * @throws UnexpectedTokenException
     */
    public function consumeWhiteSpace()
    {
        $comments = [];
        do {
            while (preg_match('/\\s/isSu', $this->peek()) === 1) {
                $this->consume(1);
            }
            if ($this->oParserSettings->bLenientParsing) {
                try {
                    $oComment = $this->consumeComment();
                } catch (UnexpectedEOFException $e) {
                    $this->iCurrentPosition = $this->iLength;
                    // Return what was collected so far: callers always get an array,
                    // and the comments that were closed properly are not lost.
                    return $comments;
                }
            } else {
                $oComment = $this->consumeComment();
            }
            if ($oComment !== false) {
                $comments[] = $oComment;
            }
        } while ($oComment !== false);
        return $comments;
    }

    /**
     * Checks whether the given string comes next in the input, without consuming it.
     *
     * @param string $sString
     * @param bool $bCaseInsensitive
     *
     * @return bool
     */
    public function comes($sString, $bCaseInsensitive = false)
    {
        $sPeek = $this->peek(strlen($sString));
        if ($sPeek === '') {
            return false;
        }
        return $this->streql($sPeek, $sString, $bCaseInsensitive);
    }

    /**
     * Returns upcoming characters without consuming them.
     *
     * @param int $iLength number of characters to return
     * @param int $iOffset offset relative to the current position
     *
     * @return string the characters (fewer than requested near the end), or '' past the end of the input
     */
    public function peek($iLength = 1, $iOffset = 0)
    {
        $iOffset += $this->iCurrentPosition;
        if ($iOffset >= $this->iLength) {
            return '';
        }
        return $this->substr($iOffset, $iLength);
    }

    /**
     * Consumes either a literal string or a number of characters.
     *
     * @param int|string $mValue
     *        a string that must come next in the input (compared case-insensitively),
     *        or the number of characters to consume
     *
     * @return string the consumed text (for a string argument, `$mValue` itself)
     *
     * @throws UnexpectedEOFException if fewer than the requested number of characters are left
     * @throws UnexpectedTokenException if the given string does not come next
     */
    public function consume($mValue = 1)
    {
        if (is_string($mValue)) {
            $iLineCount = substr_count($mValue, "\n");
            $iLength = $this->strlen($mValue);
            if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
                // Pass the match type explicitly so that the line number ends up in the
                // fourth argument, as in every other exception raised by this class.
                throw new UnexpectedTokenException(
                    $mValue,
                    $this->peek(max($iLength, 5)),
                    'literal',
                    $this->iLineNo
                );
            }
            $this->iLineNo += $iLineCount;
            $this->iCurrentPosition += $iLength;
            return $mValue;
        }

        if ($this->iCurrentPosition + $mValue > $this->iLength) {
            throw new UnexpectedEOFException($mValue, $this->peek(5), 'count', $this->iLineNo);
        }
        $sResult = $this->substr($this->iCurrentPosition, $mValue);
        $this->iLineNo += substr_count($sResult, "\n");
        $this->iCurrentPosition += $mValue;
        return $sResult;
    }

    /**
     * Consumes the text matched by a regular expression.
     *
     * The match must start at the current position; otherwise the subsequent
     * literal {@see consume()} of the matched text fails.
     *
     * @param string $mExpression the regular expression, including delimiters
     * @param int|null $iMaxLength
     *        number of upcoming characters to match against; null to match against the remaining input
     *
     * @return string
     *
     * @throws UnexpectedEOFException
     * @throws UnexpectedTokenException if the expression does not match
     */
    public function consumeExpression($mExpression, $iMaxLength = null)
    {
        $aMatches = null;
        $sInput = $iMaxLength !== null ? $this->peek($iMaxLength) : $this->inputLeft();
        if (preg_match($mExpression, $sInput, $aMatches, PREG_OFFSET_CAPTURE) === 1) {
            return $this->consume($aMatches[0][0]);
        }
        throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo);
    }

    /**
     * Consumes a comment if one starts at the current position.
     *
     * @return Comment|false the comment (without its delimiters), or false if no comment starts here
     *
     * @throws UnexpectedEOFException if the comment is not terminated before the end of the input
     * @throws UnexpectedTokenException
     */
    public function consumeComment()
    {
        if (!$this->comes('/*')) {
            return false;
        }

        $iLineNo = $this->iLineNo;
        // Only the "/" is consumed here; the "*" is picked up by the first loop
        // iteration and stripped again below.
        $this->consume(1);
        $sComment = '';
        while (($char = $this->consume(1)) !== '') {
            $sComment .= $char;
            if ($this->comes('*/')) {
                $this->consume(2);
                break;
            }
        }

        // We skip the * which was included in the comment.
        return new Comment(substr($sComment, 1), $iLineNo);
    }

    /**
     * @return bool
     */
    public function isEnd()
    {
        return $this->iCurrentPosition >= $this->iLength;
    }

    /**
     * Consumes characters until one of the given stop characters is found.
     *
     * Comments encountered on the way are consumed as well and appended to `$comments`
     * rather than to the returned text.
     *
     * @param array<array-key, string|null>|string $aEnd
     *        the stop character(s); include `self::EOF` to allow the end of the input to stop the search
     * @param bool $bIncludeEnd whether to append the stop character to the returned text (and consume it)
     * @param bool $consumeEnd whether to consume the stop character without returning it
     * @param array<int, Comment> $comments receives the comments that were skipped
     *
     * @return string
     *
     * @throws UnexpectedEOFException
     *         if the end of the input is reached and `self::EOF` is not among the stop characters
     * @throws UnexpectedTokenException
     */
    public function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, array &$comments = [])
    {
        $aEnd = is_array($aEnd) ? $aEnd : [$aEnd];
        $out = '';
        $start = $this->iCurrentPosition;

        while (!$this->isEnd()) {
            $iLineNoBeforeChar = $this->iLineNo;
            $char = $this->consume(1);
            if (in_array($char, $aEnd, true)) {
                if ($bIncludeEnd) {
                    $out .= $char;
                } elseif (!$consumeEnd) {
                    // Put the stop character back. The line number has to be rewound with
                    // it; otherwise a "\n" stop character is counted twice once it is
                    // consumed again.
                    $this->iCurrentPosition -= $this->strlen($char);
                    $this->iLineNo = $iLineNoBeforeChar;
                }
                return $out;
            }
            $out .= $char;
            if ($comment = $this->consumeComment()) {
                $comments[] = $comment;
            }
        }

        if (in_array(self::EOF, $aEnd, true)) {
            return $out;
        }

        $this->iCurrentPosition = $start;
        throw new UnexpectedEOFException(
            'One of ("' . implode('","', $aEnd) . '")',
            $this->peek(5),
            'search',
            $this->iLineNo
        );
    }

    /**
     * Returns the input from the current position onwards.
     *
     * NOTE(review): the length of -1 makes substr() exclude the final character of the
     * text. Left unchanged; confirm whether dropping the last character is intended.
     *
     * @return string
     */
    private function inputLeft()
    {
        return $this->substr($this->iCurrentPosition, -1);
    }

    /**
     * Compares two strings, case-insensitively by default.
     *
     * @param string $sString1
     * @param string $sString2
     * @param bool $bCaseInsensitive
     *
     * @return bool
     */
    public function streql($sString1, $sString2, $bCaseInsensitive = true)
    {
        if ($bCaseInsensitive) {
            return $this->strtolower($sString1) === $this->strtolower($sString2);
        }
        return $sString1 === $sString2;
    }

    /**
     * Moves the read position back. The line number is not adjusted.
     *
     * @param int $iAmount number of characters to go back
     *
     * @return void
     */
    public function backtrack($iAmount)
    {
        $this->iCurrentPosition -= $iAmount;
    }

    /**
     * Returns the length of a string: in characters of the current charset if
     * multibyte support is enabled, in bytes otherwise.
     *
     * @param string $sString
     *
     * @return int
     */
    public function strlen($sString)
    {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strlen($sString, $this->sCharset);
        }
        return strlen($sString);
    }

    /**
     * Returns a slice of the split source text.
     *
     * @param int $iStart index of the first character
     * @param int $iLength
     *        number of characters; a negative value omits that many characters from the end of the text
     *
     * @return string
     */
    private function substr($iStart, $iLength)
    {
        if ($iLength < 0) {
            $iLength = $this->iLength - $iStart + $iLength;
        }
        // Clamp to the end of the text.
        if ($iStart + $iLength > $this->iLength) {
            $iLength = $this->iLength - $iStart;
        }
        $sResult = '';
        while ($iLength > 0) {
            $sResult .= $this->aText[$iStart];
            $iStart++;
            $iLength--;
        }
        return $sResult;
    }

    /**
     * @param string $sString
     *
     * @return string
     */
    private function strtolower($sString)
    {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strtolower($sString, $this->sCharset);
        }
        return strtolower($sString);
    }

    /**
     * Splits a string into single characters (single bytes if multibyte support is disabled).
     *
     * @param string $sString
     *
     * @return array<int, string>|false false if the UTF-8 split via preg_split() fails
     */
    private function strsplit($sString)
    {
        if ($this->oParserSettings->bMultibyteSupport) {
            if ($this->streql($this->sCharset, 'utf-8')) {
                return preg_split('//u', $sString, -1, PREG_SPLIT_NO_EMPTY);
            }
            $iLength = mb_strlen($sString, $this->sCharset);
            $aResult = [];
            for ($i = 0; $i < $iLength; ++$i) {
                $aResult[] = mb_substr($sString, $i, 1, $this->sCharset);
            }
            return $aResult;
        }

        if ($sString === '') {
            return [];
        }
        return str_split($sString);
    }

    /**
     * @param string $sString
     * @param string $sNeedle
     * @param int $iOffset
     *
     * @return int|false
     */
    private function strpos($sString, $sNeedle, $iOffset)
    {
        if ($this->oParserSettings->bMultibyteSupport) {
            return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset);
        }
        return strpos($sString, $sNeedle, $iOffset);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body