Differences Between: [Versions 310 and 311] [Versions 311 and 400] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Calculation; 4 5 /** 6 * PARTLY BASED ON: 7 * Copyright (c) 2007 E. W. Bachtal, Inc. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software 10 * and associated documentation files (the "Software"), to deal in the Software without restriction, 11 * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 13 * subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * The software is provided "as is", without warranty of any kind, express or implied, including but not 19 * limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In 20 * no event shall the authors or copyright holders be liable for any claim, damages or other liability, 21 * whether in an action of contract, tort or otherwise, arising from, out of or in connection with the 22 * software or the use or other dealings in the software. 23 * 24 * https://ewbi.blogs.com/develops/2007/03/excel_formula_p.html 25 * https://ewbi.blogs.com/develops/2004/12/excel_formula_p.html 26 */ 27 class FormulaParser 28 { 29 // Character constants 30 const QUOTE_DOUBLE = '"'; 31 const QUOTE_SINGLE = '\''; 32 const BRACKET_CLOSE = ']'; 33 const BRACKET_OPEN = '['; 34 const BRACE_OPEN = '{'; 35 const BRACE_CLOSE = '}'; 36 const PAREN_OPEN = '('; 37 const PAREN_CLOSE = ')'; 38 const SEMICOLON = ';'; 39 const WHITESPACE = ' '; 40 const COMMA = ','; 41 const ERROR_START = '#'; 42 43 const OPERATORS_SN = '+-'; 44 const OPERATORS_INFIX = '+-*/^&=><'; 45 const OPERATORS_POSTFIX = '%'; 46 47 /** 48 * Formula. 49 * 50 * @var string 51 */ 52 private $formula; 53 54 /** 55 * Tokens. 56 * 57 * @var FormulaToken[] 58 */ 59 private $tokens = []; 60 61 /** 62 * Create a new FormulaParser. 63 * 64 * @param string $pFormula Formula to parse 65 */ 66 public function __construct($pFormula = '') 67 { 68 // Check parameters 69 if ($pFormula === null) { 70 throw new Exception('Invalid parameter passed: formula'); 71 } 72 73 // Initialise values 74 $this->formula = trim($pFormula); 75 // Parse! 76 $this->parseToTokens(); 77 } 78 79 /** 80 * Get Formula. 81 * 82 * @return string 83 */ 84 public function getFormula() 85 { 86 return $this->formula; 87 } 88 89 /** 90 * Get Token. 91 * 92 * @param int $pId Token id 93 * 94 * @return string 95 */ 96 public function getToken($pId = 0) 97 { 98 if (isset($this->tokens[$pId])) { 99 return $this->tokens[$pId]; 100 } 101 102 throw new Exception("Token with id $pId does not exist."); 103 } 104 105 /** 106 * Get Token count. 107 * 108 * @return int 109 */ 110 public function getTokenCount() 111 { 112 return count($this->tokens); 113 } 114 115 /** 116 * Get Tokens. 117 * 118 * @return FormulaToken[] 119 */ 120 public function getTokens() 121 { 122 return $this->tokens; 123 } 124 125 /** 126 * Parse to tokens. 127 */ 128 private function parseToTokens(): void 129 { 130 // No attempt is made to verify formulas; assumes formulas are derived from Excel, where 131 // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions. 132 133 // Check if the formula has a valid starting = 134 $formulaLength = strlen($this->formula); 135 if ($formulaLength < 2 || $this->formula[0] != '=') { 136 return; 137 } 138 139 // Helper variables 140 $tokens1 = $tokens2 = $stack = []; 141 $inString = $inPath = $inRange = $inError = false; 142 $token = $previousToken = $nextToken = null; 143 144 $index = 1; 145 $value = ''; 146 147 $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A']; 148 $COMPARATORS_MULTI = ['>=', '<=', '<>']; 149 150 while ($index < $formulaLength) { 151 // state-dependent character evaluation (order is important) 152 153 // double-quoted strings 154 // embeds are doubled 155 // end marks token 156 if ($inString) { 157 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 158 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) { 159 $value .= self::QUOTE_DOUBLE; 160 ++$index; 161 } else { 162 $inString = false; 163 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT); 164 $value = ''; 165 } 166 } else { 167 $value .= $this->formula[$index]; 168 } 169 ++$index; 170 171 continue; 172 } 173 174 // single-quoted strings (links) 175 // embeds are double 176 // end does not mark a token 177 if ($inPath) { 178 if ($this->formula[$index] == self::QUOTE_SINGLE) { 179 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) { 180 $value .= self::QUOTE_SINGLE; 181 ++$index; 182 } else { 183 $inPath = false; 184 } 185 } else { 186 $value .= $this->formula[$index]; 187 } 188 ++$index; 189 190 continue; 191 } 192 193 // bracked strings (R1C1 range index or linked workbook name) 194 // no embeds (changed to "()" by Excel) 195 // end does not mark a token 196 if ($inRange) { 197 if ($this->formula[$index] == self::BRACKET_CLOSE) { 198 $inRange = false; 199 } 200 $value .= $this->formula[$index]; 201 ++$index; 202 203 continue; 204 } 205 206 // error values 207 // end marks a token, determined from absolute list of values 208 if ($inError) { 209 $value .= $this->formula[$index]; 210 ++$index; 211 if (in_array($value, $ERRORS)) { 212 $inError = false; 213 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR); 214 $value = ''; 215 } 216 217 continue; 218 } 219 220 // scientific notation check 221 if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) { 222 if (strlen($value) > 1) { 223 if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) { 224 $value .= $this->formula[$index]; 225 ++$index; 226 227 continue; 228 } 229 } 230 } 231 232 // independent character evaluation (order not important) 233 234 // establish state-dependent character evaluations 235 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 236 if (strlen($value) > 0) { 237 // unexpected 238 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 239 $value = ''; 240 } 241 $inString = true; 242 ++$index; 243 244 continue; 245 } 246 247 if ($this->formula[$index] == self::QUOTE_SINGLE) { 248 if (strlen($value) > 0) { 249 // unexpected 250 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 251 $value = ''; 252 } 253 $inPath = true; 254 ++$index; 255 256 continue; 257 } 258 259 if ($this->formula[$index] == self::BRACKET_OPEN) { 260 $inRange = true; 261 $value .= self::BRACKET_OPEN; 262 ++$index; 263 264 continue; 265 } 266 267 if ($this->formula[$index] == self::ERROR_START) { 268 if (strlen($value) > 0) { 269 // unexpected 270 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 271 $value = ''; 272 } 273 $inError = true; 274 $value .= self::ERROR_START; 275 ++$index; 276 277 continue; 278 } 279 280 // mark start and end of arrays and array rows 281 if ($this->formula[$index] == self::BRACE_OPEN) { 282 if (strlen($value) > 0) { 283 // unexpected 284 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 285 $value = ''; 286 } 287 288 $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 289 $tokens1[] = $tmp; 290 $stack[] = clone $tmp; 291 292 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 293 $tokens1[] = $tmp; 294 $stack[] = clone $tmp; 295 296 ++$index; 297 298 continue; 299 } 300 301 if ($this->formula[$index] == self::SEMICOLON) { 302 if (strlen($value) > 0) { 303 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 304 $value = ''; 305 } 306 307 $tmp = array_pop($stack); 308 $tmp->setValue(''); 309 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 310 $tokens1[] = $tmp; 311 312 $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 313 $tokens1[] = $tmp; 314 315 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 316 $tokens1[] = $tmp; 317 $stack[] = clone $tmp; 318 319 ++$index; 320 321 continue; 322 } 323 324 if ($this->formula[$index] == self::BRACE_CLOSE) { 325 if (strlen($value) > 0) { 326 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 327 $value = ''; 328 } 329 330 $tmp = array_pop($stack); 331 $tmp->setValue(''); 332 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 333 $tokens1[] = $tmp; 334 335 $tmp = array_pop($stack); 336 $tmp->setValue(''); 337 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 338 $tokens1[] = $tmp; 339 340 ++$index; 341 342 continue; 343 } 344 345 // trim white-space 346 if ($this->formula[$index] == self::WHITESPACE) { 347 if (strlen($value) > 0) { 348 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 349 $value = ''; 350 } 351 $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE); 352 ++$index; 353 while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) { 354 ++$index; 355 } 356 357 continue; 358 } 359 360 // multi-character comparators 361 if (($index + 2) <= $formulaLength) { 362 if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) { 363 if (strlen($value) > 0) { 364 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 365 $value = ''; 366 } 367 $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL); 368 $index += 2; 369 370 continue; 371 } 372 } 373 374 // standard infix operators 375 if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) { 376 if (strlen($value) > 0) { 377 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 378 $value = ''; 379 } 380 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX); 381 ++$index; 382 383 continue; 384 } 385 386 // standard postfix operators (only one) 387 if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) { 388 if (strlen($value) > 0) { 389 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 390 $value = ''; 391 } 392 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX); 393 ++$index; 394 395 continue; 396 } 397 398 // start subexpression or function 399 if ($this->formula[$index] == self::PAREN_OPEN) { 400 if (strlen($value) > 0) { 401 $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 402 $tokens1[] = $tmp; 403 $stack[] = clone $tmp; 404 $value = ''; 405 } else { 406 $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START); 407 $tokens1[] = $tmp; 408 $stack[] = clone $tmp; 409 } 410 ++$index; 411 412 continue; 413 } 414 415 // function, subexpression, or array parameters, or operand unions 416 if ($this->formula[$index] == self::COMMA) { 417 if (strlen($value) > 0) { 418 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 419 $value = ''; 420 } 421 422 $tmp = array_pop($stack); 423 $tmp->setValue(''); 424 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 425 $stack[] = $tmp; 426 427 if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 428 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION); 429 } else { 430 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 431 } 432 ++$index; 433 434 continue; 435 } 436 437 // stop subexpression 438 if ($this->formula[$index] == self::PAREN_CLOSE) { 439 if (strlen($value) > 0) { 440 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 441 $value = ''; 442 } 443 444 $tmp = array_pop($stack); 445 $tmp->setValue(''); 446 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 447 $tokens1[] = $tmp; 448 449 ++$index; 450 451 continue; 452 } 453 454 // token accumulation 455 $value .= $this->formula[$index]; 456 ++$index; 457 } 458 459 // dump remaining accumulation 460 if (strlen($value) > 0) { 461 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 462 } 463 464 // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections 465 $tokenCount = count($tokens1); 466 for ($i = 0; $i < $tokenCount; ++$i) { 467 $token = $tokens1[$i]; 468 if (isset($tokens1[$i - 1])) { 469 $previousToken = $tokens1[$i - 1]; 470 } else { 471 $previousToken = null; 472 } 473 if (isset($tokens1[$i + 1])) { 474 $nextToken = $tokens1[$i + 1]; 475 } else { 476 $nextToken = null; 477 } 478 479 if ($token === null) { 480 continue; 481 } 482 483 if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) { 484 $tokens2[] = $token; 485 486 continue; 487 } 488 489 if ($previousToken === null) { 490 continue; 491 } 492 493 if ( 494 !( 495 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 496 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 497 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 498 ) 499 ) { 500 continue; 501 } 502 503 if ($nextToken === null) { 504 continue; 505 } 506 507 if ( 508 !( 509 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 510 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 511 ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 512 ) 513 ) { 514 continue; 515 } 516 517 $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION); 518 } 519 520 // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators 521 // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names 522 $this->tokens = []; 523 524 $tokenCount = count($tokens2); 525 for ($i = 0; $i < $tokenCount; ++$i) { 526 $token = $tokens2[$i]; 527 if (isset($tokens2[$i - 1])) { 528 $previousToken = $tokens2[$i - 1]; 529 } else { 530 $previousToken = null; 531 } 532 if (isset($tokens2[$i + 1])) { 533 $nextToken = $tokens2[$i + 1]; 534 } else { 535 $nextToken = null; 536 } 537 538 if ($token === null) { 539 continue; 540 } 541 542 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') { 543 if ($i == 0) { 544 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 545 } elseif ( 546 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 547 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 548 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 549 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 550 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 551 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 552 ) { 553 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 554 } else { 555 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 556 } 557 558 $this->tokens[] = $token; 559 560 continue; 561 } 562 563 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') { 564 if ($i == 0) { 565 continue; 566 } elseif ( 567 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 568 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 569 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 570 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 571 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 572 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 573 ) { 574 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 575 } else { 576 continue; 577 } 578 579 $this->tokens[] = $token; 580 581 continue; 582 } 583 584 if ( 585 $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && 586 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING 587 ) { 588 if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) { 589 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 590 } elseif ($token->getValue() == '&') { 591 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION); 592 } else { 593 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 594 } 595 596 $this->tokens[] = $token; 597 598 continue; 599 } 600 601 if ( 602 $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND && 603 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING 604 ) { 605 if (!is_numeric($token->getValue())) { 606 if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') { 607 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 608 } else { 609 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE); 610 } 611 } else { 612 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER); 613 } 614 615 $this->tokens[] = $token; 616 617 continue; 618 } 619 620 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 621 if (strlen($token->getValue()) > 0) { 622 if (substr($token->getValue(), 0, 1) == '@') { 623 $token->setValue(substr($token->getValue(), 1)); 624 } 625 } 626 } 627 628 $this->tokens[] = $token; 629 } 630 } 631 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body