Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402] [Versions 401 and 402]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Calculation; 4 5 /** 6 * PARTLY BASED ON: 7 * Copyright (c) 2007 E. W. Bachtal, Inc. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software 10 * and associated documentation files (the "Software"), to deal in the Software without restriction, 11 * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 13 * subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * The software is provided "as is", without warranty of any kind, express or implied, including but not 19 * limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In 20 * no event shall the authors or copyright holders be liable for any claim, damages or other liability, 21 * whether in an action of contract, tort or otherwise, arising from, out of or in connection with the 22 * software or the use or other dealings in the software. 23 * 24 * https://ewbi.blogs.com/develops/2007/03/excel_formula_p.html 25 * https://ewbi.blogs.com/develops/2004/12/excel_formula_p.html 26 */ 27 class FormulaParser 28 { 29 // Character constants 30 const QUOTE_DOUBLE = '"'; 31 const QUOTE_SINGLE = '\''; 32 const BRACKET_CLOSE = ']'; 33 const BRACKET_OPEN = '['; 34 const BRACE_OPEN = '{'; 35 const BRACE_CLOSE = '}'; 36 const PAREN_OPEN = '('; 37 const PAREN_CLOSE = ')'; 38 const SEMICOLON = ';'; 39 const WHITESPACE = ' '; 40 const COMMA = ','; 41 const ERROR_START = '#'; 42 43 const OPERATORS_SN = '+-'; 44 const OPERATORS_INFIX = '+-*/^&=><'; 45 const OPERATORS_POSTFIX = '%'; 46 47 /** 48 * Formula. 49 * 50 * @var string 51 */ 52 private $formula; 53 54 /** 55 * Tokens. 56 * 57 * @var FormulaToken[] 58 */ 59 private $tokens = []; 60 61 /** 62 * Create a new FormulaParser. 63 * 64 * @param ?string $formula Formula to parse 65 */ 66 public function __construct($formula = '') 67 { 68 // Check parameters 69 if ($formula === null) { 70 throw new Exception('Invalid parameter passed: formula'); 71 } 72 73 // Initialise values 74 $this->formula = trim($formula); 75 // Parse! 76 $this->parseToTokens(); 77 } 78 79 /** 80 * Get Formula. 81 * 82 * @return string 83 */ 84 public function getFormula() 85 { 86 return $this->formula; 87 } 88 89 /** 90 * Get Token. 91 * 92 * @param int $id Token id 93 */ 94 public function getToken(int $id = 0): FormulaToken 95 { 96 if (isset($this->tokens[$id])) { 97 return $this->tokens[$id]; 98 } 99 100 throw new Exception("Token with id $id does not exist."); 101 } 102 103 /** 104 * Get Token count. 105 * 106 * @return int 107 */ 108 public function getTokenCount() 109 { 110 return count($this->tokens); 111 } 112 113 /** 114 * Get Tokens. 115 * 116 * @return FormulaToken[] 117 */ 118 public function getTokens() 119 { 120 return $this->tokens; 121 } 122 123 /** 124 * Parse to tokens. 125 */ 126 private function parseToTokens(): void 127 { 128 // No attempt is made to verify formulas; assumes formulas are derived from Excel, where 129 // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions. 130 131 // Check if the formula has a valid starting = 132 $formulaLength = strlen($this->formula); 133 if ($formulaLength < 2 || $this->formula[0] != '=') { 134 return; 135 } 136 137 // Helper variables 138 $tokens1 = $tokens2 = $stack = []; 139 $inString = $inPath = $inRange = $inError = false; 140 $nextToken = null; 141 //$token = $previousToken = null; 142 143 $index = 1; 144 $value = ''; 145 146 $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A']; 147 $COMPARATORS_MULTI = ['>=', '<=', '<>']; 148 149 while ($index < $formulaLength) { 150 // state-dependent character evaluation (order is important) 151 152 // double-quoted strings 153 // embeds are doubled 154 // end marks token 155 if ($inString) { 156 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 157 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) { 158 $value .= self::QUOTE_DOUBLE; 159 ++$index; 160 } else { 161 $inString = false; 162 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT); 163 $value = ''; 164 } 165 } else { 166 $value .= $this->formula[$index]; 167 } 168 ++$index; 169 170 continue; 171 } 172 173 // single-quoted strings (links) 174 // embeds are double 175 // end does not mark a token 176 if ($inPath) { 177 if ($this->formula[$index] == self::QUOTE_SINGLE) { 178 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) { 179 $value .= self::QUOTE_SINGLE; 180 ++$index; 181 } else { 182 $inPath = false; 183 } 184 } else { 185 $value .= $this->formula[$index]; 186 } 187 ++$index; 188 189 continue; 190 } 191 192 // bracked strings (R1C1 range index or linked workbook name) 193 // no embeds (changed to "()" by Excel) 194 // end does not mark a token 195 if ($inRange) { 196 if ($this->formula[$index] == self::BRACKET_CLOSE) { 197 $inRange = false; 198 } 199 $value .= $this->formula[$index]; 200 ++$index; 201 202 continue; 203 } 204 205 // error values 206 // end marks a token, determined from absolute list of values 207 if ($inError) { 208 $value .= $this->formula[$index]; 209 ++$index; 210 if (in_array($value, $ERRORS)) { 211 $inError = false; 212 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR); 213 $value = ''; 214 } 215 216 continue; 217 } 218 219 // scientific notation check 220 if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) { 221 if (strlen($value) > 1) { 222 if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) { 223 $value .= $this->formula[$index]; 224 ++$index; 225 226 continue; 227 } 228 } 229 } 230 231 // independent character evaluation (order not important) 232 233 // establish state-dependent character evaluations 234 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 235 if (strlen($value) > 0) { 236 // unexpected 237 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 238 $value = ''; 239 } 240 $inString = true; 241 ++$index; 242 243 continue; 244 } 245 246 if ($this->formula[$index] == self::QUOTE_SINGLE) { 247 if (strlen($value) > 0) { 248 // unexpected 249 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 250 $value = ''; 251 } 252 $inPath = true; 253 ++$index; 254 255 continue; 256 } 257 258 if ($this->formula[$index] == self::BRACKET_OPEN) { 259 $inRange = true; 260 $value .= self::BRACKET_OPEN; 261 ++$index; 262 263 continue; 264 } 265 266 if ($this->formula[$index] == self::ERROR_START) { 267 if (strlen($value) > 0) { 268 // unexpected 269 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 270 $value = ''; 271 } 272 $inError = true; 273 $value .= self::ERROR_START; 274 ++$index; 275 276 continue; 277 } 278 279 // mark start and end of arrays and array rows 280 if ($this->formula[$index] == self::BRACE_OPEN) { 281 if (strlen($value) > 0) { 282 // unexpected 283 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 284 $value = ''; 285 } 286 287 $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 288 $tokens1[] = $tmp; 289 $stack[] = clone $tmp; 290 291 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 292 $tokens1[] = $tmp; 293 $stack[] = clone $tmp; 294 295 ++$index; 296 297 continue; 298 } 299 300 if ($this->formula[$index] == self::SEMICOLON) { 301 if (strlen($value) > 0) { 302 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 303 $value = ''; 304 } 305 306 $tmp = array_pop($stack); 307 $tmp->setValue(''); 308 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 309 $tokens1[] = $tmp; 310 311 $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 312 $tokens1[] = $tmp; 313 314 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 315 $tokens1[] = $tmp; 316 $stack[] = clone $tmp; 317 318 ++$index; 319 320 continue; 321 } 322 323 if ($this->formula[$index] == self::BRACE_CLOSE) { 324 if (strlen($value) > 0) { 325 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 326 $value = ''; 327 } 328 329 $tmp = array_pop($stack); 330 $tmp->setValue(''); 331 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 332 $tokens1[] = $tmp; 333 334 $tmp = array_pop($stack); 335 $tmp->setValue(''); 336 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 337 $tokens1[] = $tmp; 338 339 ++$index; 340 341 continue; 342 } 343 344 // trim white-space 345 if ($this->formula[$index] == self::WHITESPACE) { 346 if (strlen($value) > 0) { 347 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 348 $value = ''; 349 } 350 $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE); 351 ++$index; 352 while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) { 353 ++$index; 354 } 355 356 continue; 357 } 358 359 // multi-character comparators 360 if (($index + 2) <= $formulaLength) { 361 if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) { 362 if (strlen($value) > 0) { 363 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 364 $value = ''; 365 } 366 $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL); 367 $index += 2; 368 369 continue; 370 } 371 } 372 373 // standard infix operators 374 if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) { 375 if (strlen($value) > 0) { 376 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 377 $value = ''; 378 } 379 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX); 380 ++$index; 381 382 continue; 383 } 384 385 // standard postfix operators (only one) 386 if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) { 387 if (strlen($value) > 0) { 388 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 389 $value = ''; 390 } 391 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX); 392 ++$index; 393 394 continue; 395 } 396 397 // start subexpression or function 398 if ($this->formula[$index] == self::PAREN_OPEN) { 399 if (strlen($value) > 0) { 400 $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 401 $tokens1[] = $tmp; 402 $stack[] = clone $tmp; 403 $value = ''; 404 } else { 405 $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START); 406 $tokens1[] = $tmp; 407 $stack[] = clone $tmp; 408 } 409 ++$index; 410 411 continue; 412 } 413 414 // function, subexpression, or array parameters, or operand unions 415 if ($this->formula[$index] == self::COMMA) { 416 if (strlen($value) > 0) { 417 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 418 $value = ''; 419 } 420 421 $tmp = array_pop($stack); 422 $tmp->setValue(''); 423 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 424 $stack[] = $tmp; 425 426 if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 427 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION); 428 } else { 429 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 430 } 431 ++$index; 432 433 continue; 434 } 435 436 // stop subexpression 437 if ($this->formula[$index] == self::PAREN_CLOSE) { 438 if (strlen($value) > 0) { 439 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 440 $value = ''; 441 } 442 443 $tmp = array_pop($stack); 444 $tmp->setValue(''); 445 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 446 $tokens1[] = $tmp; 447 448 ++$index; 449 450 continue; 451 } 452 453 // token accumulation 454 $value .= $this->formula[$index]; 455 ++$index; 456 } 457 458 // dump remaining accumulation 459 if (strlen($value) > 0) { 460 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 461 } 462 463 // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections 464 $tokenCount = count($tokens1); 465 for ($i = 0; $i < $tokenCount; ++$i) { 466 $token = $tokens1[$i]; 467 if (isset($tokens1[$i - 1])) { 468 $previousToken = $tokens1[$i - 1]; 469 } else { 470 $previousToken = null; 471 } 472 if (isset($tokens1[$i + 1])) { 473 $nextToken = $tokens1[$i + 1]; 474 } else { 475 $nextToken = null; 476 } 477 478 if ($token === null) { 479 continue; 480 } 481 482 if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) { 483 $tokens2[] = $token; 484 485 continue; 486 } 487 488 if ($previousToken === null) { 489 continue; 490 } 491 492 if ( 493 !( 494 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 495 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 496 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 497 ) 498 ) { 499 continue; 500 } 501 502 if ($nextToken === null) { 503 continue; 504 } 505 506 if ( 507 !( 508 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 509 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 510 ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 511 ) 512 ) { 513 continue; 514 } 515 516 $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION); 517 } 518 519 // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators 520 // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names 521 $this->tokens = []; 522 523 $tokenCount = count($tokens2); 524 for ($i = 0; $i < $tokenCount; ++$i) { 525 $token = $tokens2[$i]; 526 if (isset($tokens2[$i - 1])) { 527 $previousToken = $tokens2[$i - 1]; 528 } else { 529 $previousToken = null; 530 } 531 //if (isset($tokens2[$i + 1])) { 532 // $nextToken = $tokens2[$i + 1]; 533 //} else { 534 // $nextToken = null; 535 //} 536 537 if ($token === null) { 538 continue; 539 } 540 541 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') { 542 if ($i == 0) { 543 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 544 } elseif ( 545 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 546 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 547 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 548 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 549 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 550 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 551 ) { 552 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 553 } else { 554 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 555 } 556 557 $this->tokens[] = $token; 558 559 continue; 560 } 561 562 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') { 563 if ($i == 0) { 564 continue; 565 } elseif ( 566 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 567 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 568 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 569 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 570 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 571 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 572 ) { 573 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 574 } else { 575 continue; 576 } 577 578 $this->tokens[] = $token; 579 580 continue; 581 } 582 583 if ( 584 $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && 585 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING 586 ) { 587 if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) { 588 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 589 } elseif ($token->getValue() == '&') { 590 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION); 591 } else { 592 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 593 } 594 595 $this->tokens[] = $token; 596 597 continue; 598 } 599 600 if ( 601 $token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND && 602 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING 603 ) { 604 if (!is_numeric($token->getValue())) { 605 if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') { 606 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 607 } else { 608 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE); 609 } 610 } else { 611 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER); 612 } 613 614 $this->tokens[] = $token; 615 616 continue; 617 } 618 619 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 620 if (strlen($token->getValue()) > 0) { 621 if (substr($token->getValue(), 0, 1) == '@') { 622 $token->setValue(substr($token->getValue(), 1)); 623 } 624 } 625 } 626 627 $this->tokens[] = $token; 628 } 629 } 630 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body