See Release Notes
Long Term Support Release
Differences Between: [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401] [Versions 39 and 402] [Versions 39 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Calculation; 4 5 /** 6 * PARTLY BASED ON: 7 * Copyright (c) 2007 E. W. Bachtal, Inc. 8 * 9 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software 10 * and associated documentation files (the "Software"), to deal in the Software without restriction, 11 * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 13 * subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * The software is provided "as is", without warranty of any kind, express or implied, including but not 19 * limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In 20 * no event shall the authors or copyright holders be liable for any claim, damages or other liability, 21 * whether in an action of contract, tort or otherwise, arising from, out of or in connection with the 22 * software or the use or other dealings in the software. 23 * 24 * https://ewbi.blogs.com/develops/2007/03/excel_formula_p.html 25 * https://ewbi.blogs.com/develops/2004/12/excel_formula_p.html 26 */ 27 class FormulaParser 28 { 29 // Character constants 30 const QUOTE_DOUBLE = '"'; 31 const QUOTE_SINGLE = '\''; 32 const BRACKET_CLOSE = ']'; 33 const BRACKET_OPEN = '['; 34 const BRACE_OPEN = '{'; 35 const BRACE_CLOSE = '}'; 36 const PAREN_OPEN = '('; 37 const PAREN_CLOSE = ')'; 38 const SEMICOLON = ';'; 39 const WHITESPACE = ' '; 40 const COMMA = ','; 41 const ERROR_START = '#'; 42 43 const OPERATORS_SN = '+-'; 44 const OPERATORS_INFIX = '+-*/^&=><'; 45 const OPERATORS_POSTFIX = '%'; 46 47 /** 48 * Formula. 49 * 50 * @var string 51 */ 52 private $formula; 53 54 /** 55 * Tokens. 56 * 57 * @var FormulaToken[] 58 */ 59 private $tokens = []; 60 61 /** 62 * Create a new FormulaParser. 63 * 64 * @param string $pFormula Formula to parse 65 * 66 * @throws Exception 67 */ 68 public function __construct($pFormula = '') 69 { 70 // Check parameters 71 if ($pFormula === null) { 72 throw new Exception('Invalid parameter passed: formula'); 73 } 74 75 // Initialise values 76 $this->formula = trim($pFormula); 77 // Parse! 78 $this->parseToTokens(); 79 } 80 81 /** 82 * Get Formula. 83 * 84 * @return string 85 */ 86 public function getFormula() 87 { 88 return $this->formula; 89 } 90 91 /** 92 * Get Token. 93 * 94 * @param int $pId Token id 95 * 96 * @throws Exception 97 * 98 * @return string 99 */ 100 public function getToken($pId = 0) 101 { 102 if (isset($this->tokens[$pId])) { 103 return $this->tokens[$pId]; 104 } 105 106 throw new Exception("Token with id $pId does not exist."); 107 } 108 109 /** 110 * Get Token count. 111 * 112 * @return int 113 */ 114 public function getTokenCount() 115 { 116 return count($this->tokens); 117 } 118 119 /** 120 * Get Tokens. 121 * 122 * @return FormulaToken[] 123 */ 124 public function getTokens() 125 { 126 return $this->tokens; 127 } 128 129 /** 130 * Parse to tokens. 131 */ 132 private function parseToTokens() 133 { 134 // No attempt is made to verify formulas; assumes formulas are derived from Excel, where 135 // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions. 136 137 // Check if the formula has a valid starting = 138 $formulaLength = strlen($this->formula); 139 if ($formulaLength < 2 || $this->formula[0] != '=') { 140 return; 141 } 142 143 // Helper variables 144 $tokens1 = $tokens2 = $stack = []; 145 $inString = $inPath = $inRange = $inError = false; 146 $token = $previousToken = $nextToken = null; 147 148 $index = 1; 149 $value = ''; 150 151 $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A']; 152 $COMPARATORS_MULTI = ['>=', '<=', '<>']; 153 154 while ($index < $formulaLength) { 155 // state-dependent character evaluation (order is important) 156 157 // double-quoted strings 158 // embeds are doubled 159 // end marks token 160 if ($inString) { 161 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 162 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) { 163 $value .= self::QUOTE_DOUBLE; 164 ++$index; 165 } else { 166 $inString = false; 167 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT); 168 $value = ''; 169 } 170 } else { 171 $value .= $this->formula[$index]; 172 } 173 ++$index; 174 175 continue; 176 } 177 178 // single-quoted strings (links) 179 // embeds are double 180 // end does not mark a token 181 if ($inPath) { 182 if ($this->formula[$index] == self::QUOTE_SINGLE) { 183 if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) { 184 $value .= self::QUOTE_SINGLE; 185 ++$index; 186 } else { 187 $inPath = false; 188 } 189 } else { 190 $value .= $this->formula[$index]; 191 } 192 ++$index; 193 194 continue; 195 } 196 197 // bracked strings (R1C1 range index or linked workbook name) 198 // no embeds (changed to "()" by Excel) 199 // end does not mark a token 200 if ($inRange) { 201 if ($this->formula[$index] == self::BRACKET_CLOSE) { 202 $inRange = false; 203 } 204 $value .= $this->formula[$index]; 205 ++$index; 206 207 continue; 208 } 209 210 // error values 211 // end marks a token, determined from absolute list of values 212 if ($inError) { 213 $value .= $this->formula[$index]; 214 ++$index; 215 if (in_array($value, $ERRORS)) { 216 $inError = false; 217 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR); 218 $value = ''; 219 } 220 221 continue; 222 } 223 224 // scientific notation check 225 if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) { 226 if (strlen($value) > 1) { 227 if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) { 228 $value .= $this->formula[$index]; 229 ++$index; 230 231 continue; 232 } 233 } 234 } 235 236 // independent character evaluation (order not important) 237 238 // establish state-dependent character evaluations 239 if ($this->formula[$index] == self::QUOTE_DOUBLE) { 240 if (strlen($value) > 0) { 241 // unexpected 242 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 243 $value = ''; 244 } 245 $inString = true; 246 ++$index; 247 248 continue; 249 } 250 251 if ($this->formula[$index] == self::QUOTE_SINGLE) { 252 if (strlen($value) > 0) { 253 // unexpected 254 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 255 $value = ''; 256 } 257 $inPath = true; 258 ++$index; 259 260 continue; 261 } 262 263 if ($this->formula[$index] == self::BRACKET_OPEN) { 264 $inRange = true; 265 $value .= self::BRACKET_OPEN; 266 ++$index; 267 268 continue; 269 } 270 271 if ($this->formula[$index] == self::ERROR_START) { 272 if (strlen($value) > 0) { 273 // unexpected 274 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 275 $value = ''; 276 } 277 $inError = true; 278 $value .= self::ERROR_START; 279 ++$index; 280 281 continue; 282 } 283 284 // mark start and end of arrays and array rows 285 if ($this->formula[$index] == self::BRACE_OPEN) { 286 if (strlen($value) > 0) { 287 // unexpected 288 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN); 289 $value = ''; 290 } 291 292 $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 293 $tokens1[] = $tmp; 294 $stack[] = clone $tmp; 295 296 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 297 $tokens1[] = $tmp; 298 $stack[] = clone $tmp; 299 300 ++$index; 301 302 continue; 303 } 304 305 if ($this->formula[$index] == self::SEMICOLON) { 306 if (strlen($value) > 0) { 307 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 308 $value = ''; 309 } 310 311 $tmp = array_pop($stack); 312 $tmp->setValue(''); 313 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 314 $tokens1[] = $tmp; 315 316 $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 317 $tokens1[] = $tmp; 318 319 $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 320 $tokens1[] = $tmp; 321 $stack[] = clone $tmp; 322 323 ++$index; 324 325 continue; 326 } 327 328 if ($this->formula[$index] == self::BRACE_CLOSE) { 329 if (strlen($value) > 0) { 330 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 331 $value = ''; 332 } 333 334 $tmp = array_pop($stack); 335 $tmp->setValue(''); 336 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 337 $tokens1[] = $tmp; 338 339 $tmp = array_pop($stack); 340 $tmp->setValue(''); 341 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 342 $tokens1[] = $tmp; 343 344 ++$index; 345 346 continue; 347 } 348 349 // trim white-space 350 if ($this->formula[$index] == self::WHITESPACE) { 351 if (strlen($value) > 0) { 352 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 353 $value = ''; 354 } 355 $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE); 356 ++$index; 357 while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) { 358 ++$index; 359 } 360 361 continue; 362 } 363 364 // multi-character comparators 365 if (($index + 2) <= $formulaLength) { 366 if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) { 367 if (strlen($value) > 0) { 368 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 369 $value = ''; 370 } 371 $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL); 372 $index += 2; 373 374 continue; 375 } 376 } 377 378 // standard infix operators 379 if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) { 380 if (strlen($value) > 0) { 381 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 382 $value = ''; 383 } 384 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX); 385 ++$index; 386 387 continue; 388 } 389 390 // standard postfix operators (only one) 391 if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) { 392 if (strlen($value) > 0) { 393 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 394 $value = ''; 395 } 396 $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX); 397 ++$index; 398 399 continue; 400 } 401 402 // start subexpression or function 403 if ($this->formula[$index] == self::PAREN_OPEN) { 404 if (strlen($value) > 0) { 405 $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START); 406 $tokens1[] = $tmp; 407 $stack[] = clone $tmp; 408 $value = ''; 409 } else { 410 $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START); 411 $tokens1[] = $tmp; 412 $stack[] = clone $tmp; 413 } 414 ++$index; 415 416 continue; 417 } 418 419 // function, subexpression, or array parameters, or operand unions 420 if ($this->formula[$index] == self::COMMA) { 421 if (strlen($value) > 0) { 422 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 423 $value = ''; 424 } 425 426 $tmp = array_pop($stack); 427 $tmp->setValue(''); 428 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 429 $stack[] = $tmp; 430 431 if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 432 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION); 433 } else { 434 $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT); 435 } 436 ++$index; 437 438 continue; 439 } 440 441 // stop subexpression 442 if ($this->formula[$index] == self::PAREN_CLOSE) { 443 if (strlen($value) > 0) { 444 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 445 $value = ''; 446 } 447 448 $tmp = array_pop($stack); 449 $tmp->setValue(''); 450 $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP); 451 $tokens1[] = $tmp; 452 453 ++$index; 454 455 continue; 456 } 457 458 // token accumulation 459 $value .= $this->formula[$index]; 460 ++$index; 461 } 462 463 // dump remaining accumulation 464 if (strlen($value) > 0) { 465 $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND); 466 } 467 468 // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections 469 $tokenCount = count($tokens1); 470 for ($i = 0; $i < $tokenCount; ++$i) { 471 $token = $tokens1[$i]; 472 if (isset($tokens1[$i - 1])) { 473 $previousToken = $tokens1[$i - 1]; 474 } else { 475 $previousToken = null; 476 } 477 if (isset($tokens1[$i + 1])) { 478 $nextToken = $tokens1[$i + 1]; 479 } else { 480 $nextToken = null; 481 } 482 483 if ($token === null) { 484 continue; 485 } 486 487 if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) { 488 $tokens2[] = $token; 489 490 continue; 491 } 492 493 if ($previousToken === null) { 494 continue; 495 } 496 497 if (!( 498 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 499 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 500 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 501 )) { 502 continue; 503 } 504 505 if ($nextToken === null) { 506 continue; 507 } 508 509 if (!( 510 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 511 (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) || 512 ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND) 513 )) { 514 continue; 515 } 516 517 $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION); 518 } 519 520 // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators 521 // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names 522 $this->tokens = []; 523 524 $tokenCount = count($tokens2); 525 for ($i = 0; $i < $tokenCount; ++$i) { 526 $token = $tokens2[$i]; 527 if (isset($tokens2[$i - 1])) { 528 $previousToken = $tokens2[$i - 1]; 529 } else { 530 $previousToken = null; 531 } 532 if (isset($tokens2[$i + 1])) { 533 $nextToken = $tokens2[$i + 1]; 534 } else { 535 $nextToken = null; 536 } 537 538 if ($token === null) { 539 continue; 540 } 541 542 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') { 543 if ($i == 0) { 544 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 545 } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 546 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 547 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 548 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 549 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 550 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) { 551 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 552 } else { 553 $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX); 554 } 555 556 $this->tokens[] = $token; 557 558 continue; 559 } 560 561 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') { 562 if ($i == 0) { 563 continue; 564 } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && 565 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 566 (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && 567 ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) || 568 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) || 569 ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) { 570 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 571 } else { 572 continue; 573 } 574 575 $this->tokens[] = $token; 576 577 continue; 578 } 579 580 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && 581 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) { 582 if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) { 583 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 584 } elseif ($token->getValue() == '&') { 585 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION); 586 } else { 587 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH); 588 } 589 590 $this->tokens[] = $token; 591 592 continue; 593 } 594 595 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND && 596 $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) { 597 if (!is_numeric($token->getValue())) { 598 if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') { 599 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL); 600 } else { 601 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE); 602 } 603 } else { 604 $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER); 605 } 606 607 $this->tokens[] = $token; 608 609 continue; 610 } 611 612 if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) { 613 if (strlen($token->getValue()) > 0) { 614 if (substr($token->getValue(), 0, 1) == '@') { 615 $token->setValue(substr($token->getValue(), 1)); 616 } 617 } 618 } 619 620 $this->tokens[] = $token; 621 } 622 } 623 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body