From 8d7be25823aaf3d6051f7daece475179a856259f Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Tue, 27 Apr 2021 19:10:37 +0200 Subject: [PATCH] Improve Range handling in the Calculation Engine for Row and Column ranges (#2028) * Improve Range handling in the Calculation Engine for Row and Column ranges --- CHANGELOG.md | 1 + .../Calculation/Calculation.php | 108 ++++++++++++------ .../Calculation/MathTrig/Sum.php | 6 +- .../Calculation/RowColumnReferenceTest.php | 84 ++++++++++++++ 4 files changed, 162 insertions(+), 37 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index bb09f628..21762f89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Added +- Improved support for Row and Column ranges in formulae [Issue #1755](https://github.com/PHPOffice/PhpSpreadsheet/issues/1755) [PR #2028](https://github.com/PHPOffice/PhpSpreadsheet/pull/2028) - Implemented the CHITEST(), CHISQ.DIST() and CHISQ.INV() and equivalent Statistical functions, for both left- and right-tailed distributions. - Support for ActiveSheet and SelectedCells in the ODS Reader and Writer. 
[PR #1908](https://github.com/PHPOffice/PhpSpreadsheet/pull/1908) diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 97bd1779..abd997a3 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -30,6 +30,9 @@ class Calculation const CALCULATION_REGEXP_CELLREF = '((([^\s,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?\$?\b([a-z]{1,3})\$?(\d{1,7})(?![\w.])'; // Cell reference (with or without a sheet reference) ensuring absolute/relative const CALCULATION_REGEXP_CELLREF_RELATIVE = '((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?\b[a-z]{1,3})(\$?\d{1,7})(?![\w.])'; + const CALCULATION_REGEXP_COLUMN_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[a-z]{1,3})):(?![.*])'; + const CALCULATION_REGEXP_ROW_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[1-9][0-9]{0,6})):(?![.*])'; + // The two patterns above match a column or row reference (with or without a sheet reference) that is immediately followed by the ':' range operator // Cell ranges ensuring absolute/relative const CALCULATION_REGEXP_COLUMNRANGE_RELATIVE = '(\$?[a-z]{1,3}):(\$?[a-z]{1,3})'; const CALCULATION_REGEXP_ROWRANGE_RELATIVE = '(\$?\d{1,7}):(\$?\d{1,7})'; @@ -3798,6 +3801,8 @@ class Calculation $regexpMatchString = '/^(' . self::CALCULATION_REGEXP_FUNCTION . '|' . self::CALCULATION_REGEXP_CELLREF . + '|' . self::CALCULATION_REGEXP_COLUMN_RANGE . + '|' . self::CALCULATION_REGEXP_ROW_RANGE . '|' . self::CALCULATION_REGEXP_NUMBER . '|' . self::CALCULATION_REGEXP_STRING . '|' . self::CALCULATION_REGEXP_OPENBRACE . 
@@ -3866,7 +3871,8 @@ class Calculation $opCharacter .= $formula[++$index]; } // Find out if we're currently at the beginning of a number, variable, cell reference, function, parenthesis or operand - $isOperandOrFunction = preg_match($regexpMatchString, substr($formula, $index), $match); + $isOperandOrFunction = (bool) preg_match($regexpMatchString, substr($formula, $index), $match); + if ($opCharacter == '-' && !$expectingOperator) { // Is it a negation instead of a minus? // Put a negation on the stack $stack->push('Unary Operator', '~', null, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); @@ -4038,6 +4044,7 @@ class Calculation $expectingOperand = false; $val = $match[1]; $length = strlen($val); + if (preg_match('/^' . self::CALCULATION_REGEXP_FUNCTION . '$/miu', $val, $matches)) { $val = preg_replace('/\s/u', '', $val); if (isset(self::$phpSpreadsheetFunctions[strtoupper($matches[1])]) || isset(self::$controlFunctions[strtoupper($matches[1])])) { // it's a function @@ -4074,7 +4081,7 @@ class Calculation // Should only be applied to the actual cell column, not the worksheet name // If the last entry on the stack was a : operator, then we have a cell range reference $testPrevOp = $stack->last(1); - if ($testPrevOp !== null && $testPrevOp['value'] == ':') { + if ($testPrevOp !== null && $testPrevOp['value'] === ':') { // If we have a worksheet reference, then we're playing with a 3D reference if ($matches[2] == '') { // Otherwise, we 'inherit' the worksheet reference from the start cell reference @@ -4091,62 +4098,57 @@ class Calculation return $this->raiseFormulaError('3D Range references are not yet supported'); } } + } elseif (strpos($val, '!') === false && $pCellParent !== null) { + $worksheet = $pCellParent->getTitle(); + $val = "'{$worksheet}'!{$val}"; } $outputItem = $stack->getStackItem('Cell Reference', $val, $val, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); $output[] = $outputItem; } else { // it's a variable, constant, string, number 
or boolean + $localeConstant = false; + $stackItemType = 'Value'; + $stackItemReference = null; + // If the last entry on the stack was a : operator, then we may have a row or column range reference $testPrevOp = $stack->last(1); if ($testPrevOp !== null && $testPrevOp['value'] === ':') { + $stackItemType = 'Cell Reference'; $startRowColRef = $output[count($output) - 1]['value']; [$rangeWS1, $startRowColRef] = Worksheet::extractSheetTitle($startRowColRef, true); $rangeSheetRef = $rangeWS1; - if ($rangeWS1 != '') { + if ($rangeWS1 !== '') { $rangeWS1 .= '!'; } + $rangeSheetRef = trim($rangeSheetRef, "'"); [$rangeWS2, $val] = Worksheet::extractSheetTitle($val, true); - if ($rangeWS2 != '') { + if ($rangeWS2 !== '') { $rangeWS2 .= '!'; } else { $rangeWS2 = $rangeWS1; } + $refSheet = $pCellParent; - if ($pCellParent !== null && $rangeSheetRef !== $pCellParent->getTitle()) { + if ($pCellParent !== null && $rangeSheetRef !== '' && $rangeSheetRef !== $pCellParent->getTitle()) { $refSheet = $pCellParent->getParent()->getSheetByName($rangeSheetRef); } - if ( - (is_int($startRowColRef)) && (ctype_digit($val)) && - ($startRowColRef <= 1048576) && ($val <= 1048576) - ) { - // Row range - $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestColumn() : 'XFD'; // Max 16,384 columns for Excel2007 - $output[count($output) - 1]['value'] = $rangeWS1 . 'A' . $startRowColRef; - $val = $rangeWS2 . $endRowColRef . $val; - } elseif ( - (ctype_alpha($startRowColRef)) && (ctype_alpha($val)) && - (strlen($startRowColRef) <= 3) && (strlen($val) <= 3) - ) { - // Column range - $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestRow() : 1048576; // Max 1,048,576 rows for Excel2007 - $output[count($output) - 1]['value'] = $rangeWS1 . strtoupper($startRowColRef) . '1'; - $val = $rangeWS2 . $val . 
$endRowColRef; - } - } - $localeConstant = false; - $stackItemType = 'Value'; - $stackItemReference = null; - if ($opCharacter == self::FORMULA_STRING_QUOTE) { + if (ctype_digit($val) && $val <= 1048576) { + // Row range + $stackItemType = 'Row Reference'; + $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataColumn($val) : 'XFD'; // Max 16,384 columns for Excel2007 + $val = "{$rangeWS2}{$endRowColRef}{$val}"; + } elseif (ctype_alpha($val) && strlen($val) <= 3) { + // Column range + $stackItemType = 'Column Reference'; + $endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataRow($val) : 1048576; // Max 1,048,576 rows for Excel2007 + $val = "{$rangeWS2}{$val}{$endRowColRef}"; + } + $stackItemReference = $val; + } elseif ($opCharacter == self::FORMULA_STRING_QUOTE) { // UnEscape any quotes within the string $val = self::wrapResult(str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($val))); - } elseif (is_numeric($val)) { - if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) { - $val = (float) $val; - } else { - $val = (int) $val; - } } elseif (isset(self::$excelConstants[trim(strtoupper($val))])) { $stackItemType = 'Constant'; $excelConstant = trim(strtoupper($val)); @@ -4154,10 +4156,41 @@ class Calculation } elseif (($localeConstant = array_search(trim(strtoupper($val)), self::$localeBoolean)) !== false) { $stackItemType = 'Constant'; $val = self::$excelConstants[$localeConstant]; + } elseif ( + preg_match('/^' . self::CALCULATION_REGEXP_ROW_RANGE . 
'/miu', substr($formula, $index), $rowRangeReference) + ) { + $val = $rowRangeReference[1]; + $length = strlen($rowRangeReference[1]); + $stackItemType = 'Row Reference'; + $column = 'A'; + if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) { + $column = $pCellParent->getHighestDataColumn($val); + } + $val = "{$rowRangeReference[2]}{$column}{$rowRangeReference[7]}"; + $stackItemReference = $val; + } elseif ( + preg_match('/^' . self::CALCULATION_REGEXP_COLUMN_RANGE . '/miu', substr($formula, $index), $columnRangeReference) + ) { + $val = $columnRangeReference[1]; + $length = strlen($val); + $stackItemType = 'Column Reference'; + $row = '1'; + if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) { + $row = $pCellParent->getHighestDataRow($val); + } + $val = "{$val}{$row}"; + $stackItemReference = $val; } elseif (preg_match('/^' . self::CALCULATION_REGEXP_DEFINEDNAME . '.*/miu', $val, $match)) { $stackItemType = 'Defined Name'; $stackItemReference = $val; + } elseif (is_numeric($val)) { + if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) { + $val = (float) $val; + } else { + $val = (int) $val; + } } + $details = $stack->getStackItem($stackItemType, $val, $stackItemReference, $currentCondition, $currentOnlyIf, $currentOnlyIfNot); if ($localeConstant) { $details['localeValue'] = $localeConstant; @@ -4431,6 +4464,7 @@ class Calculation } else { return $this->raiseFormulaError('Unable to access Cell Reference'); } + $stack->push('Cell Reference', $cellValue, $cellRef); } else { $stack->push('Error', Functions::REF(), null); @@ -4564,6 +4598,7 @@ class Calculation } } elseif (preg_match('/^' . self::CALCULATION_REGEXP_CELLREF . 
'$/i', $token, $matches)) { $cellRef = null; + if (isset($matches[8])) { if ($pCell === null) { // We can't access the range, so return a REF error @@ -4596,7 +4631,7 @@ class Calculation } } else { if ($pCell === null) { - // We can't access the cell, so return a REF error + // We can't access the cell, so return a REF error $cellValue = Functions::REF(); } else { $cellRef = $matches[6] . $matches[7]; @@ -4613,6 +4648,7 @@ class Calculation $cellValue = $this->extractCellRange($cellRef, $this->spreadsheet->getSheetByName($matches[2]), false); $pCell->attach($pCellParent); } else { + $cellRef = ($cellSheet !== null) ? "{$matches[2]}!{$cellRef}" : $cellRef; $cellValue = null; } } else { @@ -4631,7 +4667,8 @@ class Calculation } } } - $stack->push('Value', $cellValue, $cellRef); + + $stack->push('Cell Value', $cellValue, $cellRef); if (isset($storeKey)) { $branchStore[$storeKey] = $cellValue; } @@ -5116,6 +5153,7 @@ class Calculation if ($pSheet !== null) { $pSheetName = $pSheet->getTitle(); + if (strpos($pRange, '!') !== false) { [$pSheetName, $pRange] = Worksheet::extractSheetTitle($pRange, true); $pSheet = $this->spreadsheet->getSheetByName($pSheetName); diff --git a/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php b/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php index cd29248b..ab3a9a07 100644 --- a/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php +++ b/src/PhpSpreadsheet/Calculation/MathTrig/Sum.php @@ -50,11 +50,13 @@ class Sum public static function funcSumNoStrings(...$args) { $returnValue = 0; - // Loop through the arguments foreach (Functions::flattenArray($args) as $arg) { // Is it a numeric value? 
- if (is_numeric($arg)) { + if (is_numeric($arg) || empty($arg)) { + if (is_string($arg)) { + $arg = (int) $arg; + } $returnValue += $arg; } elseif (Functions::isError($arg)) { return $arg; diff --git a/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php new file mode 100644 index 00000000..8c9d23f7 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/RowColumnReferenceTest.php @@ -0,0 +1,84 @@ +spreadSheet = new Spreadsheet(); + + $dataSheet = new Worksheet($this->spreadSheet, 'data sheet'); + $this->spreadSheet->addSheet($dataSheet, 0); + $dataSheet->setCellValue('B1', 1.1); + $dataSheet->setCellValue('B2', 2.2); + $dataSheet->setCellValue('B3', 4.4); + $dataSheet->setCellValue('C3', 8.8); + $dataSheet->setCellValue('D3', 16.16); + + $calcSheet = new Worksheet($this->spreadSheet, 'summary sheet'); + $this->spreadSheet->addSheet($calcSheet, 1); + $calcSheet->setCellValue('B1', 2.2); + $calcSheet->setCellValue('B2', 4.4); + $calcSheet->setCellValue('B3', 8.8); + $calcSheet->setCellValue('C3', 16.16); + $calcSheet->setCellValue('D3', 32.32); + + $this->spreadSheet->setActiveSheetIndexByName('summary sheet'); + } + + /** + * @dataProvider providerCurrentWorksheetFormulae + */ + public function testCurrentWorksheet(string $formula, float $expectedResult): void + { + $worksheet = $this->spreadSheet->getActiveSheet(); + + $worksheet->setCellValue('A1', $formula); + + $result = $worksheet->getCell('A1')->getCalculatedValue(); + self::assertSame($expectedResult, $result); + } + + public function providerCurrentWorksheetFormulae(): array + { + return [ + 'relative range in active worksheet' => ['=SUM(B1:B3)', 15.4], + 'range with absolute columns in active worksheet' => ['=SUM($B1:$B3)', 15.4], + 'range with absolute rows in active worksheet' => ['=SUM(B$1:B$3)', 15.4], + 'range with absolute columns and rows in active worksheet' => ['=SUM($B$1:$B$3)', 15.4], + 'another relative range in 
active worksheet' => ['=SUM(B3:D3)', 57.28], + 'relative column range in active worksheet' => ['=SUM(B:B)', 15.4], + 'absolute column range in active worksheet' => ['=SUM($B:$B)', 15.4], + 'relative row range in active worksheet' => ['=SUM(3:3)', 57.28], + 'absolute row range in active worksheet' => ['=SUM($3:$3)', 57.28], + 'relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B1:B3)', 15.4], + 'range with absolute columns in specified active worksheet' => ['=SUM(\'summary sheet\'!$B1:$B3)', 15.4], + 'range with absolute rows in specified active worksheet' => ['=SUM(\'summary sheet\'!B$1:B$3)', 15.4], + 'range with absolute columns and rows in specified active worksheet' => ['=SUM(\'summary sheet\'!$B$1:$B$3)', 15.4], + 'another relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B3:D3)', 57.28], + 'relative column range in specified active worksheet' => ['=SUM(\'summary sheet\'!B:B)', 15.4], + 'absolute column range in specified active worksheet' => ['=SUM(\'summary sheet\'!$B:$B)', 15.4], + 'relative row range in specified active worksheet' => ['=SUM(\'summary sheet\'!3:3)', 57.28], + 'absolute row range in specified active worksheet' => ['=SUM(\'summary sheet\'!$3:$3)', 57.28], + 'relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B1:B3)', 7.7], + 'range with absolute columns in specified other worksheet' => ['=SUM(\'data sheet\'!$B1:$B3)', 7.7], + 'range with absolute rows in specified other worksheet' => ['=SUM(\'data sheet\'!B$1:B$3)', 7.7], + 'range with absolute columns and rows in specified other worksheet' => ['=SUM(\'data sheet\'!$B$1:$B$3)', 7.7], + 'another relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B3:D3)', 29.36], + 'relative column range in specified other worksheet' => ['=SUM(\'data sheet\'!B:B)', 7.7], + 'absolute column range in specified other worksheet' => ['=SUM(\'data sheet\'!$B:$B)', 7.7], + 'relative row range in specified other worksheet' => 
['=SUM(\'data sheet\'!3:3)', 29.36], + 'absolute row range in specified other worksheet' => ['=SUM(\'data sheet\'!$3:$3)', 29.36], + ]; + } +}