Improve Range handling in the Calculation Engine for Row and Column ranges (#2028)

* Improve Range handling in the Calculation Engine for Row and Column ranges
This commit is contained in:
Mark Baker 2021-04-27 19:10:37 +02:00 committed by GitHub
parent 27eac4d649
commit 8d7be25823
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 162 additions and 37 deletions

View File

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
### Added
- Improved support for Row and Column ranges in formulae [Issue #1755](https://github.com/PHPOffice/PhpSpreadsheet/issues/1755) [PR #2028](https://github.com/PHPOffice/PhpSpreadsheet/pull/2028)
- Implemented the CHITEST(), CHISQ.DIST(), CHISQ.INV() and equivalent Statistical functions, for both left- and right-tailed distributions.
- Support for ActiveSheet and SelectedCells in the ODS Reader and Writer. [PR #1908](https://github.com/PHPOffice/PhpSpreadsheet/pull/1908)

View File

@ -30,6 +30,9 @@ class Calculation
const CALCULATION_REGEXP_CELLREF = '((([^\s,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?\$?\b([a-z]{1,3})\$?(\d{1,7})(?![\w.])';
// Cell reference (with or without a sheet reference) ensuring absolute/relative
const CALCULATION_REGEXP_CELLREF_RELATIVE = '((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?\b[a-z]{1,3})(\$?\d{1,7})(?![\w.])';
// Start of a column range (with or without a sheet reference): an optional sheet prefix
// (unquoted, single-quoted or double-quoted, terminated by '!'), an optional '$' and 1-3 column
// letters, followed by a ':' that is not itself followed by '.' or '*'.
// Capture group 1 is the reference without the trailing ':'; group 2 is the sheet prefix including '!'; group 7 is the column.
const CALCULATION_REGEXP_COLUMN_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[a-z]{1,3})):(?![.*])';
// Start of a row range (with or without a sheet reference): same shape as the column range above,
// but with an optional '$' and a row number of 1-7 digits that cannot start with 0.
// Capture group 1 is the reference without the trailing ':'; group 2 is the sheet prefix including '!'; group 7 is the row.
const CALCULATION_REGEXP_ROW_RANGE = '(((([^\s\(,!&%^\/\*\+<>=-]*)|(\'[^\']*\')|(\"[^\"]*\"))!)?(\$?[1-9][0-9]{0,6})):(?![.*])';
// Complete column range and row range (no sheet reference) ensuring absolute/relative:
// each side of the ':' is captured separately, together with its optional '$' marker.
const CALCULATION_REGEXP_COLUMNRANGE_RELATIVE = '(\$?[a-z]{1,3}):(\$?[a-z]{1,3})';
const CALCULATION_REGEXP_ROWRANGE_RELATIVE = '(\$?\d{1,7}):(\$?\d{1,7})';
@ -3798,6 +3801,8 @@ class Calculation
$regexpMatchString = '/^(' . self::CALCULATION_REGEXP_FUNCTION .
'|' . self::CALCULATION_REGEXP_CELLREF .
'|' . self::CALCULATION_REGEXP_COLUMN_RANGE .
'|' . self::CALCULATION_REGEXP_ROW_RANGE .
'|' . self::CALCULATION_REGEXP_NUMBER .
'|' . self::CALCULATION_REGEXP_STRING .
'|' . self::CALCULATION_REGEXP_OPENBRACE .
@ -3866,7 +3871,8 @@ class Calculation
$opCharacter .= $formula[++$index];
}
// Find out if we're currently at the beginning of a number, variable, cell reference, function, parenthesis or operand
$isOperandOrFunction = preg_match($regexpMatchString, substr($formula, $index), $match);
$isOperandOrFunction = (bool) preg_match($regexpMatchString, substr($formula, $index), $match);
if ($opCharacter == '-' && !$expectingOperator) { // Is it a negation instead of a minus?
// Put a negation on the stack
$stack->push('Unary Operator', '~', null, $currentCondition, $currentOnlyIf, $currentOnlyIfNot);
@ -4038,6 +4044,7 @@ class Calculation
$expectingOperand = false;
$val = $match[1];
$length = strlen($val);
if (preg_match('/^' . self::CALCULATION_REGEXP_FUNCTION . '$/miu', $val, $matches)) {
$val = preg_replace('/\s/u', '', $val);
if (isset(self::$phpSpreadsheetFunctions[strtoupper($matches[1])]) || isset(self::$controlFunctions[strtoupper($matches[1])])) { // it's a function
@ -4074,7 +4081,7 @@ class Calculation
// Should only be applied to the actual cell column, not the worksheet name
// If the last entry on the stack was a : operator, then we have a cell range reference
$testPrevOp = $stack->last(1);
if ($testPrevOp !== null && $testPrevOp['value'] == ':') {
if ($testPrevOp !== null && $testPrevOp['value'] === ':') {
// If we have a worksheet reference, then we're playing with a 3D reference
if ($matches[2] == '') {
// Otherwise, we 'inherit' the worksheet reference from the start cell reference
@ -4091,62 +4098,57 @@ class Calculation
return $this->raiseFormulaError('3D Range references are not yet supported');
}
}
} elseif (strpos($val, '!') === false && $pCellParent !== null) {
$worksheet = $pCellParent->getTitle();
$val = "'{$worksheet}'!{$val}";
}
$outputItem = $stack->getStackItem('Cell Reference', $val, $val, $currentCondition, $currentOnlyIf, $currentOnlyIfNot);
$output[] = $outputItem;
} else { // it's a variable, constant, string, number or boolean
$localeConstant = false;
$stackItemType = 'Value';
$stackItemReference = null;
// If the last entry on the stack was a : operator, then we may have a row or column range reference
$testPrevOp = $stack->last(1);
if ($testPrevOp !== null && $testPrevOp['value'] === ':') {
$stackItemType = 'Cell Reference';
$startRowColRef = $output[count($output) - 1]['value'];
[$rangeWS1, $startRowColRef] = Worksheet::extractSheetTitle($startRowColRef, true);
$rangeSheetRef = $rangeWS1;
if ($rangeWS1 != '') {
if ($rangeWS1 !== '') {
$rangeWS1 .= '!';
}
$rangeSheetRef = trim($rangeSheetRef, "'");
[$rangeWS2, $val] = Worksheet::extractSheetTitle($val, true);
if ($rangeWS2 != '') {
if ($rangeWS2 !== '') {
$rangeWS2 .= '!';
} else {
$rangeWS2 = $rangeWS1;
}
$refSheet = $pCellParent;
if ($pCellParent !== null && $rangeSheetRef !== $pCellParent->getTitle()) {
if ($pCellParent !== null && $rangeSheetRef !== '' && $rangeSheetRef !== $pCellParent->getTitle()) {
$refSheet = $pCellParent->getParent()->getSheetByName($rangeSheetRef);
}
if (
(is_int($startRowColRef)) && (ctype_digit($val)) &&
($startRowColRef <= 1048576) && ($val <= 1048576)
) {
// Row range
$endRowColRef = ($refSheet !== null) ? $refSheet->getHighestColumn() : 'XFD'; // Max 16,384 columns for Excel2007
$output[count($output) - 1]['value'] = $rangeWS1 . 'A' . $startRowColRef;
$val = $rangeWS2 . $endRowColRef . $val;
} elseif (
(ctype_alpha($startRowColRef)) && (ctype_alpha($val)) &&
(strlen($startRowColRef) <= 3) && (strlen($val) <= 3)
) {
// Column range
$endRowColRef = ($refSheet !== null) ? $refSheet->getHighestRow() : 1048576; // Max 1,048,576 rows for Excel2007
$output[count($output) - 1]['value'] = $rangeWS1 . strtoupper($startRowColRef) . '1';
$val = $rangeWS2 . $val . $endRowColRef;
}
}
$localeConstant = false;
$stackItemType = 'Value';
$stackItemReference = null;
if ($opCharacter == self::FORMULA_STRING_QUOTE) {
if (ctype_digit($val) && $val <= 1048576) {
// Row range
$stackItemType = 'Row Reference';
$endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataColumn($val) : 'XFD'; // Max 16,384 columns for Excel2007
$val = "{$rangeWS2}{$endRowColRef}{$val}";
} elseif (ctype_alpha($val) && strlen($val) <= 3) {
// Column range
$stackItemType = 'Column Reference';
$endRowColRef = ($refSheet !== null) ? $refSheet->getHighestDataRow($val) : 1048576; // Max 1,048,576 rows for Excel2007
$val = "{$rangeWS2}{$val}{$endRowColRef}";
}
$stackItemReference = $val;
} elseif ($opCharacter == self::FORMULA_STRING_QUOTE) {
// UnEscape any quotes within the string
$val = self::wrapResult(str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($val)));
} elseif (is_numeric($val)) {
if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) {
$val = (float) $val;
} else {
$val = (int) $val;
}
} elseif (isset(self::$excelConstants[trim(strtoupper($val))])) {
$stackItemType = 'Constant';
$excelConstant = trim(strtoupper($val));
@ -4154,10 +4156,41 @@ class Calculation
} elseif (($localeConstant = array_search(trim(strtoupper($val)), self::$localeBoolean)) !== false) {
$stackItemType = 'Constant';
$val = self::$excelConstants[$localeConstant];
} elseif (
preg_match('/^' . self::CALCULATION_REGEXP_ROW_RANGE . '/miu', substr($formula, $index), $rowRangeReference)
) {
$val = $rowRangeReference[1];
$length = strlen($rowRangeReference[1]);
$stackItemType = 'Row Reference';
$column = 'A';
if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) {
$column = $pCellParent->getHighestDataColumn($val);
}
$val = "{$rowRangeReference[2]}{$column}{$rowRangeReference[7]}";
$stackItemReference = $val;
} elseif (
preg_match('/^' . self::CALCULATION_REGEXP_COLUMN_RANGE . '/miu', substr($formula, $index), $columnRangeReference)
) {
$val = $columnRangeReference[1];
$length = strlen($val);
$stackItemType = 'Column Reference';
$row = '1';
if (($testPrevOp !== null && $testPrevOp['value'] === ':') && $pCellParent !== null) {
$row = $pCellParent->getHighestDataRow($val);
}
$val = "{$val}{$row}";
$stackItemReference = $val;
} elseif (preg_match('/^' . self::CALCULATION_REGEXP_DEFINEDNAME . '.*/miu', $val, $match)) {
$stackItemType = 'Defined Name';
$stackItemReference = $val;
} elseif (is_numeric($val)) {
if ((strpos($val, '.') !== false) || (stripos($val, 'e') !== false) || ($val > PHP_INT_MAX) || ($val < -PHP_INT_MAX)) {
$val = (float) $val;
} else {
$val = (int) $val;
}
}
$details = $stack->getStackItem($stackItemType, $val, $stackItemReference, $currentCondition, $currentOnlyIf, $currentOnlyIfNot);
if ($localeConstant) {
$details['localeValue'] = $localeConstant;
@ -4431,6 +4464,7 @@ class Calculation
} else {
return $this->raiseFormulaError('Unable to access Cell Reference');
}
$stack->push('Cell Reference', $cellValue, $cellRef);
} else {
$stack->push('Error', Functions::REF(), null);
@ -4564,6 +4598,7 @@ class Calculation
}
} elseif (preg_match('/^' . self::CALCULATION_REGEXP_CELLREF . '$/i', $token, $matches)) {
$cellRef = null;
if (isset($matches[8])) {
if ($pCell === null) {
// We can't access the range, so return a REF error
@ -4596,7 +4631,7 @@ class Calculation
}
} else {
if ($pCell === null) {
// We can't access the cell, so return a REF error
// We can't access the cell, so return a REF error
$cellValue = Functions::REF();
} else {
$cellRef = $matches[6] . $matches[7];
@ -4613,6 +4648,7 @@ class Calculation
$cellValue = $this->extractCellRange($cellRef, $this->spreadsheet->getSheetByName($matches[2]), false);
$pCell->attach($pCellParent);
} else {
$cellRef = ($cellSheet !== null) ? "{$matches[2]}!{$cellRef}" : $cellRef;
$cellValue = null;
}
} else {
@ -4631,7 +4667,8 @@ class Calculation
}
}
}
$stack->push('Value', $cellValue, $cellRef);
$stack->push('Cell Value', $cellValue, $cellRef);
if (isset($storeKey)) {
$branchStore[$storeKey] = $cellValue;
}
@ -5116,6 +5153,7 @@ class Calculation
if ($pSheet !== null) {
$pSheetName = $pSheet->getTitle();
if (strpos($pRange, '!') !== false) {
[$pSheetName, $pRange] = Worksheet::extractSheetTitle($pRange, true);
$pSheet = $this->spreadsheet->getSheetByName($pSheetName);

View File

@ -50,11 +50,13 @@ class Sum
public static function funcSumNoStrings(...$args)
{
$returnValue = 0;
// Loop through the arguments
foreach (Functions::flattenArray($args) as $arg) {
// Is it a numeric value?
if (is_numeric($arg)) {
if (is_numeric($arg) || empty($arg)) {
if (is_string($arg)) {
$arg = (int) $arg;
}
$returnValue += $arg;
} elseif (Functions::isError($arg)) {
return $arg;

View File

@ -0,0 +1,84 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
use PHPUnit\Framework\TestCase;
/**
 * Checks that SUM() formulae using cell ranges, whole-column ranges (e.g. `B:B`) and whole-row
 * ranges (e.g. `3:3`) evaluate correctly, both on the worksheet holding the formula and on
 * another worksheet named explicitly in the reference.
 */
class RowColumnReferenceTest extends TestCase
{
    /**
     * Workbook fixture: a 'data sheet' and a 'summary sheet', the latter being the active sheet.
     *
     * @var Spreadsheet
     */
    protected $spreadSheet;

    /**
     * Builds a fresh workbook for every dataset so that formulae written to A1 cannot leak between runs.
     */
    protected function setUp(): void
    {
        $this->spreadSheet = new Spreadsheet();

        $dataSheet = new Worksheet($this->spreadSheet, 'data sheet');
        $this->spreadSheet->addSheet($dataSheet, 0);
        $dataSheet->setCellValue('B1', 1.1);
        $dataSheet->setCellValue('B2', 2.2);
        $dataSheet->setCellValue('B3', 4.4);
        $dataSheet->setCellValue('C3', 8.8);
        $dataSheet->setCellValue('D3', 16.16);

        // Same cell layout as the data sheet, but every value doubled, so a result
        // unambiguously identifies which worksheet a reference was resolved against.
        $calcSheet = new Worksheet($this->spreadSheet, 'summary sheet');
        $this->spreadSheet->addSheet($calcSheet, 1);
        $calcSheet->setCellValue('B1', 2.2);
        $calcSheet->setCellValue('B2', 4.4);
        $calcSheet->setCellValue('B3', 8.8);
        $calcSheet->setCellValue('C3', 16.16);
        $calcSheet->setCellValue('D3', 32.32);

        $this->spreadSheet->setActiveSheetIndexByName('summary sheet');
    }

    /**
     * Releases the workbook after each dataset; worksheets and their parent workbook reference
     * each other, so they are disconnected explicitly rather than left for the garbage collector.
     */
    protected function tearDown(): void
    {
        $this->spreadSheet->disconnectWorksheets();
    }

    /**
     * Writes the formula into A1 of the active ('summary sheet') worksheet and checks the calculated value.
     *
     * @dataProvider providerCurrentWorksheetFormulae
     */
    public function testCurrentWorksheet(string $formula, float $expectedResult): void
    {
        $worksheet = $this->spreadSheet->getActiveSheet();
        $worksheet->setCellValue('A1', $formula);

        $result = $worksheet->getCell('A1')->getCalculatedValue();

        // The expected values are sums of decimal fractions, so compare with a tolerance instead of
        // bit-for-bit: the exact float produced depends on the order in which the cells are added.
        self::assertIsFloat($result);
        self::assertEqualsWithDelta($expectedResult, $result, 1.0e-12);
    }

    /**
     * Datasets of [formula, expected SUM]. Static, as required by current PHPUnit versions.
     *
     * @return array<string, array{0: string, 1: float}>
     */
    public static function providerCurrentWorksheetFormulae(): array
    {
        return [
            'relative range in active worksheet' => ['=SUM(B1:B3)', 15.4],
            'range with absolute columns in active worksheet' => ['=SUM($B1:$B3)', 15.4],
            'range with absolute rows in active worksheet' => ['=SUM(B$1:B$3)', 15.4],
            'range with absolute columns and rows in active worksheet' => ['=SUM($B$1:$B$3)', 15.4],
            'another relative range in active worksheet' => ['=SUM(B3:D3)', 57.28],
            'relative column range in active worksheet' => ['=SUM(B:B)', 15.4],
            'absolute column range in active worksheet' => ['=SUM($B:$B)', 15.4],
            'relative row range in active worksheet' => ['=SUM(3:3)', 57.28],
            'absolute row range in active worksheet' => ['=SUM($3:$3)', 57.28],
            'relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B1:B3)', 15.4],
            'range with absolute columns in specified active worksheet' => ['=SUM(\'summary sheet\'!$B1:$B3)', 15.4],
            'range with absolute rows in specified active worksheet' => ['=SUM(\'summary sheet\'!B$1:B$3)', 15.4],
            'range with absolute columns and rows in specified active worksheet' => ['=SUM(\'summary sheet\'!$B$1:$B$3)', 15.4],
            'another relative range in specified active worksheet' => ['=SUM(\'summary sheet\'!B3:D3)', 57.28],
            'relative column range in specified active worksheet' => ['=SUM(\'summary sheet\'!B:B)', 15.4],
            'absolute column range in specified active worksheet' => ['=SUM(\'summary sheet\'!$B:$B)', 15.4],
            'relative row range in specified active worksheet' => ['=SUM(\'summary sheet\'!3:3)', 57.28],
            'absolute row range in specified active worksheet' => ['=SUM(\'summary sheet\'!$3:$3)', 57.28],
            'relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B1:B3)', 7.7],
            'range with absolute columns in specified other worksheet' => ['=SUM(\'data sheet\'!$B1:$B3)', 7.7],
            'range with absolute rows in specified other worksheet' => ['=SUM(\'data sheet\'!B$1:B$3)', 7.7],
            'range with absolute columns and rows in specified other worksheet' => ['=SUM(\'data sheet\'!$B$1:$B$3)', 7.7],
            'another relative range in specified other worksheet' => ['=SUM(\'data sheet\'!B3:D3)', 29.36],
            'relative column range in specified other worksheet' => ['=SUM(\'data sheet\'!B:B)', 7.7],
            'absolute column range in specified other worksheet' => ['=SUM(\'data sheet\'!$B:$B)', 7.7],
            'relative row range in specified other worksheet' => ['=SUM(\'data sheet\'!3:3)', 29.36],
            'absolute row range in specified other worksheet' => ['=SUM(\'data sheet\'!$3:$3)', 29.36],
        ];
    }
}