From 99f488efc60233ecdd5e27ca97bb19cd2ccfd627 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Sun, 27 Feb 2022 10:51:06 +0100 Subject: [PATCH 1/2] Resolve `translateSeparator()` method to handle separators (row and column) for array functions as well as for function argument separators; and cleanly handle nesting levels Note that this method is used when translating Excel functions between en and other locale languages, as well as when converting formulae between different spreadsheet formats (e.g. Ods to Excel) Nor is this a perfect solution, as there may still be issues when function calls have array arguments that themselves contain function calls; but it's still better than the current logic --- phpstan-baseline.neon | 2 +- .../Calculation/Calculation.php | 102 ++++++++++-------- tests/data/Calculation/Translations.php | 5 + 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 9c087daa..a5a36992 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -162,7 +162,7 @@ parameters: - message: "#^Parameter \\#3 \\$formula of static method PhpOffice\\\\PhpSpreadsheet\\\\Calculation\\\\Calculation\\:\\:translateSeparator\\(\\) expects string, string\\|null given\\.$#" - count: 2 + count: 1 path: src/PhpSpreadsheet/Calculation/Calculation.php - diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 2b82f65e..b2384fb5 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -50,8 +50,10 @@ class Calculation const RETURN_ARRAY_AS_VALUE = 'value'; const RETURN_ARRAY_AS_ARRAY = 'array'; - const FORMULA_OPEN_FUNCTION_BRACE = '{'; - const FORMULA_CLOSE_FUNCTION_BRACE = '}'; + const FORMULA_OPEN_FUNCTION_BRACE = '('; + const FORMULA_CLOSE_FUNCTION_BRACE = ')'; + const FORMULA_OPEN_MATRIX_BRACE = '{'; + const FORMULA_CLOSE_MATRIX_BRACE = '}'; const FORMULA_STRING_QUOTE = '"'; private static $returnArrayAsType = self::RETURN_ARRAY_AS_VALUE; @@ -3084,30 +3086,28 @@ class Calculation return false; } - /** - * @param string $fromSeparator - * @param string $toSeparator - * @param string $formula - * @param bool $inBraces - * - * @return string - */ - public static function translateSeparator($fromSeparator, $toSeparator, $formula, &$inBraces) - { + public static function translateSeparator( + string $fromSeparator, + string $toSeparator, + string $formula, + int &$inBracesLevel, + string $openBrace = self::FORMULA_OPEN_FUNCTION_BRACE, + string $closeBrace = self::FORMULA_CLOSE_FUNCTION_BRACE + ): string { $strlen = mb_strlen($formula); for ($i = 0; $i < $strlen; ++$i) { $chr = mb_substr($formula, $i, 1); switch ($chr) { - case self::FORMULA_OPEN_FUNCTION_BRACE: - $inBraces = true; + case $openBrace: + ++$inBracesLevel; break; - case self::FORMULA_CLOSE_FUNCTION_BRACE: - $inBraces = false; + case $closeBrace: + --$inBracesLevel; break; case $fromSeparator: - if (!$inBraces) { + if ($inBracesLevel > 0) { $formula = mb_substr($formula, 0, $i) . $toSeparator . mb_substr($formula, $i + 1); } } @@ -3116,31 +3116,47 @@ class Calculation return $formula; } - /** - * @param string[] $from - * @param string[] $to - * @param string $formula - * @param string $fromSeparator - * @param string $toSeparator - * - * @return string - */ - private static function translateFormula(array $from, array $to, $formula, $fromSeparator, $toSeparator) + private static function translateFormulaBlock( + array $from, + array $to, + string $formula, + int &$inFunctionBracesLevel, + int &$inMatrixBracesLevel, + string $fromSeparator, + string $toSeparator + ): string { + // Function Names + $formula = preg_replace($from, $to, $formula); + + // Temporarily adjust matrix separators so that they won't be confused with function arguments + $formula = self::translateSeparator(';', '|', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE); + $formula = self::translateSeparator(',', '!', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE); + // Function Argument Separators + $formula = self::translateSeparator($fromSeparator, $toSeparator, $formula, $inFunctionBracesLevel); + // Restore matrix separators + $formula = self::translateSeparator('|', ';', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE); + $formula = self::translateSeparator('!', ',', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE); + + return $formula; + } + + private static function translateFormula(array $from, array $to, string $formula, string $fromSeparator, string $toSeparator): string { - // Convert any Excel function names to the required language + // Convert any Excel function names and constant names to the required language; + // and adjust function argument separators if (self::$localeLanguage !== 'en_us') { - $inBraces = false; - // If there is the possibility of braces within a quoted string, then we don't treat those as matrix indicators + $inFunctionBracesLevel = 0; + $inMatrixBracesLevel = 0; + // If there is the possibility of separators within a quoted string, then we treat them as literals if (strpos($formula, self::FORMULA_STRING_QUOTE) !== false) { - // So instead we skip replacing in any quoted strings by only replacing in every other array element after we've exploded - // the formula + // So instead we skip replacing in any quoted strings by only replacing in every other array element + // after we've exploded the formula $temp = explode(self::FORMULA_STRING_QUOTE, $formula); $i = false; foreach ($temp as &$value) { - // Only count/replace in alternating array entries + // Only adjust in alternating array entries if ($i = !$i) { - $value = preg_replace($from, $to, $value); - $value = self::translateSeparator($fromSeparator, $toSeparator, $value, $inBraces); + $value = self::translateFormulaBlock($from, $to, $value, $inFunctionBracesLevel, $inMatrixBracesLevel, $fromSeparator, $toSeparator); } } unset($value); @@ -3148,8 +3164,7 @@ class Calculation $formula = implode(self::FORMULA_STRING_QUOTE, $temp); } else { // If there's no quoted strings, then we do a simple count/replace - $formula = preg_replace($from, $to, $formula); - $formula = self::translateSeparator($fromSeparator, $toSeparator, $formula, $inBraces); + $formula = self::translateFormulaBlock($from, $to, $formula, $inFunctionBracesLevel, $inMatrixBracesLevel, $fromSeparator, $toSeparator); } } @@ -3162,6 +3177,7 @@ class Calculation public function _translateFormulaToLocale($formula) { + // Build list of function names and constants for translation if (self::$functionReplaceFromExcel === null) { self::$functionReplaceFromExcel = []; foreach (array_keys(self::$localeFunctions) as $excelFunctionName) { @@ -3798,11 +3814,11 @@ class Calculation */ private function convertMatrixReferences($formula) { - static $matrixReplaceFrom = [self::FORMULA_OPEN_FUNCTION_BRACE, ';', self::FORMULA_CLOSE_FUNCTION_BRACE]; + static $matrixReplaceFrom = [self::FORMULA_OPEN_MATRIX_BRACE, ';', self::FORMULA_CLOSE_MATRIX_BRACE]; static $matrixReplaceTo = ['MKMATRIX(MKMATRIX(', '),MKMATRIX(', '))']; // Convert any Excel matrix references to the MKMATRIX() function - if (strpos($formula, self::FORMULA_OPEN_FUNCTION_BRACE) !== false) { + if (strpos($formula, self::FORMULA_OPEN_MATRIX_BRACE) !== false) { // If there is the possibility of braces within a quoted string, then we don't treat those as matrix indicators if (strpos($formula, self::FORMULA_STRING_QUOTE) !== false) { // So instead we skip replacing in any quoted strings by only replacing in every other array element after we've exploded @@ -3814,8 +3830,8 @@ class Calculation foreach ($temp as &$value) { // Only count/replace in alternating array entries if ($i = !$i) { - $openCount += substr_count($value, self::FORMULA_OPEN_FUNCTION_BRACE); - $closeCount += substr_count($value, self::FORMULA_CLOSE_FUNCTION_BRACE); + $openCount += substr_count($value, self::FORMULA_OPEN_MATRIX_BRACE); + $closeCount += substr_count($value, self::FORMULA_CLOSE_MATRIX_BRACE); $value = str_replace($matrixReplaceFrom, $matrixReplaceTo, $value); } } @@ -3824,8 +3840,8 @@ class Calculation $formula = implode(self::FORMULA_STRING_QUOTE, $temp); } else { // If there's no quoted strings, then we do a simple count/replace - $openCount = substr_count($formula, self::FORMULA_OPEN_FUNCTION_BRACE); - $closeCount = substr_count($formula, self::FORMULA_CLOSE_FUNCTION_BRACE); + $openCount = substr_count($formula, self::FORMULA_OPEN_MATRIX_BRACE); + $closeCount = substr_count($formula, self::FORMULA_CLOSE_MATRIX_BRACE); $formula = str_replace($matrixReplaceFrom, $matrixReplaceTo, $formula); } // Trap for mismatched braces and trigger an appropriate error diff --git a/tests/data/Calculation/Translations.php b/tests/data/Calculation/Translations.php index aaf6e3f2..604c6721 100644 --- a/tests/data/Calculation/Translations.php +++ b/tests/data/Calculation/Translations.php @@ -75,4 +75,9 @@ return [ 'tr', '=WORKDAY.INTL(B1)', ], + [ + '=STØRST(ABS({2,-3;-4,5}); ABS{-2,3;4,-5})', + 'nb', + '=MAX(ABS({2,-3;-4,5}), ABS{-2,3;4,-5})', + ], ]; From b31d42c52f0e0b46d4c77b40ac41dff639029e54 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Sun, 27 Feb 2022 12:21:35 +0100 Subject: [PATCH 2/2] Some Refactoring of the Ods Reader, moving all formula and address translation from Ods to Excel into a separate class to eliminate code duplication --- CHANGELOG.md | 7 ++ src/PhpSpreadsheet/Reader/Ods.php | 83 ++++++++----------- src/PhpSpreadsheet/Reader/Ods/AutoFilter.php | 4 +- src/PhpSpreadsheet/Reader/Ods/BaseLoader.php | 27 ++++++ .../Reader/Ods/DefinedNames.php | 10 +-- .../{BaseReader.php => FormulaTranslator.php} | 54 ++++++------ 6 files changed, 103 insertions(+), 82 deletions(-) create mode 100644 src/PhpSpreadsheet/Reader/Ods/BaseLoader.php rename src/PhpSpreadsheet/Reader/Ods/{BaseReader.php => FormulaTranslator.php} (62%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d2c65db..c2acd357 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - Gnumeric Reader now loads number formatting for cells. - Gnumeric Reader now correctly identifies selected worksheet. +- Some Refactoring of the Ods Reader, moving all formula and address translation from Ods to Excel into a separate class to eliminate code duplication and ensure consistency. ### Deprecated @@ -27,6 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Fixed - Fixed behaviour of XLSX font style vertical align settings +- Resolved formula translations to handle separators (row and column) for array functions as well as for function argument separators; and cleanly handle nesting levels. + + Note that this method is used when translating Excel functions between en and other locale languages, as well as when converting formulae between different spreadsheet formats (e.g. Ods to Excel). + + Nor is this a perfect solution, as there may still be issues when function calls have array arguments that themselves contain function calls; but it's still better than the current logic. + ## 1.22.0 - 2022-02-18 diff --git a/src/PhpSpreadsheet/Reader/Ods.php b/src/PhpSpreadsheet/Reader/Ods.php index 2e2a61fc..d0d87c5f 100644 --- a/src/PhpSpreadsheet/Reader/Ods.php +++ b/src/PhpSpreadsheet/Reader/Ods.php @@ -6,11 +6,11 @@ use DOMAttr; use DOMDocument; use DOMElement; use DOMNode; -use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter; use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames; +use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator; use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings; use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; @@ -512,7 +512,7 @@ class Ods extends BaseReader if ($hasCalculatedValue) { $type = DataType::TYPE_FORMULA; $cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1); - $cellDataFormula = $this->convertToExcelFormulaValue($cellDataFormula); + $cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula); } if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) { @@ -568,31 +568,7 @@ class Ods extends BaseReader } // Merged cells - if ( - $cellData->hasAttributeNS($tableNs, 'number-columns-spanned') - || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') - ) { - if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) { - $columnTo = $columnID; - - if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { - $columnIndex = Coordinate::columnIndexFromString($columnID); - $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); - $columnIndex -= 2; - - $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); - } - - $rowTo = $rowID; - - if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { - $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; - } - - $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; - $spreadsheet->getActiveSheet()->mergeCells($cellRange); - } - } + $this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet); ++$columnID; } @@ -735,31 +711,38 @@ class Ods extends BaseReader return $value; } - private function convertToExcelFormulaValue(string $openOfficeFormula): string - { - $temp = explode('"', $openOfficeFormula); - $tKey = false; - foreach ($temp as &$value) { - // Only replace in alternate array entries (i.e. non-quoted blocks) - if ($tKey = !$tKey) { - // Cell range reference in another sheet - $value = preg_replace('/\[\$?([^\.]+)\.([^\.]+):\.([^\.]+)\]/miu', '$1!$2:$3', $value); - // Cell reference in another sheet - $value = preg_replace('/\[\$?([^\.]+)\.([^\.]+)\]/miu', '$1!$2', $value ?? ''); - // Cell range reference - $value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/miu', '$1:$2', $value ?? ''); - // Simple cell reference - $value = preg_replace('/\[\.([^\.]+)\]/miu', '$1', $value ?? ''); - // Convert references to defined names/formulae - $value = str_replace('$$', '', $value ?? ''); + private function processMergedCells( + DOMElement $cellData, + string $tableNs, + string $type, + string $columnID, + int $rowID, + Spreadsheet $spreadsheet + ): void { + if ( + $cellData->hasAttributeNS($tableNs, 'number-columns-spanned') + || $cellData->hasAttributeNS($tableNs, 'number-rows-spanned') + ) { + if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) { + $columnTo = $columnID; - $value = Calculation::translateSeparator(';', ',', $value, $inBraces); + if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) { + $columnIndex = Coordinate::columnIndexFromString($columnID); + $columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned'); + $columnIndex -= 2; + + $columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1); + } + + $rowTo = $rowID; + + if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) { + $rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1; + } + + $cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo; + $spreadsheet->getActiveSheet()->mergeCells($cellRange); } } - - // Then rebuild the formula string - $excelFormula = implode('"', $temp); - - return $excelFormula; } } diff --git a/src/PhpSpreadsheet/Reader/Ods/AutoFilter.php b/src/PhpSpreadsheet/Reader/Ods/AutoFilter.php index bdc8b3ff..1f5f975d 100644 --- a/src/PhpSpreadsheet/Reader/Ods/AutoFilter.php +++ b/src/PhpSpreadsheet/Reader/Ods/AutoFilter.php @@ -5,7 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader\Ods; use DOMElement; use DOMNode; -class AutoFilter extends BaseReader +class AutoFilter extends BaseLoader { public function read(DOMElement $workbookData): void { @@ -20,7 +20,7 @@ class AutoFilter extends BaseReader foreach ($autofilters->childNodes as $autofilter) { $autofilterRange = $this->getAttributeValue($autofilter, 'target-range-address'); if ($autofilterRange !== null) { - $baseAddress = $this->convertToExcelAddressValue($autofilterRange); + $baseAddress = FormulaTranslator::convertToExcelAddressValue($autofilterRange); $this->spreadsheet->getActiveSheet()->setAutoFilter($baseAddress); } } diff --git a/src/PhpSpreadsheet/Reader/Ods/BaseLoader.php b/src/PhpSpreadsheet/Reader/Ods/BaseLoader.php new file mode 100644 index 00000000..b06691f4 --- /dev/null +++ b/src/PhpSpreadsheet/Reader/Ods/BaseLoader.php @@ -0,0 +1,27 @@ +spreadsheet = $spreadsheet; + $this->tableNs = $tableNs; + } + + abstract public function read(DOMElement $workbookData): void; +} diff --git a/src/PhpSpreadsheet/Reader/Ods/DefinedNames.php b/src/PhpSpreadsheet/Reader/Ods/DefinedNames.php index 6810a3c7..e0ab8900 100644 --- a/src/PhpSpreadsheet/Reader/Ods/DefinedNames.php +++ b/src/PhpSpreadsheet/Reader/Ods/DefinedNames.php @@ -6,7 +6,7 @@ use DOMElement; use PhpOffice\PhpSpreadsheet\DefinedName; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; -class DefinedNames extends BaseReader +class DefinedNames extends BaseLoader { public function read(DOMElement $workbookData): void { @@ -25,8 +25,8 @@ class DefinedNames extends BaseReader $baseAddress = $definedNameElement->getAttributeNS($this->tableNs, 'base-cell-address'); $range = $definedNameElement->getAttributeNS($this->tableNs, 'cell-range-address'); - $baseAddress = $this->convertToExcelAddressValue($baseAddress); - $range = $this->convertToExcelAddressValue($range); + $baseAddress = FormulaTranslator::convertToExcelAddressValue($baseAddress); + $range = FormulaTranslator::convertToExcelAddressValue($range); $this->addDefinedName($baseAddress, $definedName, $range); } @@ -43,9 +43,9 @@ class DefinedNames extends BaseReader $baseAddress = $definedNameElement->getAttributeNS($this->tableNs, 'base-cell-address'); $expression = $definedNameElement->getAttributeNS($this->tableNs, 'expression'); - $baseAddress = $this->convertToExcelAddressValue($baseAddress); + $baseAddress = FormulaTranslator::convertToExcelAddressValue($baseAddress); $expression = substr($expression, strpos($expression, ':=') + 1); - $expression = $this->convertToExcelFormulaValue($expression); + $expression = FormulaTranslator::convertToExcelFormulaValue($expression); $this->addDefinedName($baseAddress, $definedName, $expression); } diff --git a/src/PhpSpreadsheet/Reader/Ods/BaseReader.php b/src/PhpSpreadsheet/Reader/Ods/FormulaTranslator.php similarity index 62% rename from src/PhpSpreadsheet/Reader/Ods/BaseReader.php rename to src/PhpSpreadsheet/Reader/Ods/FormulaTranslator.php index 17e2d4d5..f2ad1a3d 100644 --- a/src/PhpSpreadsheet/Reader/Ods/BaseReader.php +++ b/src/PhpSpreadsheet/Reader/Ods/FormulaTranslator.php @@ -2,31 +2,11 @@ namespace PhpOffice\PhpSpreadsheet\Reader\Ods; -use DOMElement; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; -use PhpOffice\PhpSpreadsheet\Spreadsheet; -abstract class BaseReader +class FormulaTranslator { - /** - * @var Spreadsheet - */ - protected $spreadsheet; - - /** - * @var string - */ - protected $tableNs; - - public function __construct(Spreadsheet $spreadsheet, string $tableNs) - { - $this->spreadsheet = $spreadsheet; - $this->tableNs = $tableNs; - } - - abstract public function read(DOMElement $workbookData): void; - - protected function convertToExcelAddressValue(string $openOfficeAddress): string + public static function convertToExcelAddressValue(string $openOfficeAddress): string { $excelAddress = $openOfficeAddress; @@ -46,13 +26,16 @@ abstract class BaseReader return $excelAddress ?? ''; } - protected function convertToExcelFormulaValue(string $openOfficeFormula): string + public static function convertToExcelFormulaValue(string $openOfficeFormula): string { - $temp = explode('"', $openOfficeFormula); + $temp = explode(Calculation::FORMULA_STRING_QUOTE, $openOfficeFormula); $tKey = false; + $inMatrixBracesLevel = 0; + $inFunctionBracesLevel = 0; foreach ($temp as &$value) { // @var string $value // Only replace in alternate array entries (i.e. non-quoted blocks) + // so that conversion isn't done in string values if ($tKey = !$tKey) { // Cell range reference in another sheet $value = preg_replace('/\[\$?([^\.]+)\.([^\.]+):\.([^\.]+)\]/miu', '$1!$2:$3', $value); @@ -65,7 +48,28 @@ abstract class BaseReader // Convert references to defined names/formulae $value = str_replace('$$', '', $value ?? ''); - $value = Calculation::translateSeparator(';', ',', $value, $inBraces); + // Convert ODS function argument separators to Excel function argument separators + $value = Calculation::translateSeparator(';', ',', $value, $inFunctionBracesLevel); + + // Convert ODS matrix separators to Excel matrix separators + $value = Calculation::translateSeparator( + ';', + ',', + $value, + $inMatrixBracesLevel, + Calculation::FORMULA_OPEN_MATRIX_BRACE, + Calculation::FORMULA_CLOSE_MATRIX_BRACE + ); + $value = Calculation::translateSeparator( + '|', + ';', + $value, + $inMatrixBracesLevel, + Calculation::FORMULA_OPEN_MATRIX_BRACE, + Calculation::FORMULA_CLOSE_MATRIX_BRACE + ); + + $value = preg_replace('/COM\.MICROSOFT\./ui', '', $value); } }