Merge pull request #2627 from PHPOffice/Improve-Translation-Separator-Conflict

Resolve `translateSeparator()` method to handle separators cleanly
Some refactoring of the Ods Reader classes to eliminate code duplication
This commit is contained in:
Mark Baker 2022-02-27 13:48:45 +01:00 committed by GitHub
commit fe969f59cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 168 additions and 126 deletions

View File

@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Gnumeric Reader now loads number formatting for cells.
- Gnumeric Reader now correctly identifies selected worksheet.
- Some Refactoring of the Ods Reader, moving all formula and address translation from Ods to Excel into a separate class to eliminate code duplication and ensure consistency.
### Deprecated
@ -27,6 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org).
### Fixed
- Fixed behaviour of XLSX font style vertical align settings
- Resolved formula translations to handle separators (row and column) for array functions as well as for function argument separators; and cleanly handle nesting levels.
Note that this method is used when translating Excel functions between en and other locale languages, as well as when converting formulae between different spreadsheet formats (e.g. Ods to Excel).
Nor is this a perfect solution, as there may still be issues when function calls have array arguments that themselves contain function calls; but it's still better than the current logic.
## 1.22.0 - 2022-02-18

View File

@ -162,7 +162,7 @@ parameters:
-
message: "#^Parameter \\#3 \\$formula of static method PhpOffice\\\\PhpSpreadsheet\\\\Calculation\\\\Calculation\\:\\:translateSeparator\\(\\) expects string, string\\|null given\\.$#"
count: 2
count: 1
path: src/PhpSpreadsheet/Calculation/Calculation.php
-

View File

@ -50,8 +50,10 @@ class Calculation
const RETURN_ARRAY_AS_VALUE = 'value';
const RETURN_ARRAY_AS_ARRAY = 'array';
const FORMULA_OPEN_FUNCTION_BRACE = '{';
const FORMULA_CLOSE_FUNCTION_BRACE = '}';
const FORMULA_OPEN_FUNCTION_BRACE = '(';
const FORMULA_CLOSE_FUNCTION_BRACE = ')';
const FORMULA_OPEN_MATRIX_BRACE = '{';
const FORMULA_CLOSE_MATRIX_BRACE = '}';
const FORMULA_STRING_QUOTE = '"';
private static $returnArrayAsType = self::RETURN_ARRAY_AS_VALUE;
@ -3084,30 +3086,28 @@ class Calculation
return false;
}
/**
* @param string $fromSeparator
* @param string $toSeparator
* @param string $formula
* @param bool $inBraces
*
* @return string
*/
public static function translateSeparator($fromSeparator, $toSeparator, $formula, &$inBraces)
{
public static function translateSeparator(
string $fromSeparator,
string $toSeparator,
string $formula,
int &$inBracesLevel,
string $openBrace = self::FORMULA_OPEN_FUNCTION_BRACE,
string $closeBrace = self::FORMULA_CLOSE_FUNCTION_BRACE
): string {
$strlen = mb_strlen($formula);
for ($i = 0; $i < $strlen; ++$i) {
$chr = mb_substr($formula, $i, 1);
switch ($chr) {
case self::FORMULA_OPEN_FUNCTION_BRACE:
$inBraces = true;
case $openBrace:
++$inBracesLevel;
break;
case self::FORMULA_CLOSE_FUNCTION_BRACE:
$inBraces = false;
case $closeBrace:
--$inBracesLevel;
break;
case $fromSeparator:
if (!$inBraces) {
if ($inBracesLevel > 0) {
$formula = mb_substr($formula, 0, $i) . $toSeparator . mb_substr($formula, $i + 1);
}
}
@ -3116,31 +3116,47 @@ class Calculation
return $formula;
}
/**
* @param string[] $from
* @param string[] $to
* @param string $formula
* @param string $fromSeparator
* @param string $toSeparator
*
* @return string
*/
private static function translateFormula(array $from, array $to, $formula, $fromSeparator, $toSeparator)
private static function translateFormulaBlock(
array $from,
array $to,
string $formula,
int &$inFunctionBracesLevel,
int &$inMatrixBracesLevel,
string $fromSeparator,
string $toSeparator
): string {
// Function Names
$formula = preg_replace($from, $to, $formula);
// Temporarily adjust matrix separators so that they won't be confused with function arguments
$formula = self::translateSeparator(';', '|', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE);
$formula = self::translateSeparator(',', '!', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE);
// Function Argument Separators
$formula = self::translateSeparator($fromSeparator, $toSeparator, $formula, $inFunctionBracesLevel);
// Restore matrix separators
$formula = self::translateSeparator('|', ';', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE);
$formula = self::translateSeparator('!', ',', $formula, $inMatrixBracesLevel, self::FORMULA_OPEN_MATRIX_BRACE, self::FORMULA_CLOSE_MATRIX_BRACE);
return $formula;
}
private static function translateFormula(array $from, array $to, string $formula, string $fromSeparator, string $toSeparator): string
{
// Convert any Excel function names to the required language
// Convert any Excel function names and constant names to the required language;
// and adjust function argument separators
if (self::$localeLanguage !== 'en_us') {
$inBraces = false;
// If there is the possibility of braces within a quoted string, then we don't treat those as matrix indicators
$inFunctionBracesLevel = 0;
$inMatrixBracesLevel = 0;
// If there is the possibility of separators within a quoted string, then we treat them as literals
if (strpos($formula, self::FORMULA_STRING_QUOTE) !== false) {
// So instead we skip replacing in any quoted strings by only replacing in every other array element after we've exploded
// the formula
// So instead we skip replacing in any quoted strings by only replacing in every other array element
// after we've exploded the formula
$temp = explode(self::FORMULA_STRING_QUOTE, $formula);
$i = false;
foreach ($temp as &$value) {
// Only count/replace in alternating array entries
// Only adjust in alternating array entries
if ($i = !$i) {
$value = preg_replace($from, $to, $value);
$value = self::translateSeparator($fromSeparator, $toSeparator, $value, $inBraces);
$value = self::translateFormulaBlock($from, $to, $value, $inFunctionBracesLevel, $inMatrixBracesLevel, $fromSeparator, $toSeparator);
}
}
unset($value);
@ -3148,8 +3164,7 @@ class Calculation
$formula = implode(self::FORMULA_STRING_QUOTE, $temp);
} else {
// If there's no quoted strings, then we do a simple count/replace
$formula = preg_replace($from, $to, $formula);
$formula = self::translateSeparator($fromSeparator, $toSeparator, $formula, $inBraces);
$formula = self::translateFormulaBlock($from, $to, $formula, $inFunctionBracesLevel, $inMatrixBracesLevel, $fromSeparator, $toSeparator);
}
}
@ -3162,6 +3177,7 @@ class Calculation
public function _translateFormulaToLocale($formula)
{
// Build list of function names and constants for translation
if (self::$functionReplaceFromExcel === null) {
self::$functionReplaceFromExcel = [];
foreach (array_keys(self::$localeFunctions) as $excelFunctionName) {
@ -3798,11 +3814,11 @@ class Calculation
*/
private function convertMatrixReferences($formula)
{
static $matrixReplaceFrom = [self::FORMULA_OPEN_FUNCTION_BRACE, ';', self::FORMULA_CLOSE_FUNCTION_BRACE];
static $matrixReplaceFrom = [self::FORMULA_OPEN_MATRIX_BRACE, ';', self::FORMULA_CLOSE_MATRIX_BRACE];
static $matrixReplaceTo = ['MKMATRIX(MKMATRIX(', '),MKMATRIX(', '))'];
// Convert any Excel matrix references to the MKMATRIX() function
if (strpos($formula, self::FORMULA_OPEN_FUNCTION_BRACE) !== false) {
if (strpos($formula, self::FORMULA_OPEN_MATRIX_BRACE) !== false) {
// If there is the possibility of braces within a quoted string, then we don't treat those as matrix indicators
if (strpos($formula, self::FORMULA_STRING_QUOTE) !== false) {
// So instead we skip replacing in any quoted strings by only replacing in every other array element after we've exploded
@ -3814,8 +3830,8 @@ class Calculation
foreach ($temp as &$value) {
// Only count/replace in alternating array entries
if ($i = !$i) {
$openCount += substr_count($value, self::FORMULA_OPEN_FUNCTION_BRACE);
$closeCount += substr_count($value, self::FORMULA_CLOSE_FUNCTION_BRACE);
$openCount += substr_count($value, self::FORMULA_OPEN_MATRIX_BRACE);
$closeCount += substr_count($value, self::FORMULA_CLOSE_MATRIX_BRACE);
$value = str_replace($matrixReplaceFrom, $matrixReplaceTo, $value);
}
}
@ -3824,8 +3840,8 @@ class Calculation
$formula = implode(self::FORMULA_STRING_QUOTE, $temp);
} else {
// If there's no quoted strings, then we do a simple count/replace
$openCount = substr_count($formula, self::FORMULA_OPEN_FUNCTION_BRACE);
$closeCount = substr_count($formula, self::FORMULA_CLOSE_FUNCTION_BRACE);
$openCount = substr_count($formula, self::FORMULA_OPEN_MATRIX_BRACE);
$closeCount = substr_count($formula, self::FORMULA_CLOSE_MATRIX_BRACE);
$formula = str_replace($matrixReplaceFrom, $matrixReplaceTo, $formula);
}
// Trap for mismatched braces and trigger an appropriate error

View File

@ -6,11 +6,11 @@ use DOMAttr;
use DOMDocument;
use DOMElement;
use DOMNode;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Reader\Ods\AutoFilter;
use PhpOffice\PhpSpreadsheet\Reader\Ods\DefinedNames;
use PhpOffice\PhpSpreadsheet\Reader\Ods\FormulaTranslator;
use PhpOffice\PhpSpreadsheet\Reader\Ods\PageSettings;
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
@ -512,7 +512,7 @@ class Ods extends BaseReader
if ($hasCalculatedValue) {
$type = DataType::TYPE_FORMULA;
$cellDataFormula = substr($cellDataFormula, strpos($cellDataFormula, ':=') + 1);
$cellDataFormula = $this->convertToExcelFormulaValue($cellDataFormula);
$cellDataFormula = FormulaTranslator::convertToExcelFormulaValue($cellDataFormula);
}
if ($cellData->hasAttributeNS($tableNs, 'number-columns-repeated')) {
@ -568,31 +568,7 @@ class Ods extends BaseReader
}
// Merged cells
if (
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned')
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned')
) {
if (($type !== DataType::TYPE_NULL) || (!$this->readDataOnly)) {
$columnTo = $columnID;
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) {
$columnIndex = Coordinate::columnIndexFromString($columnID);
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned');
$columnIndex -= 2;
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1);
}
$rowTo = $rowID;
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) {
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1;
}
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo;
$spreadsheet->getActiveSheet()->mergeCells($cellRange);
}
}
$this->processMergedCells($cellData, $tableNs, $type, $columnID, $rowID, $spreadsheet);
++$columnID;
}
@ -735,31 +711,38 @@ class Ods extends BaseReader
return $value;
}
private function convertToExcelFormulaValue(string $openOfficeFormula): string
{
$temp = explode('"', $openOfficeFormula);
$tKey = false;
foreach ($temp as &$value) {
// Only replace in alternate array entries (i.e. non-quoted blocks)
if ($tKey = !$tKey) {
// Cell range reference in another sheet
$value = preg_replace('/\[\$?([^\.]+)\.([^\.]+):\.([^\.]+)\]/miu', '$1!$2:$3', $value);
// Cell reference in another sheet
$value = preg_replace('/\[\$?([^\.]+)\.([^\.]+)\]/miu', '$1!$2', $value ?? '');
// Cell range reference
$value = preg_replace('/\[\.([^\.]+):\.([^\.]+)\]/miu', '$1:$2', $value ?? '');
// Simple cell reference
$value = preg_replace('/\[\.([^\.]+)\]/miu', '$1', $value ?? '');
// Convert references to defined names/formulae
$value = str_replace('$$', '', $value ?? '');
private function processMergedCells(
DOMElement $cellData,
string $tableNs,
string $type,
string $columnID,
int $rowID,
Spreadsheet $spreadsheet
): void {
if (
$cellData->hasAttributeNS($tableNs, 'number-columns-spanned')
|| $cellData->hasAttributeNS($tableNs, 'number-rows-spanned')
) {
if (($type !== DataType::TYPE_NULL) || ($this->readDataOnly === false)) {
$columnTo = $columnID;
$value = Calculation::translateSeparator(';', ',', $value, $inBraces);
if ($cellData->hasAttributeNS($tableNs, 'number-columns-spanned')) {
$columnIndex = Coordinate::columnIndexFromString($columnID);
$columnIndex += (int) $cellData->getAttributeNS($tableNs, 'number-columns-spanned');
$columnIndex -= 2;
$columnTo = Coordinate::stringFromColumnIndex($columnIndex + 1);
}
$rowTo = $rowID;
if ($cellData->hasAttributeNS($tableNs, 'number-rows-spanned')) {
$rowTo = $rowTo + (int) $cellData->getAttributeNS($tableNs, 'number-rows-spanned') - 1;
}
$cellRange = $columnID . $rowID . ':' . $columnTo . $rowTo;
$spreadsheet->getActiveSheet()->mergeCells($cellRange);
}
}
// Then rebuild the formula string
$excelFormula = implode('"', $temp);
return $excelFormula;
}
}

View File

@ -5,7 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Reader\Ods;
use DOMElement;
use DOMNode;
class AutoFilter extends BaseReader
class AutoFilter extends BaseLoader
{
public function read(DOMElement $workbookData): void
{
@ -20,7 +20,7 @@ class AutoFilter extends BaseReader
foreach ($autofilters->childNodes as $autofilter) {
$autofilterRange = $this->getAttributeValue($autofilter, 'target-range-address');
if ($autofilterRange !== null) {
$baseAddress = $this->convertToExcelAddressValue($autofilterRange);
$baseAddress = FormulaTranslator::convertToExcelAddressValue($autofilterRange);
$this->spreadsheet->getActiveSheet()->setAutoFilter($baseAddress);
}
}

View File

@ -0,0 +1,27 @@
<?php
namespace PhpOffice\PhpSpreadsheet\Reader\Ods;
use DOMElement;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
abstract class BaseLoader
{
/**
* @var Spreadsheet
*/
protected $spreadsheet;
/**
* @var string
*/
protected $tableNs;
public function __construct(Spreadsheet $spreadsheet, string $tableNs)
{
$this->spreadsheet = $spreadsheet;
$this->tableNs = $tableNs;
}
abstract public function read(DOMElement $workbookData): void;
}

View File

@ -6,7 +6,7 @@ use DOMElement;
use PhpOffice\PhpSpreadsheet\DefinedName;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
class DefinedNames extends BaseReader
class DefinedNames extends BaseLoader
{
public function read(DOMElement $workbookData): void
{
@ -25,8 +25,8 @@ class DefinedNames extends BaseReader
$baseAddress = $definedNameElement->getAttributeNS($this->tableNs, 'base-cell-address');
$range = $definedNameElement->getAttributeNS($this->tableNs, 'cell-range-address');
$baseAddress = $this->convertToExcelAddressValue($baseAddress);
$range = $this->convertToExcelAddressValue($range);
$baseAddress = FormulaTranslator::convertToExcelAddressValue($baseAddress);
$range = FormulaTranslator::convertToExcelAddressValue($range);
$this->addDefinedName($baseAddress, $definedName, $range);
}
@ -43,9 +43,9 @@ class DefinedNames extends BaseReader
$baseAddress = $definedNameElement->getAttributeNS($this->tableNs, 'base-cell-address');
$expression = $definedNameElement->getAttributeNS($this->tableNs, 'expression');
$baseAddress = $this->convertToExcelAddressValue($baseAddress);
$baseAddress = FormulaTranslator::convertToExcelAddressValue($baseAddress);
$expression = substr($expression, strpos($expression, ':=') + 1);
$expression = $this->convertToExcelFormulaValue($expression);
$expression = FormulaTranslator::convertToExcelFormulaValue($expression);
$this->addDefinedName($baseAddress, $definedName, $expression);
}

View File

@ -2,31 +2,11 @@
namespace PhpOffice\PhpSpreadsheet\Reader\Ods;
use DOMElement;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
abstract class BaseReader
class FormulaTranslator
{
/**
* @var Spreadsheet
*/
protected $spreadsheet;
/**
* @var string
*/
protected $tableNs;
public function __construct(Spreadsheet $spreadsheet, string $tableNs)
{
$this->spreadsheet = $spreadsheet;
$this->tableNs = $tableNs;
}
abstract public function read(DOMElement $workbookData): void;
protected function convertToExcelAddressValue(string $openOfficeAddress): string
public static function convertToExcelAddressValue(string $openOfficeAddress): string
{
$excelAddress = $openOfficeAddress;
@ -46,13 +26,16 @@ abstract class BaseReader
return $excelAddress ?? '';
}
protected function convertToExcelFormulaValue(string $openOfficeFormula): string
public static function convertToExcelFormulaValue(string $openOfficeFormula): string
{
$temp = explode('"', $openOfficeFormula);
$temp = explode(Calculation::FORMULA_STRING_QUOTE, $openOfficeFormula);
$tKey = false;
$inMatrixBracesLevel = 0;
$inFunctionBracesLevel = 0;
foreach ($temp as &$value) {
// @var string $value
// Only replace in alternate array entries (i.e. non-quoted blocks)
// so that conversion isn't done in string values
if ($tKey = !$tKey) {
// Cell range reference in another sheet
$value = preg_replace('/\[\$?([^\.]+)\.([^\.]+):\.([^\.]+)\]/miu', '$1!$2:$3', $value);
@ -65,7 +48,28 @@ abstract class BaseReader
// Convert references to defined names/formulae
$value = str_replace('$$', '', $value ?? '');
$value = Calculation::translateSeparator(';', ',', $value, $inBraces);
// Convert ODS function argument separators to Excel function argument separators
$value = Calculation::translateSeparator(';', ',', $value, $inFunctionBracesLevel);
// Convert ODS matrix separators to Excel matrix separators
$value = Calculation::translateSeparator(
';',
',',
$value,
$inMatrixBracesLevel,
Calculation::FORMULA_OPEN_MATRIX_BRACE,
Calculation::FORMULA_CLOSE_MATRIX_BRACE
);
$value = Calculation::translateSeparator(
'|',
';',
$value,
$inMatrixBracesLevel,
Calculation::FORMULA_OPEN_MATRIX_BRACE,
Calculation::FORMULA_CLOSE_MATRIX_BRACE
);
$value = preg_replace('/COM\.MICROSOFT\./ui', '', $value);
}
}

View File

@ -75,4 +75,9 @@ return [
'tr',
'=WORKDAY.INTL(B1)',
],
[
'=STØRST(ABS({2,-3;-4,5}); ABS{-2,3;4,-5})',
'nb',
'=MAX(ABS({2,-3;-4,5}), ABS{-2,3;4,-5})',
],
];