From faf6d819c65a0d1188a4d4d17997941759cd2219 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Mon, 4 Jul 2022 08:30:46 -0700 Subject: [PATCH] Keep Calculated String Results Below 32K (#2921) * Keep Calculated String Results Below 32K This is the result of an investigation into issue #2884 (see also PR #2913). It is, unfortunately, not a fix for the original problem; see the discussion in that PR for why I don't think there is a practical fix for that specific problem at this time. Excel limits strings to 32,767 characters. We already truncate strings to that length when added to the spreadsheet. However, we have been able to exceed that length as a result of the concatenation operator (Excel truncates); as a result of the CONCATENATE or TEXTJOIN functions (Excel returns #CALC!); or as a result of the REPLACE, REPT, SUBSTITUTE functions (Excel returns #VALUE!). This PR changes PhpSpreadsheet to return the same value as Excel in these cases. Note that Excel2003 truncates in all those cases; I don't think there is a way to differentiate that behavior in PhpSpreadsheet. However, LibreOffice and Gnumeric do not have that limit; if they have a limit at all, it is much higher. It would be fairly easy to use existing settings to differentiate between Excel and LibreOffice/Gnumeric in this respect. I have not done so in this PR because I am not sure how useful that is, and I can easily see it leading to problems (read in a LibreOffice spreadsheet with a 33K cell and then output to an Excel spreadsheet). Perhaps it should be handled with an additional opt-in setting. I changed the maximum size from a literal to a constant in the one place where it was already being enforced (Cell/DataType). I am not sure that is the best place for it to be defined; I am open to suggestions. * Implement Some Suggestions ... from @MarkBaker. --- .../Calculation/Calculation.php | 9 ++++ .../Calculation/Information/ErrorValue.php | 2 +- .../Calculation/Information/ExcelError.php | 29 ++++++------ .../Calculation/TextData/Concatenate.php | 39 ++++++++++++++-- .../Calculation/TextData/Helpers.php | 6 ++- .../Calculation/TextData/Replace.php | 45 ++++++++++++------- src/PhpSpreadsheet/Cell/DataType.php | 5 ++- src/PhpSpreadsheet/Shared/StringHelper.php | 4 +- .../Calculation/StringLengthTest.php | 34 ++++++++++++++ .../data/Calculation/TextData/CONCATENATE.php | 15 +++++++ tests/data/Calculation/TextData/REPLACE.php | 16 +++++++ tests/data/Calculation/TextData/REPT.php | 4 ++ .../data/Calculation/TextData/SUBSTITUTE.php | 28 ++++++++++++ tests/data/Calculation/TextData/TEXTJOIN.php | 22 +++++++++ 14 files changed, 219 insertions(+), 39 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Calculation/StringLengthTest.php diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index 4e3a7c53..5b1c5520 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -11,6 +11,7 @@ use PhpOffice\PhpSpreadsheet\Calculation\Information\Value; use PhpOffice\PhpSpreadsheet\Calculation\Token\Stack; use PhpOffice\PhpSpreadsheet\Cell\Cell; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; +use PhpOffice\PhpSpreadsheet\Cell\DataType; use PhpOffice\PhpSpreadsheet\DefinedName; use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\Shared; @@ -4711,11 +4712,19 @@ class Calculation // Perform the required operation against the operand 1 matrix, passing in operand 2 $matrixResult = $matrix->concat($operand2); $result = $matrixResult->getArray(); + if (isset($result[0][0])) { + $result[0][0] = Shared\StringHelper::substring($result[0][0], 0, DataType::MAX_STRING_LENGTH); + } } catch (\Exception $ex) { $this->debugLog->writeDebugLog('JAMA Matrix Exception: %s', $ex->getMessage()); $result = '#VALUE!'; } } else { + // In theory, we should truncate here. + // But I can't figure out a formula + // using the concatenation operator + // with literals that fits in 32K, + // so I don't think we can overflow here. $result = self::FORMULA_STRING_QUOTE . str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($operand1) . self::unwrapResult($operand2)) . self::FORMULA_STRING_QUOTE; } $this->debugLog->writeDebugLog('Evaluation Result is %s', $this->showTypeDetails($result)); diff --git a/src/PhpSpreadsheet/Calculation/Information/ErrorValue.php b/src/PhpSpreadsheet/Calculation/Information/ErrorValue.php index 869350ed..dda2c705 100644 --- a/src/PhpSpreadsheet/Calculation/Information/ErrorValue.php +++ b/src/PhpSpreadsheet/Calculation/Information/ErrorValue.php @@ -47,7 +47,7 @@ class ErrorValue return false; } - return in_array($value, ExcelError::$errorCodes, true) || $value === ExcelError::CALC(); + return in_array($value, ExcelError::$errorCodes, true); } /** diff --git a/src/PhpSpreadsheet/Calculation/Information/ExcelError.php b/src/PhpSpreadsheet/Calculation/Information/ExcelError.php index 6305e502..5ca74a3e 100644 --- a/src/PhpSpreadsheet/Calculation/Information/ExcelError.php +++ b/src/PhpSpreadsheet/Calculation/Information/ExcelError.php @@ -14,15 +14,20 @@ class ExcelError * @var array */ public static $errorCodes = [ - 'null' => '#NULL!', - 'divisionbyzero' => '#DIV/0!', - 'value' => '#VALUE!', - 'reference' => '#REF!', - 'name' => '#NAME?', - 'num' => '#NUM!', - 'na' => '#N/A', - 'gettingdata' => '#GETTING_DATA', - 'spill' => '#SPILL!', + 'null' => '#NULL!', // 1 + 'divisionbyzero' => '#DIV/0!', // 2 + 'value' => '#VALUE!', // 3 + 'reference' => '#REF!', // 4 + 'name' => '#NAME?', // 5 + 'num' => '#NUM!', // 6 + 'na' => '#N/A', // 7 + 'gettingdata' => '#GETTING_DATA', // 8 + 'spill' => '#SPILL!', // 9 + 'connect' => '#CONNECT!', //10 + 'blocked' => '#BLOCKED!', //11 + 'unknown' => '#UNKNOWN!', //12 + 'field' => '#FIELD!', //13 + 'calculation' => '#CALC!', //14 ]; /** @@ -54,10 +59,6 @@ class ExcelError ++$i; } - if ($value === self::CALC()) { - return 14; - } - return self::NA(); } @@ -154,6 +155,6 @@ class ExcelError */ public static function CALC(): string { - return '#CALC!'; + return self::$errorCodes['calculation']; } } diff --git a/src/PhpSpreadsheet/Calculation/TextData/Concatenate.php b/src/PhpSpreadsheet/Calculation/TextData/Concatenate.php index 4413b4a4..7bd60e90 100644 --- a/src/PhpSpreadsheet/Calculation/TextData/Concatenate.php +++ b/src/PhpSpreadsheet/Calculation/TextData/Concatenate.php @@ -4,7 +4,10 @@ namespace PhpOffice\PhpSpreadsheet\Calculation\TextData; use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled; use PhpOffice\PhpSpreadsheet\Calculation\Functions; +use PhpOffice\PhpSpreadsheet\Calculation\Information\ErrorValue; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; +use PhpOffice\PhpSpreadsheet\Cell\DataType; +use PhpOffice\PhpSpreadsheet\Shared\StringHelper; class Concatenate { @@ -23,7 +26,18 @@ class Concatenate $aArgs = Functions::flattenArray($args); foreach ($aArgs as $arg) { + $value = Helpers::extractString($arg); + if (ErrorValue::isError($value)) { + $returnValue = $value; + + break; + } $returnValue .= Helpers::extractString($arg); + if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) { + $returnValue = ExcelError::CALC(); + + break; + } } return $returnValue; @@ -56,7 +70,14 @@ class Concatenate // Loop through arguments $aArgs = Functions::flattenArray($args); + $returnValue = ''; foreach ($aArgs as $key => &$arg) { + $value = Helpers::extractString($arg); + if (ErrorValue::isError($value)) { + $returnValue = $value; + + break; + } if ($ignoreEmpty === true && is_string($arg) && trim($arg) === '') { unset($aArgs[$key]); } elseif (is_bool($arg)) { @@ -64,7 +85,12 @@ class Concatenate } } - return implode($delimiter, $aArgs); + $returnValue = ($returnValue !== '') ? $returnValue : implode($delimiter, $aArgs); + if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) { + $returnValue = ExcelError::CALC(); + } + + return $returnValue; } /** @@ -90,9 +116,16 @@ class Concatenate $stringValue = Helpers::extractString($stringValue); if (!is_numeric($repeatCount) || $repeatCount < 0) { - return ExcelError::VALUE(); + $returnValue = ExcelError::VALUE(); + } elseif (ErrorValue::isError($stringValue)) { + $returnValue = $stringValue; + } else { + $returnValue = str_repeat($stringValue, (int) $repeatCount); + if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) { + $returnValue = ExcelError::VALUE(); // note VALUE not CALC + } } - return str_repeat($stringValue, (int) $repeatCount); + return $returnValue; } } diff --git a/src/PhpSpreadsheet/Calculation/TextData/Helpers.php b/src/PhpSpreadsheet/Calculation/TextData/Helpers.php index 0fdf6af8..e7b67a34 100644 --- a/src/PhpSpreadsheet/Calculation/TextData/Helpers.php +++ b/src/PhpSpreadsheet/Calculation/TextData/Helpers.php @@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Calculation\TextData; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp; use PhpOffice\PhpSpreadsheet\Calculation\Functions; +use PhpOffice\PhpSpreadsheet\Calculation\Information\ErrorValue; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; class Helpers @@ -21,11 +22,14 @@ class Helpers /** * @param mixed $value String value from which to extract characters */ - public static function extractString($value): string + public static function extractString($value, bool $throwIfError = false): string { if (is_bool($value)) { return self::convertBooleanValue($value); } + if ($throwIfError && is_string($value) && ErrorValue::isError($value)) { + throw new CalcExp($value); + } return (string) $value; } diff --git a/src/PhpSpreadsheet/Calculation/TextData/Replace.php b/src/PhpSpreadsheet/Calculation/TextData/Replace.php index a07ea104..03b66321 100644 --- a/src/PhpSpreadsheet/Calculation/TextData/Replace.php +++ b/src/PhpSpreadsheet/Calculation/TextData/Replace.php @@ -6,6 +6,8 @@ use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled; use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp; use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; +use PhpOffice\PhpSpreadsheet\Cell\DataType; +use PhpOffice\PhpSpreadsheet\Shared\StringHelper; class Replace { @@ -36,16 +38,20 @@ class Replace try { $start = Helpers::extractInt($start, 1, 0, true); $chars = Helpers::extractInt($chars, 0, 0, true); - $oldText = Helpers::extractString($oldText); - $newText = Helpers::extractString($newText); - $left = mb_substr($oldText, 0, $start - 1, 'UTF-8'); + $oldText = Helpers::extractString($oldText, true); + $newText = Helpers::extractString($newText, true); + $left = StringHelper::substring($oldText, 0, $start - 1); - $right = mb_substr($oldText, $start + $chars - 1, null, 'UTF-8'); + $right = StringHelper::substring($oldText, $start + $chars - 1, null); } catch (CalcExp $e) { return $e->getMessage(); } + $returnValue = $left . $newText . $right; + if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) { + $returnValue = ExcelError::VALUE(); + } - return $left . $newText . $right; + return $returnValue; } /** @@ -71,24 +77,29 @@ class Replace } try { - $text = Helpers::extractString($text); - $fromText = Helpers::extractString($fromText); - $toText = Helpers::extractString($toText); + $text = Helpers::extractString($text, true); + $fromText = Helpers::extractString($fromText, true); + $toText = Helpers::extractString($toText, true); if ($instance === null) { - return str_replace($fromText, $toText, $text); - } - if (is_bool($instance)) { - if ($instance === false || Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_OPENOFFICE) { - return ExcelError::Value(); + $returnValue = str_replace($fromText, $toText, $text); + } else { + if (is_bool($instance)) { + if ($instance === false || Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_OPENOFFICE) { + return ExcelError::Value(); + } + $instance = 1; } - $instance = 1; + $instance = Helpers::extractInt($instance, 1, 0, true); + $returnValue = self::executeSubstitution($text, $fromText, $toText, $instance); } - $instance = Helpers::extractInt($instance, 1, 0, true); } catch (CalcExp $e) { return $e->getMessage(); } + if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) { + $returnValue = ExcelError::VALUE(); + } - return self::executeSubstitution($text, $fromText, $toText, $instance); + return $returnValue; } /** @@ -106,7 +117,7 @@ class Replace } if ($pos !== false) { - return Functions::scalar(self::REPLACE($text, ++$pos, mb_strlen($fromText, 'UTF-8'), $toText)); + return Functions::scalar(self::REPLACE($text, ++$pos, StringHelper::countCharacters($fromText), $toText)); } return $text; diff --git a/src/PhpSpreadsheet/Cell/DataType.php b/src/PhpSpreadsheet/Cell/DataType.php index 16de2a00..f19984db 100644 --- a/src/PhpSpreadsheet/Cell/DataType.php +++ b/src/PhpSpreadsheet/Cell/DataType.php @@ -31,8 +31,11 @@ class DataType '#NAME?' => 4, '#NUM!' => 5, '#N/A' => 6, + '#CALC!' => 7, ]; + public const MAX_STRING_LENGTH = 32767; + /** * Get list of error codes. * @@ -58,7 +61,7 @@ class DataType } // string must never be longer than 32,767 characters, truncate if necessary - $textValue = StringHelper::substring((string) $textValue, 0, 32767); + $textValue = StringHelper::substring((string) $textValue, 0, self::MAX_STRING_LENGTH); // we require that newline is represented as "\n" in core, not as "\r\n" or "\r" $textValue = str_replace(["\r\n", "\r"], "\n", $textValue); diff --git a/src/PhpSpreadsheet/Shared/StringHelper.php b/src/PhpSpreadsheet/Shared/StringHelper.php index c16de9ce..030df66d 100644 --- a/src/PhpSpreadsheet/Shared/StringHelper.php +++ b/src/PhpSpreadsheet/Shared/StringHelper.php @@ -467,9 +467,9 @@ class StringHelper * * @param string $textValue UTF-8 encoded string * @param int $offset Start offset - * @param int $length Maximum number of characters in substring + * @param ?int $length Maximum number of characters in substring */ - public static function substring(string $textValue, int $offset, int $length = 0): string + public static function substring(string $textValue, int $offset, ?int $length = 0): string { return mb_substr($textValue, $offset, $length, 'UTF-8'); } diff --git a/tests/PhpSpreadsheetTests/Calculation/StringLengthTest.php b/tests/PhpSpreadsheetTests/Calculation/StringLengthTest.php new file mode 100644 index 00000000..b1152f3f --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/StringLengthTest.php @@ -0,0 +1,34 @@ +getActiveSheet(); + // Note use Armenian character below to make sure chars, not bytes + $longstring = str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5); + $sheet->getCell('C1')->setValue($longstring); + self::assertSame($longstring, $sheet->getCell('C1')->getValue()); + $sheet->getCell('C2')->setValue($longstring . 'abcdef'); + self::assertSame($longstring . 'abcde', $sheet->getCell('C2')->getValue()); + $sheet->getCell('C3')->setValue('abcdef'); + $sheet->getCell('C4')->setValue('=C1 & C3'); + self::assertSame($longstring . 'abcde', $sheet->getCell('C4')->getCalculatedValue(), 'truncate cell concat with cell'); + $sheet->getCell('C5')->setValue('=C1 & "A"'); + self::assertSame($longstring . 'A', $sheet->getCell('C5')->getCalculatedValue(), 'okay cell concat with literal'); + $sheet->getCell('C6')->setValue('=C1 & "ABCDEF"'); + self::assertSame($longstring . 'ABCDE', $sheet->getCell('C6')->getCalculatedValue(), 'truncate cell concat with literal'); + $sheet->getCell('C7')->setValue('="ABCDEF" & C1'); + self::assertSame('ABCDEF' . str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 6), $sheet->getCell('C7')->getCalculatedValue(), 'truncate literal concat with cell'); + $sheet->getCell('C8')->setValue('="ABCDE" & C1'); + self::assertSame('ABCDE' . $longstring, $sheet->getCell('C8')->getCalculatedValue(), 'okay literal concat with cell'); + $spreadsheet->disconnectWorksheets(); + } +} diff --git a/tests/data/Calculation/TextData/CONCATENATE.php b/tests/data/Calculation/TextData/CONCATENATE.php index c6b583eb..b29843f7 100644 --- a/tests/data/Calculation/TextData/CONCATENATE.php +++ b/tests/data/Calculation/TextData/CONCATENATE.php @@ -1,5 +1,7 @@ ['exception'], + 'result just fits' => [ + // Note use Armenian character below to make sure chars, not bytes + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5) . 'ABCDE', + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5), + 'ABCDE', + ], + 'result too long' => [ + '#CALC!', + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5), + 'abc', + '=A2', + ], + 'propagate DIV0' => ['#DIV/0!', '1', '=2/0', '3'], ]; diff --git a/tests/data/Calculation/TextData/REPLACE.php b/tests/data/Calculation/TextData/REPLACE.php index 8f02e2e3..2268bf48 100644 --- a/tests/data/Calculation/TextData/REPLACE.php +++ b/tests/data/Calculation/TextData/REPLACE.php @@ -65,4 +65,20 @@ return [ 'negative length' => ['#VALUE!', 'hello', 3, -1, 'xyz'], 'boolean 1st parm' => ['TRDFGE', true, 3, 1, 'DFG'], 'boolean 4th parm' => ['heFALSElo', 'hello', 3, 1, false], + 'propagate REF' => ['#REF!', '=sheet99!A1', 3, 1, 'x'], + 'propagate DIV0' => ['#DIV/0!', '=1/0', 3, 1, 'x'], + 'string which just sneaks in' => [ + str_repeat('A', 32766) . 'C', + str_repeat('A', 32766) . 'B', + 32767, + '1', + 'C', + ], + 'string which overflows' => [ + '#VALUE!', + str_repeat('A', 32766) . 'B', + 32767, + '1', + 'CC', + ], ]; diff --git a/tests/data/Calculation/TextData/REPT.php b/tests/data/Calculation/TextData/REPT.php index f8aef72c..172d8fae 100644 --- a/tests/data/Calculation/TextData/REPT.php +++ b/tests/data/Calculation/TextData/REPT.php @@ -11,4 +11,8 @@ return [ ['111', 1, 3], ['δύο δύο ', 'δύο ', 2], ['#VALUE!', 'ABC', -1], + 'result too long' => ['#VALUE!', 'A', 32768], + 'result just fits' => [str_repeat('A', 32767), 'A', 32767], + 'propagate NUM' => ['#NUM!', '=SQRT(-1)', 5], + 'propagate REF' => ['#REF!', '=sheet99!A1', 5], ]; diff --git a/tests/data/Calculation/TextData/SUBSTITUTE.php b/tests/data/Calculation/TextData/SUBSTITUTE.php index 9b695a71..baf1c87b 100644 --- a/tests/data/Calculation/TextData/SUBSTITUTE.php +++ b/tests/data/Calculation/TextData/SUBSTITUTE.php @@ -85,4 +85,32 @@ return [ 'bool false instance' => ['#VALUE!', 'abcdefg', 'def', '123', false], 'bool true instance' => ['#VALUE!', 'abcdefg', 'def', '123', true], 'bool text' => ['FA-SE', false, 'L', '-'], + 'propagate REF' => ['#REF!', '=sheet99!A1', 'A', 'x'], + 'propagate DIV0' => ['#DIV/0!', 'hello', '=1/0', 1, 'x'], + 'string which just sneaks in' => [ + str_repeat('A', 32766) . 'C', + str_repeat('A', 32766) . 'B', + 'B', + 'C', + ], + 'string which overflows' => [ + '#VALUE!', + str_repeat('A', 32766) . 'B', + 'B', + 'CC', + ], + 'okay long string instance' => [ + 'AAAAB' . str_repeat('A', 32762), + str_repeat('A', 32767), + 'A', + 'B', + 5, + ], + 'overflow long string instance' => [ + '#VALUE!', + str_repeat('A', 32767), + 'A', + 'BB', + 5, + ], ]; diff --git a/tests/data/Calculation/TextData/TEXTJOIN.php b/tests/data/Calculation/TextData/TEXTJOIN.php index e345f7c1..565358a7 100644 --- a/tests/data/Calculation/TextData/TEXTJOIN.php +++ b/tests/data/Calculation/TextData/TEXTJOIN.php @@ -1,5 +1,7 @@ ['exception', ['-', true]], 'three arguments' => ['a', ['-', true, 'a']], 'boolean as string' => ['TRUE-FALSE-TRUE', ['-', true, true, false, true]], + 'result too long' => [ + '#CALC!', + [ + ',', + true, + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5), + 'abcde', + ], + ], + 'result just fits' => [ + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5) . ',abcd', + [ + ',', + true, + str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5), + 'abcd', + ], + ], + 'propagate REF' => ['#REF!', [',', true, '1', '=sheet99!A1', '3']], + 'propagate NUM' => ['#NUM!', [',', true, '1', '=SQRT(-1)', '3']], ];