Keep Calculated String Results Below 32K (#2921)

* Keep Calculated String Results Below 32K

This is the result of an investigation into issue #2884 (see also PR #2913). It is, unfortunately, not a fix for the original problem; see the discussion in that PR for why I don't think there is a practical fix for that specific problem at this time.

Excel limits strings to 32,767 characters. We already truncate strings to that length when they are added to the spreadsheet. However, it has been possible to exceed that length as a result of the concatenation operator (where Excel truncates); of the CONCATENATE or TEXTJOIN functions (where Excel returns #CALC!); or of the REPLACE, REPT, or SUBSTITUTE functions (where Excel returns #VALUE!). This PR changes PhpSpreadsheet to return the same value as Excel in these cases. Note that Excel 2003 truncates in all those cases; I don't think there is a way to differentiate that behavior in PhpSpreadsheet.

However, LibreOffice and Gnumeric do not have that limit; if they have a limit at all, it is much higher. It would be fairly easy to use existing settings to differentiate between Excel and LibreOffice/Gnumeric in this respect. I have not done so in this PR because I am not sure how useful that is, and I can easily see it leading to problems (read in a LibreOffice spreadsheet with a 33K cell and then output to an Excel spreadsheet). Perhaps it should be handled with an additional opt-in setting.

I changed the maximum size from a literal to a constant in the one place where it was already being enforced (Cell/DataType). I am not sure that is the best place for it to be defined; I am open to suggestions.

* Implement Some Suggestions

... from @MarkBaker.
This commit is contained in:
oleibman 2022-07-04 08:30:46 -07:00 committed by GitHub
parent db12b73f16
commit faf6d819c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 219 additions and 39 deletions

View File

@ -11,6 +11,7 @@ use PhpOffice\PhpSpreadsheet\Calculation\Information\Value;
use PhpOffice\PhpSpreadsheet\Calculation\Token\Stack; use PhpOffice\PhpSpreadsheet\Calculation\Token\Stack;
use PhpOffice\PhpSpreadsheet\Cell\Cell; use PhpOffice\PhpSpreadsheet\Cell\Cell;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\DefinedName; use PhpOffice\PhpSpreadsheet\DefinedName;
use PhpOffice\PhpSpreadsheet\ReferenceHelper; use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\Shared; use PhpOffice\PhpSpreadsheet\Shared;
@ -4711,11 +4712,19 @@ class Calculation
// Perform the required operation against the operand 1 matrix, passing in operand 2 // Perform the required operation against the operand 1 matrix, passing in operand 2
$matrixResult = $matrix->concat($operand2); $matrixResult = $matrix->concat($operand2);
$result = $matrixResult->getArray(); $result = $matrixResult->getArray();
if (isset($result[0][0])) {
$result[0][0] = Shared\StringHelper::substring($result[0][0], 0, DataType::MAX_STRING_LENGTH);
}
} catch (\Exception $ex) { } catch (\Exception $ex) {
$this->debugLog->writeDebugLog('JAMA Matrix Exception: %s', $ex->getMessage()); $this->debugLog->writeDebugLog('JAMA Matrix Exception: %s', $ex->getMessage());
$result = '#VALUE!'; $result = '#VALUE!';
} }
} else { } else {
// In theory, we should truncate here.
// But I can't figure out a formula
// using the concatenation operator
// with literals that fits in 32K,
// so I don't think we can overflow here.
$result = self::FORMULA_STRING_QUOTE . str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($operand1) . self::unwrapResult($operand2)) . self::FORMULA_STRING_QUOTE; $result = self::FORMULA_STRING_QUOTE . str_replace('""', self::FORMULA_STRING_QUOTE, self::unwrapResult($operand1) . self::unwrapResult($operand2)) . self::FORMULA_STRING_QUOTE;
} }
$this->debugLog->writeDebugLog('Evaluation Result is %s', $this->showTypeDetails($result)); $this->debugLog->writeDebugLog('Evaluation Result is %s', $this->showTypeDetails($result));

View File

@ -47,7 +47,7 @@ class ErrorValue
return false; return false;
} }
return in_array($value, ExcelError::$errorCodes, true) || $value === ExcelError::CALC(); return in_array($value, ExcelError::$errorCodes, true);
} }
/** /**

View File

@ -14,15 +14,20 @@ class ExcelError
* @var array<string, string> * @var array<string, string>
*/ */
public static $errorCodes = [ public static $errorCodes = [
'null' => '#NULL!', 'null' => '#NULL!', // 1
'divisionbyzero' => '#DIV/0!', 'divisionbyzero' => '#DIV/0!', // 2
'value' => '#VALUE!', 'value' => '#VALUE!', // 3
'reference' => '#REF!', 'reference' => '#REF!', // 4
'name' => '#NAME?', 'name' => '#NAME?', // 5
'num' => '#NUM!', 'num' => '#NUM!', // 6
'na' => '#N/A', 'na' => '#N/A', // 7
'gettingdata' => '#GETTING_DATA', 'gettingdata' => '#GETTING_DATA', // 8
'spill' => '#SPILL!', 'spill' => '#SPILL!', // 9
'connect' => '#CONNECT!', //10
'blocked' => '#BLOCKED!', //11
'unknown' => '#UNKNOWN!', //12
'field' => '#FIELD!', //13
'calculation' => '#CALC!', //14
]; ];
/** /**
@ -54,10 +59,6 @@ class ExcelError
++$i; ++$i;
} }
if ($value === self::CALC()) {
return 14;
}
return self::NA(); return self::NA();
} }
@ -154,6 +155,6 @@ class ExcelError
*/ */
public static function CALC(): string public static function CALC(): string
{ {
return '#CALC!'; return self::$errorCodes['calculation'];
} }
} }

View File

@ -4,7 +4,10 @@ namespace PhpOffice\PhpSpreadsheet\Calculation\TextData;
use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled; use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled;
use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ErrorValue;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
class Concatenate class Concatenate
{ {
@ -23,7 +26,18 @@ class Concatenate
$aArgs = Functions::flattenArray($args); $aArgs = Functions::flattenArray($args);
foreach ($aArgs as $arg) { foreach ($aArgs as $arg) {
$value = Helpers::extractString($arg);
if (ErrorValue::isError($value)) {
$returnValue = $value;
break;
}
$returnValue .= Helpers::extractString($arg); $returnValue .= Helpers::extractString($arg);
if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) {
$returnValue = ExcelError::CALC();
break;
}
} }
return $returnValue; return $returnValue;
@ -56,7 +70,14 @@ class Concatenate
// Loop through arguments // Loop through arguments
$aArgs = Functions::flattenArray($args); $aArgs = Functions::flattenArray($args);
$returnValue = '';
foreach ($aArgs as $key => &$arg) { foreach ($aArgs as $key => &$arg) {
$value = Helpers::extractString($arg);
if (ErrorValue::isError($value)) {
$returnValue = $value;
break;
}
if ($ignoreEmpty === true && is_string($arg) && trim($arg) === '') { if ($ignoreEmpty === true && is_string($arg) && trim($arg) === '') {
unset($aArgs[$key]); unset($aArgs[$key]);
} elseif (is_bool($arg)) { } elseif (is_bool($arg)) {
@ -64,7 +85,12 @@ class Concatenate
} }
} }
return implode($delimiter, $aArgs); $returnValue = ($returnValue !== '') ? $returnValue : implode($delimiter, $aArgs);
if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) {
$returnValue = ExcelError::CALC();
}
return $returnValue;
} }
/** /**
@ -90,9 +116,16 @@ class Concatenate
$stringValue = Helpers::extractString($stringValue); $stringValue = Helpers::extractString($stringValue);
if (!is_numeric($repeatCount) || $repeatCount < 0) { if (!is_numeric($repeatCount) || $repeatCount < 0) {
return ExcelError::VALUE(); $returnValue = ExcelError::VALUE();
} elseif (ErrorValue::isError($stringValue)) {
$returnValue = $stringValue;
} else {
$returnValue = str_repeat($stringValue, (int) $repeatCount);
if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) {
$returnValue = ExcelError::VALUE(); // note VALUE not CALC
}
} }
return str_repeat($stringValue, (int) $repeatCount); return $returnValue;
} }
} }

View File

@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheet\Calculation\TextData;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp; use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp;
use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ErrorValue;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
class Helpers class Helpers
@ -21,11 +22,14 @@ class Helpers
/** /**
* @param mixed $value String value from which to extract characters * @param mixed $value String value from which to extract characters
*/ */
public static function extractString($value): string public static function extractString($value, bool $throwIfError = false): string
{ {
if (is_bool($value)) { if (is_bool($value)) {
return self::convertBooleanValue($value); return self::convertBooleanValue($value);
} }
if ($throwIfError && is_string($value) && ErrorValue::isError($value)) {
throw new CalcExp($value);
}
return (string) $value; return (string) $value;
} }

View File

@ -6,6 +6,8 @@ use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled;
use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp; use PhpOffice\PhpSpreadsheet\Calculation\Exception as CalcExp;
use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
class Replace class Replace
{ {
@ -36,16 +38,20 @@ class Replace
try { try {
$start = Helpers::extractInt($start, 1, 0, true); $start = Helpers::extractInt($start, 1, 0, true);
$chars = Helpers::extractInt($chars, 0, 0, true); $chars = Helpers::extractInt($chars, 0, 0, true);
$oldText = Helpers::extractString($oldText); $oldText = Helpers::extractString($oldText, true);
$newText = Helpers::extractString($newText); $newText = Helpers::extractString($newText, true);
$left = mb_substr($oldText, 0, $start - 1, 'UTF-8'); $left = StringHelper::substring($oldText, 0, $start - 1);
$right = mb_substr($oldText, $start + $chars - 1, null, 'UTF-8'); $right = StringHelper::substring($oldText, $start + $chars - 1, null);
} catch (CalcExp $e) { } catch (CalcExp $e) {
return $e->getMessage(); return $e->getMessage();
} }
$returnValue = $left . $newText . $right;
if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) {
$returnValue = ExcelError::VALUE();
}
return $left . $newText . $right; return $returnValue;
} }
/** /**
@ -71,12 +77,12 @@ class Replace
} }
try { try {
$text = Helpers::extractString($text); $text = Helpers::extractString($text, true);
$fromText = Helpers::extractString($fromText); $fromText = Helpers::extractString($fromText, true);
$toText = Helpers::extractString($toText); $toText = Helpers::extractString($toText, true);
if ($instance === null) { if ($instance === null) {
return str_replace($fromText, $toText, $text); $returnValue = str_replace($fromText, $toText, $text);
} } else {
if (is_bool($instance)) { if (is_bool($instance)) {
if ($instance === false || Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_OPENOFFICE) { if ($instance === false || Functions::getCompatibilityMode() !== Functions::COMPATIBILITY_OPENOFFICE) {
return ExcelError::Value(); return ExcelError::Value();
@ -84,11 +90,16 @@ class Replace
$instance = 1; $instance = 1;
} }
$instance = Helpers::extractInt($instance, 1, 0, true); $instance = Helpers::extractInt($instance, 1, 0, true);
$returnValue = self::executeSubstitution($text, $fromText, $toText, $instance);
}
} catch (CalcExp $e) { } catch (CalcExp $e) {
return $e->getMessage(); return $e->getMessage();
} }
if (StringHelper::countCharacters($returnValue) > DataType::MAX_STRING_LENGTH) {
$returnValue = ExcelError::VALUE();
}
return self::executeSubstitution($text, $fromText, $toText, $instance); return $returnValue;
} }
/** /**
@ -106,7 +117,7 @@ class Replace
} }
if ($pos !== false) { if ($pos !== false) {
return Functions::scalar(self::REPLACE($text, ++$pos, mb_strlen($fromText, 'UTF-8'), $toText)); return Functions::scalar(self::REPLACE($text, ++$pos, StringHelper::countCharacters($fromText), $toText));
} }
return $text; return $text;

View File

@ -31,8 +31,11 @@ class DataType
'#NAME?' => 4, '#NAME?' => 4,
'#NUM!' => 5, '#NUM!' => 5,
'#N/A' => 6, '#N/A' => 6,
'#CALC!' => 7,
]; ];
public const MAX_STRING_LENGTH = 32767;
/** /**
* Get list of error codes. * Get list of error codes.
* *
@ -58,7 +61,7 @@ class DataType
} }
// string must never be longer than 32,767 characters, truncate if necessary // string must never be longer than 32,767 characters, truncate if necessary
$textValue = StringHelper::substring((string) $textValue, 0, 32767); $textValue = StringHelper::substring((string) $textValue, 0, self::MAX_STRING_LENGTH);
// we require that newline is represented as "\n" in core, not as "\r\n" or "\r" // we require that newline is represented as "\n" in core, not as "\r\n" or "\r"
$textValue = str_replace(["\r\n", "\r"], "\n", $textValue); $textValue = str_replace(["\r\n", "\r"], "\n", $textValue);

View File

@ -467,9 +467,9 @@ class StringHelper
* *
* @param string $textValue UTF-8 encoded string * @param string $textValue UTF-8 encoded string
* @param int $offset Start offset * @param int $offset Start offset
* @param int $length Maximum number of characters in substring * @param ?int $length Maximum number of characters in substring
*/ */
public static function substring(string $textValue, int $offset, int $length = 0): string public static function substring(string $textValue, int $offset, ?int $length = 0): string
{ {
return mb_substr($textValue, $offset, $length, 'UTF-8'); return mb_substr($textValue, $offset, $length, 'UTF-8');
} }

View File

@ -0,0 +1,34 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Calculation;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PHPUnit\Framework\TestCase;
/**
 * Checks that string values are capped at DataType::MAX_STRING_LENGTH
 * characters, both when a value is stored directly in a cell and when
 * it is produced by the concatenation operator in a formula.
 */
class StringLengthTest extends TestCase
{
    /**
     * Stores and concatenates strings at, and just over, the length limit
     * and asserts the exact value held or calculated for each cell.
     */
    public function testStringLength(): void
    {
        $workbook = new Spreadsheet();
        $worksheet = $workbook->getActiveSheet();

        // 'Ԁ' (U+0500, Cyrillic Supplement) is a multi-byte UTF-8 character,
        // so these checks prove the limit is counted in characters, not bytes.
        $nearLimit = str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5);

        // Five characters of headroom remain: the value is stored unchanged.
        $worksheet->getCell('C1')->setValue($nearLimit);
        self::assertSame($nearLimit, $worksheet->getCell('C1')->getValue());

        // Six extra characters exceed the limit by one: the last is dropped.
        $worksheet->getCell('C2')->setValue($nearLimit . 'abcdef');
        self::assertSame($nearLimit . 'abcde', $worksheet->getCell('C2')->getValue());

        // Concatenating two cell references.
        $worksheet->getCell('C3')->setValue('abcdef');
        $worksheet->getCell('C4')->setValue('=C1 & C3');
        self::assertSame(
            $nearLimit . 'abcde',
            $worksheet->getCell('C4')->getCalculatedValue(),
            'truncate cell concat with cell'
        );

        // Cell reference on the left, literal on the right.
        $worksheet->getCell('C5')->setValue('=C1 & "A"');
        self::assertSame(
            $nearLimit . 'A',
            $worksheet->getCell('C5')->getCalculatedValue(),
            'okay cell concat with literal'
        );
        $worksheet->getCell('C6')->setValue('=C1 & "ABCDEF"');
        self::assertSame(
            $nearLimit . 'ABCDE',
            $worksheet->getCell('C6')->getCalculatedValue(),
            'truncate cell concat with literal'
        );

        // Literal on the left, cell reference on the right.
        $worksheet->getCell('C7')->setValue('="ABCDEF" & C1');
        $expectedTruncated = 'ABCDEF' . str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 6);
        self::assertSame(
            $expectedTruncated,
            $worksheet->getCell('C7')->getCalculatedValue(),
            'truncate literal concat with cell'
        );
        $worksheet->getCell('C8')->setValue('="ABCDE" & C1');
        self::assertSame(
            'ABCDE' . $nearLimit,
            $worksheet->getCell('C8')->getCalculatedValue(),
            'okay literal concat with cell'
        );

        $workbook->disconnectWorksheets();
    }
}

View File

@ -1,5 +1,7 @@
<?php <?php
use PhpOffice\PhpSpreadsheet\Cell\DataType;
return [ return [
[ [
'ABCDEFGHIJ', 'ABCDEFGHIJ',
@ -19,4 +21,17 @@ return [
true, true,
], ],
'no arguments' => ['exception'], 'no arguments' => ['exception'],
'result just fits' => [
// Note: use a multi-byte character ('Ԁ', U+0500, Cyrillic Supplement) below to make sure we count chars, not bytes
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5) . 'ABCDE',
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5),
'ABCDE',
],
'result too long' => [
'#CALC!',
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5),
'abc',
'=A2',
],
'propagate DIV0' => ['#DIV/0!', '1', '=2/0', '3'],
]; ];

View File

@ -65,4 +65,20 @@ return [
'negative length' => ['#VALUE!', 'hello', 3, -1, 'xyz'], 'negative length' => ['#VALUE!', 'hello', 3, -1, 'xyz'],
'boolean 1st parm' => ['TRDFGE', true, 3, 1, 'DFG'], 'boolean 1st parm' => ['TRDFGE', true, 3, 1, 'DFG'],
'boolean 4th parm' => ['heFALSElo', 'hello', 3, 1, false], 'boolean 4th parm' => ['heFALSElo', 'hello', 3, 1, false],
'propagate REF' => ['#REF!', '=sheet99!A1', 3, 1, 'x'],
'propagate DIV0' => ['#DIV/0!', '=1/0', 3, 1, 'x'],
'string which just sneaks in' => [
str_repeat('A', 32766) . 'C',
str_repeat('A', 32766) . 'B',
32767,
'1',
'C',
],
'string which overflows' => [
'#VALUE!',
str_repeat('A', 32766) . 'B',
32767,
'1',
'CC',
],
]; ];

View File

@ -11,4 +11,8 @@ return [
['111', 1, 3], ['111', 1, 3],
['δύο δύο ', 'δύο ', 2], ['δύο δύο ', 'δύο ', 2],
['#VALUE!', 'ABC', -1], ['#VALUE!', 'ABC', -1],
'result too long' => ['#VALUE!', 'A', 32768],
'result just fits' => [str_repeat('A', 32767), 'A', 32767],
'propagate NUM' => ['#NUM!', '=SQRT(-1)', 5],
'propagate REF' => ['#REF!', '=sheet99!A1', 5],
]; ];

View File

@ -85,4 +85,32 @@ return [
'bool false instance' => ['#VALUE!', 'abcdefg', 'def', '123', false], 'bool false instance' => ['#VALUE!', 'abcdefg', 'def', '123', false],
'bool true instance' => ['#VALUE!', 'abcdefg', 'def', '123', true], 'bool true instance' => ['#VALUE!', 'abcdefg', 'def', '123', true],
'bool text' => ['FA-SE', false, 'L', '-'], 'bool text' => ['FA-SE', false, 'L', '-'],
'propagate REF' => ['#REF!', '=sheet99!A1', 'A', 'x'],
'propagate DIV0' => ['#DIV/0!', 'hello', '=1/0', 1, 'x'],
'string which just sneaks in' => [
str_repeat('A', 32766) . 'C',
str_repeat('A', 32766) . 'B',
'B',
'C',
],
'string which overflows' => [
'#VALUE!',
str_repeat('A', 32766) . 'B',
'B',
'CC',
],
'okay long string instance' => [
'AAAAB' . str_repeat('A', 32762),
str_repeat('A', 32767),
'A',
'B',
5,
],
'overflow long string instance' => [
'#VALUE!',
str_repeat('A', 32767),
'A',
'BB',
5,
],
]; ];

View File

@ -1,5 +1,7 @@
<?php <?php
use PhpOffice\PhpSpreadsheet\Cell\DataType;
return [ return [
[ [
'ABCDE,FGHIJ', 'ABCDE,FGHIJ',
@ -42,4 +44,24 @@ return [
'two arguments' => ['exception', ['-', true]], 'two arguments' => ['exception', ['-', true]],
'three arguments' => ['a', ['-', true, 'a']], 'three arguments' => ['a', ['-', true, 'a']],
'boolean as string' => ['TRUE-FALSE-TRUE', ['-', true, true, false, true]], 'boolean as string' => ['TRUE-FALSE-TRUE', ['-', true, true, false, true]],
'result too long' => [
'#CALC!',
[
',',
true,
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5),
'abcde',
],
],
'result just fits' => [
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5) . ',abcd',
[
',',
true,
str_repeat('Ԁ', DataType::MAX_STRING_LENGTH - 5),
'abcd',
],
],
'propagate REF' => ['#REF!', [',', true, '1', '=sheet99!A1', '3']],
'propagate NUM' => ['#NUM!', [',', true, '1', '=SQRT(-1)', '3']],
]; ];