diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 14160edf..04bdf273 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -2785,36 +2785,6 @@ parameters: count: 1 path: src/PhpSpreadsheet/Shared/OLERead.php - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:formatNumber\\(\\) should return string but returns array\\|string\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - - - message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:sanitizeUTF8\\(\\) should return string but returns string\\|false\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - - - message: "#^Parameter \\#1 \\$string of function strlen expects string, float given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - - - message: "#^Parameter \\#3 \\$subject of function str_replace expects array\\|string, float given\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - - - message: "#^Static property PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:\\$decimalSeparator \\(string\\) in isset\\(\\) is not nullable\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - - - message: "#^Static property PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:\\$thousandsSeparator \\(string\\) in isset\\(\\) is not nullable\\.$#" - count: 1 - path: src/PhpSpreadsheet/Shared/StringHelper.php - - message: "#^Static method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\TimeZone\\:\\:validateTimeZone\\(\\) is unused\\.$#" count: 1 diff --git a/src/PhpSpreadsheet/Shared/StringHelper.php b/src/PhpSpreadsheet/Shared/StringHelper.php index 2ccb2424..c16de9ce 100644 --- a/src/PhpSpreadsheet/Shared/StringHelper.php +++ b/src/PhpSpreadsheet/Shared/StringHelper.php @@ -3,13 +3,14 @@ namespace PhpOffice\PhpSpreadsheet\Shared; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; +use UConverter; class StringHelper { /** Constants */ /** Regular Expressions */ // Fraction - const STRING_REGEXP_FRACTION = '(-?)(\d+)\s+(\d+\/\d+)'; + const STRING_REGEXP_FRACTION = '~^\s*(-?)((\d*)\s+)?(\d+\/\d+)\s*$~'; /** * Control characters array. @@ -28,14 +29,14 @@ class StringHelper /** * Decimal separator. * - * @var string + * @var ?string */ private static $decimalSeparator; /** * Thousands separator. * - * @var string + * @var ?string */ private static $thousandsSeparator; @@ -328,19 +329,31 @@ class StringHelper } /** - * Try to sanitize UTF8, stripping invalid byte sequences. Not perfect. Does not surrogate characters. + * Try to sanitize UTF8, replacing invalid sequences with Unicode substitution characters. */ public static function sanitizeUTF8(string $textValue): string { + $textValue = str_replace(["\xef\xbf\xbe", "\xef\xbf\xbf"], "\xef\xbf\xbd", $textValue); + if (class_exists(UConverter::class)) { + $returnValue = UConverter::transcode($textValue, 'UTF-8', 'UTF-8'); + if ($returnValue !== false) { + return $returnValue; + } + } + // @codeCoverageIgnoreStart + // I don't think any of the code below should ever be executed. if (self::getIsIconvEnabled()) { - $textValue = @iconv('UTF-8', 'UTF-8', $textValue); - - return $textValue; + $returnValue = @iconv('UTF-8', 'UTF-8', $textValue); + if ($returnValue !== false) { + return $returnValue; + } } - $textValue = mb_convert_encoding($textValue, 'UTF-8', 'UTF-8'); + // Phpstan does not think this can return false. + $returnValue = mb_convert_encoding($textValue, 'UTF-8', 'UTF-8'); - return $textValue; + return $returnValue; + // @codeCoverageIgnoreEnd } /** @@ -348,19 +361,19 @@ class StringHelper */ public static function isUTF8(string $textValue): bool { - return $textValue === '' || preg_match('/^./su', $textValue) === 1; + return $textValue === self::sanitizeUTF8($textValue); } /** * Formats a numeric value as a string for output in various output writers forcing * point as decimal separator in case locale is other than English. * - * @param mixed $numericValue + * @param float|int|string $numericValue */ public static function formatNumber($numericValue): string { if (is_float($numericValue)) { - return str_replace(',', '.', $numericValue); + return str_replace(',', '.', (string) $numericValue); } return (string) $numericValue; @@ -537,9 +550,10 @@ class StringHelper */ public static function convertToNumberIfFraction(string &$operand): bool { - if (preg_match('/^' . self::STRING_REGEXP_FRACTION . '$/i', $operand, $match)) { + if (preg_match(self::STRING_REGEXP_FRACTION, $operand, $match)) { $sign = ($match[1] == '-') ? '-' : '+'; - $fractionFormula = '=' . $sign . $match[2] . $sign . $match[3]; + $wholePart = ($match[3] === '') ? '' : ($sign . $match[3]); + $fractionFormula = '=' . $wholePart . $sign . $match[4]; $operand = Calculation::getInstance()->_calculateFormulaValue($fractionFormula); return true; @@ -686,6 +700,6 @@ class StringHelper } $v = (float) $textValue; - return (is_numeric(substr($textValue, 0, strlen($v)))) ? $v : $textValue; + return (is_numeric(substr($textValue, 0, strlen((string) $v)))) ? $v : $textValue; } } diff --git a/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php b/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php index 448d3d1e..0cd79853 100644 --- a/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Csv/CsvEncodingTest.php @@ -66,6 +66,25 @@ class CsvEncodingTest extends TestCase self::assertEquals('sixième', $sheet->getCell('C2')->getValue()); } + public function testSurrogate(): void + { + // Surrogates should occur only in UTF-16, and should + // be properly converted to UTF8 when read. + // FFFE/FFFF are illegal, and should be converted to + // substitution character when read. + // Excel does not handle any of the cells in row 3 well. + // LibreOffice handles A3 fine, and discards B3/C3, + // which is a reasonable action. + $filename = 'tests/data/Reader/CSV/premiere.utf16le.csv'; + $reader = new Csv(); + $reader->setInputEncoding(Csv::guessEncoding($filename)); + $spreadsheet = $reader->load($filename); + $sheet = $spreadsheet->getActiveSheet(); + self::assertEquals('𐐀', $sheet->getCell('A3')->getValue()); + self::assertEquals('�', $sheet->getCell('B3')->getValue()); + self::assertEquals('�', $sheet->getCell('C3')->getValue()); + } + /** * @dataProvider providerGuessEncoding */ diff --git a/tests/PhpSpreadsheetTests/Shared/StringHelperInvalidCharTest.php b/tests/PhpSpreadsheetTests/Shared/StringHelperInvalidCharTest.php new file mode 100644 index 00000000..54679499 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Shared/StringHelperInvalidCharTest.php @@ -0,0 +1,44 @@ +getActiveSheet(); + $substitution = '�'; + $array = [ + ['Normal string', 'Hello', 'Hello'], + ['integer', 2, 2], + ['float', 2.1, 2.1], + ['boolean true', true, true], + ['illegal FFFE/FFFF', "H\xef\xbf\xbe\xef\xbf\xbfello", "H{$substitution}{$substitution}ello"], + ['illegal character', "H\xef\x00\x00ello", "H{$substitution}\x00\x00ello"], + ['overlong character', "H\xc0\xa0ello", "H{$substitution}{$substitution}ello"], + ['Osmanya as single character', "H\xf0\x90\x90\x80ello", 'H𐐀ello'], + ['Osmanya as surrogate pair (x)', "\xed\xa0\x81\xed\xb0\x80", "{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}"], + ['Osmanya as surrogate pair (u)', "\u{d801}\u{dc00}", "{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}{$substitution}"], + ['Half surrogate pair (u)', "\u{d801}", "{$substitution}{$substitution}{$substitution}"], + ['Control character', "\u{7}", "\u{7}"], + ]; + + $sheet->fromArray($array); + $row = 0; + foreach ($array as $value) { + self::assertSame($value[1] === $value[2], StringHelper::isUTF8((string) $value[1])); + ++$row; + $expected = $value[2]; + self::assertSame( + $expected, + $sheet->getCell("B$row")->getValue(), + $sheet->getCell("A$row")->getValue() + ); + } + } +} diff --git a/tests/PhpSpreadsheetTests/Shared/StringHelperTest.php b/tests/PhpSpreadsheetTests/Shared/StringHelperTest.php index bcbcd5d2..85a92613 100644 --- a/tests/PhpSpreadsheetTests/Shared/StringHelperTest.php +++ b/tests/PhpSpreadsheetTests/Shared/StringHelperTest.php @@ -119,4 +119,33 @@ class StringHelperTest extends TestCase self::assertEquals($expectedResult, $result); } + + /** + * @dataProvider providerFractions + */ + public function testFraction(string $expected, string $value): void + { + $originalValue = $value; + $result = StringHelper::convertToNumberIfFraction($value); + if ($result === false) { + self::assertSame($expected, $originalValue); + self::assertSame($expected, $value); + } else { + self::assertSame($expected, (string) $value); + self::assertNotEquals($value, $originalValue); + } + } + + public function providerFractions(): array + { + return [ + 'non-fraction' => ['1', '1'], + 'common fraction' => ['1.5', '1 1/2'], + 'fraction between -1 and 0' => ['-0.5', '-1/2'], + 'fraction between -1 and 0 with space' => ['-0.5', ' - 1/2'], + 'fraction between 0 and 1' => ['0.75', '3/4 '], + 'fraction between 0 and 1 with space' => ['0.75', ' 3/4'], + 'improper fraction' => ['1.75', '7/4'], + ]; + } } diff --git a/tests/data/Reader/CSV/premiere.utf16le.csv b/tests/data/Reader/CSV/premiere.utf16le.csv index a5bb1ff1..4ca3c3e4 100644 Binary files a/tests/data/Reader/CSV/premiere.utf16le.csv and b/tests/data/Reader/CSV/premiere.utf16le.csv differ