Merge pull request #2103 from oleibman/csvdflts

CSV Reader Enhancements
This commit is contained in:
oleibman 2021-05-29 23:53:50 -07:00 committed by GitHub
commit 1d86840429
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 380 additions and 343 deletions

View File

@ -480,6 +480,41 @@ $reader->setSheetIndex(0);
$spreadsheet = $reader->load('sample.csv');
```
You can also set the reader to guess the encoding
rather than calling guessEncoding directly. In this case,
the user-settable fallback encoding is used if nothing else works.
```php
$reader = new \PhpOffice\PhpSpreadsheet\Reader\Csv();
$reader->setInputEncoding(\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING);
$reader->setFallbackEncoding('ISO-8859-2'); // default CP1252 without this statement
$reader->setDelimiter(';');
$reader->setEnclosure('');
$reader->setSheetIndex(0);
$spreadsheet = $reader->load('sample.csv');
```
Finally, you can set a callback to be invoked when the constructor is executed,
either through `new Csv()` or `IOFactory::load`,
and have that callback set the customizable attributes to whatever
defaults are appropriate for your environment.
```php
/**
 * Apply environment-specific defaults to a newly constructed Csv reader.
 */
function constructorCallback(\PhpOffice\PhpSpreadsheet\Reader\Csv $reader): void
{
    // An empty escape character (NUL before PHP 7.4) represents how Excel
    // behaves better than the default escape character.
    $excelLikeEscape = version_compare(PHP_VERSION, '7.4', '<') ? "\x0" : '';

    $reader
        ->setInputEncoding(\PhpOffice\PhpSpreadsheet\Reader\Csv::GUESS_ENCODING)
        ->setFallbackEncoding('ISO-8859-2')
        ->setDelimiter(',')
        ->setEnclosure('"')
        ->setEscapeCharacter($excelLikeEscape);
}
// Register the callback once; it is then invoked for every Csv reader that is
// constructed, whether through `new Csv()` or through IOFactory::load().
\PhpOffice\PhpSpreadsheet\Reader\Csv::setConstructorCallback('constructorCallback');
$spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load('sample.csv');
```
#### Read a specific worksheet
CSV files can only contain one worksheet. Therefore, you can specify

View File

@ -2395,101 +2395,6 @@ parameters:
count: 1
path: src/PhpSpreadsheet/Reader/BaseReader.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\:\\:\\$delimiter \\(string\\) does not accept string\\|null\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Parameter \\#1 \\$var of function count expects array\\|Countable, array\\|null given\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\:\\:openFileOrMemory\\(\\) has parameter \\$pFilename with no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Parameter \\#1 \\$value of static method PhpOffice\\\\PhpSpreadsheet\\\\Shared\\\\StringHelper\\:\\:convertEncoding\\(\\) expects string, string\\|false given\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Parameter \\#1 \\$fp of function fwrite expects resource, resource\\|false given\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Argument of an invalid type array\\|null supplied for foreach, only iterables are supported\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Parameter \\#2 \\$newvalue of function ini_set expects string, string\\|false given\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Call to function is_array\\(\\) with string will always evaluate to false\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$fileHandle has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$escapeCharacter has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$enclosure has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$counts has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$numberLines has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:\\$delimiter has no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$enclosure with no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$escapeCharacter with no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:__construct\\(\\) has parameter \\$fileHandle with no typehint specified\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Parameter \\#2 \\$subject of function preg_match expects string, string\\|null given\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Csv\\\\Delimiter\\:\\:getNextLine\\(\\) should return string\\|false but returns string\\|null\\.$#"
count: 1
path: src/PhpSpreadsheet/Reader/Csv/Delimiter.php
-
message: "#^Property PhpOffice\\\\PhpSpreadsheet\\\\Reader\\\\Html\\:\\:\\$rowspan has no typehint specified\\.$#"
count: 1
@ -7535,41 +7440,6 @@ parameters:
count: 5
path: tests/PhpSpreadsheetTests/NamedRangeTest.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:setFilterType\\(\\) has parameter \\$type with no typehint specified\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter1\\(\\) has no return typehint specified\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter1\\(\\) has parameter \\$row with no typehint specified\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter0\\(\\) has no return typehint specified\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php
-
message: "#^Method PhpOffice\\\\PhpSpreadsheetTests\\\\Reader\\\\CsvContiguousFilter\\:\\:filter0\\(\\) has parameter \\$row with no typehint specified\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousFilter.php
-
message: "#^Cannot call method getCell\\(\\) on PhpOffice\\\\PhpSpreadsheet\\\\Worksheet\\\\Worksheet\\|null\\.$#"
count: 3
path: tests/PhpSpreadsheetTests/Reader/CsvContiguousTest.php
-
message: "#^Call to static method PHPUnit\\\\Framework\\\\Assert\\:\\:assertNull\\(\\) with string will always evaluate to false\\.$#"
count: 1
path: tests/PhpSpreadsheetTests/Reader/CsvTest.php
-
message: "#^Unreachable statement \\- code above always terminates\\.$#"
count: 1

View File

@ -10,6 +10,8 @@ use PhpOffice\PhpSpreadsheet\Spreadsheet;
class Csv extends BaseReader
{
const DEFAULT_FALLBACK_ENCODING = 'CP1252';
const GUESS_ENCODING = 'guess';
const UTF8_BOM = "\xEF\xBB\xBF";
const UTF8_BOM_LEN = 3;
const UTF16BE_BOM = "\xfe\xff";
@ -33,10 +35,17 @@ class Csv extends BaseReader
private $inputEncoding = 'UTF-8';
/**
* Delimiter.
* Fallback encoding if 'guess' strikes out.
*
* @var string
*/
private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;
/**
* Delimiter.
*
* @var ?string
*/
private $delimiter;
/**
@ -67,38 +76,65 @@ class Csv extends BaseReader
*/
private $escapeCharacter = '\\';
/**
* Callback for setting defaults in construction.
*
* @var ?callable
*/
private static $constructorCallback;
/**
* Create a new CSV Reader instance.
*/
public function __construct()
{
    parent::__construct();

    // Give the statically registered hook (if any) a chance to adjust
    // this reader's defaults before the caller sees it.
    if (self::$constructorCallback !== null) {
        (self::$constructorCallback)($this);
    }
}
/**
* Set input encoding.
* Set a callback to change the defaults.
*
* @param string $pValue Input encoding, eg: 'UTF-8'
*
* @return $this
* The callback must accept the Csv Reader object as the first parameter,
* and it should return void.
*/
public function setInputEncoding($pValue)
public static function setConstructorCallback(?callable $callback): void
{
self::$constructorCallback = $callback;
}
public static function getConstructorCallback(): ?callable
{
return self::$constructorCallback;
}
public function setInputEncoding(string $pValue): self
{
$this->inputEncoding = $pValue;
return $this;
}
/**
* Get input encoding.
*
* @return string
*/
public function getInputEncoding()
public function getInputEncoding(): string
{
return $this->inputEncoding;
}
/**
 * Set the fallback encoding.
 *
 * The value is handed to guessEncoding() as its default when the input
 * encoding is self::GUESS_ENCODING.
 *
 * @param string $pValue Fallback encoding, eg: 'ISO-8859-2'
 *
 * @return $this
 */
public function setFallbackEncoding(string $pValue): self
{
$this->fallbackEncoding = $pValue;
return $this;
}
/**
 * Get the fallback encoding (self::DEFAULT_FALLBACK_ENCODING unless changed).
 */
public function getFallbackEncoding(): string
{
return $this->fallbackEncoding;
}
/**
* Move filepointer past any BOM marker.
*/
@ -161,12 +197,8 @@ class Csv extends BaseReader
/**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
*
* @param string $pFilename
*
* @return array
*/
public function listWorksheetInfo($pFilename)
public function listWorksheetInfo(string $pFilename): array
{
// Open file
$this->openFileOrMemory($pFilename);
@ -185,9 +217,11 @@ class Csv extends BaseReader
$worksheetInfo[0]['totalColumns'] = 0;
// Loop through each line of the file in turn
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
while (is_array($rowData)) {
++$worksheetInfo[0]['totalRows'];
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
}
$worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
@ -215,34 +249,35 @@ class Csv extends BaseReader
return $this->loadIntoExisting($pFilename, $spreadsheet);
}
private function openFileOrMemory($pFilename): void
/**
 * Open the CSV file for reading, converting it to UTF-8 in memory when needed.
 *
 * When the input encoding is self::GUESS_ENCODING it is first resolved via
 * guessEncoding(), using the fallback encoding as the default. If the
 * resulting encoding is not UTF-8, the whole file is read, converted to
 * UTF-8 and served from a php://memory stream instead of the file itself.
 *
 * @throws Exception if the file cannot be read as CSV, cannot be loaded for
 *                   conversion, or the in-memory stream cannot be opened
 */
private function openFileOrMemory(string $pFilename): void
{
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }

    if ($this->inputEncoding === self::GUESS_ENCODING) {
        $this->inputEncoding = self::guessEncoding($pFilename, $this->fallbackEncoding);
    }

    $this->openFile($pFilename);
    if ($this->inputEncoding === 'UTF-8') {
        // Already UTF-8: read straight from the file handle.
        return;
    }

    // Replace the file handle with an in-memory UTF-8 copy of the file.
    fclose($this->fileHandle);

    $entireFile = file_get_contents($pFilename);
    if ($entireFile === false) {
        // Fail loudly rather than silently producing an empty worksheet.
        throw new Exception('Could not read file ' . $pFilename . ' for encoding conversion.');
    }

    $memoryHandle = fopen('php://memory', 'r+b');
    if ($memoryHandle === false) {
        // Fail loudly rather than leaving an invalid handle for later reads.
        throw new Exception('Could not open in-memory stream to convert ' . $pFilename . '.');
    }

    $this->fileHandle = $memoryHandle;
    // Write the converted data exactly once, then position past any BOM.
    fwrite($this->fileHandle, StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding));
    $this->skipBOM();
}
/**
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
*
* @param string $pFilename
*
* @return Spreadsheet
*/
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
public function loadIntoExisting(string $pFilename, Spreadsheet $spreadsheet): Spreadsheet
{
$lineEnding = ini_get('auto_detect_line_endings');
$lineEnding = ini_get('auto_detect_line_endings') ?: '0';
ini_set('auto_detect_line_endings', '1');
// Open file
@ -265,7 +300,8 @@ class Csv extends BaseReader
$outRow = 0;
// Loop through each line of the file in turn
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
while (is_array($rowData)) {
$noOutputYet = true;
$columnLetter = 'A';
foreach ($rowData as $rowDatum) {
@ -283,6 +319,7 @@ class Csv extends BaseReader
}
++$columnLetter;
}
$rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
++$currentRow;
}
@ -295,48 +332,24 @@ class Csv extends BaseReader
return $spreadsheet;
}
/**
* Get delimiter.
*
* @return string
*/
public function getDelimiter()
public function getDelimiter(): ?string
{
return $this->delimiter;
}
/**
* Set delimiter.
*
* @param string $delimiter Delimiter, eg: ','
*
* @return $this
*/
public function setDelimiter($delimiter)
public function setDelimiter(string $delimiter): self
{
$this->delimiter = $delimiter;
return $this;
}
/**
* Get enclosure.
*
* @return string
*/
public function getEnclosure()
public function getEnclosure(): string
{
return $this->enclosure;
}
/**
* Set enclosure.
*
* @param string $enclosure Enclosure, defaults to "
*
* @return $this
*/
public function setEnclosure($enclosure)
public function setEnclosure(string $enclosure): self
{
if ($enclosure == '') {
$enclosure = '"';
@ -346,78 +359,55 @@ class Csv extends BaseReader
return $this;
}
/**
* Get sheet index.
*
* @return int
*/
public function getSheetIndex()
public function getSheetIndex(): int
{
return $this->sheetIndex;
}
/**
* Set sheet index.
*
* @param int $pValue Sheet index
*
* @return $this
*/
public function setSheetIndex($pValue)
public function setSheetIndex(int $pValue): self
{
$this->sheetIndex = $pValue;
return $this;
}
/**
* Set Contiguous.
*
* @param bool $contiguous
*
* @return $this
*/
public function setContiguous($contiguous)
public function setContiguous(bool $contiguous): self
{
    // The bool parameter type already guarantees a boolean value here.
    $this->contiguous = $contiguous;

    return $this;
}
/**
* Get Contiguous.
*
* @return bool
*/
public function getContiguous()
public function getContiguous(): bool
{
return $this->contiguous;
}
/**
* Set escape backslashes.
*
* @param string $escapeCharacter
*
* @return $this
*/
public function setEscapeCharacter($escapeCharacter)
public function setEscapeCharacter(string $escapeCharacter): self
{
$this->escapeCharacter = $escapeCharacter;
return $this;
}
/**
* Get escape backslashes.
*
* @return string
*/
public function getEscapeCharacter()
public function getEscapeCharacter(): string
{
return $this->escapeCharacter;
}
/**
* Scrutinizer believes, incorrectly, that the specific pathinfo
* call in canRead can return something other than an array.
* Phpstan knows better.
* This function satisfies both.
*
* @param mixed $extension
*/
private static function extractStringLower($extension): string
{
    // Anything that is not a string is treated as "no extension".
    if (!is_string($extension)) {
        return '';
    }

    return strtolower($extension);
}
/**
* Can the current IReader read the file?
*
@ -437,8 +427,7 @@ class Csv extends BaseReader
fclose($this->fileHandle);
// Trust file extension if any
$extension = pathinfo($pFilename, PATHINFO_EXTENSION);
$extension = is_array($extension) ? '' : strtolower($extension);
$extension = self::extractStringLower(pathinfo($pFilename, PATHINFO_EXTENSION));
if (in_array($extension, ['csv', 'tsv'])) {
return true;
}
@ -504,7 +493,7 @@ class Csv extends BaseReader
return $encoding;
}
public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string
public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
{
$encoding = self::guessEncodingBom($filename);
if ($encoding === '') {

View File

@ -6,19 +6,28 @@ class Delimiter
{
protected const POTENTIAL_DELIMETERS = [',', ';', "\t", '|', ':', ' ', '~'];
/** @var resource */
protected $fileHandle;
/** @var string */
protected $escapeCharacter;
/** @var string */
protected $enclosure;
/** @var array */
protected $counts = [];
/** @var int */
protected $numberLines = 0;
/** @var ?string */
protected $delimiter;
public function __construct($fileHandle, $escapeCharacter, $enclosure)
/**
* @param resource $fileHandle
*/
public function __construct($fileHandle, string $escapeCharacter, string $enclosure)
{
$this->fileHandle = $fileHandle;
$this->escapeCharacter = $escapeCharacter;
@ -52,15 +61,13 @@ class Delimiter
protected function countDelimiterValues(string $line, array $delimiterKeys): void
{
$splitString = str_split($line, 1);
if (!is_array($splitString)) {
return;
}
if (is_array($splitString)) {
$distribution = array_count_values($splitString);
$countLine = array_intersect_key($distribution, $delimiterKeys);
$distribution = array_count_values($splitString);
$countLine = array_intersect_key($distribution, $delimiterKeys);
foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
$this->counts[$delimiter][] = $countLine[$delimiter] ?? 0;
foreach (self::POTENTIAL_DELIMETERS as $delimiter) {
$this->counts[$delimiter][] = $countLine[$delimiter] ?? 0;
}
}
}
@ -137,8 +144,8 @@ class Delimiter
// See if we have any enclosures left in the line
// if we still have an enclosure then we need to read the next line as well
} while (preg_match('/(' . $enclosure . ')/', $line) > 0);
} while (preg_match('/(' . $enclosure . ')/', $line ?? '') > 0);
return $line;
return $line ?? false;
}
}

View File

@ -0,0 +1,93 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
use PhpOffice\PhpSpreadsheet\IOFactory;
use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PHPUnit\Framework\TestCase;
/**
 * Tests for the static constructor callback of the Csv reader, which lets an
 * application set its own defaults on every reader that gets constructed.
 */
class CsvCallbackTest extends TestCase
{
    /**
     * The callback is static state on Csv; clear it so it cannot leak into
     * other tests.
     */
    protected function tearDown(): void
    {
        Csv::setConstructorCallback(null);
    }

    /**
     * Callback that changes nothing and only checks what it is handed.
     *
     * @param mixed $obj
     */
    public function callbackDoNothing($obj): void
    {
        self::assertInstanceOf(Csv::class, $obj);
    }

    public function testCallbackDoNothing(): void
    {
        Csv::setConstructorCallback([$this, 'callbackDoNothing']);
        $filename = 'tests/data/Reader/CSV/encoding.iso88591.csv';
        $reader = new Csv();
        $reader->setInputEncoding(Csv::GUESS_ENCODING);
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        // assertSame: the value must be this exact string, not merely equal after type juggling.
        self::assertSame('Å', $sheet->getCell('A1')->getValue());
    }

    /**
     * Callback that switches the reader to guessed encoding with an
     * ISO-8859-2 fallback and an empty (NUL before PHP 7.4) escape character.
     */
    public function callbackSetFallbackEncoding(Csv $reader): void
    {
        $reader->setFallbackEncoding('ISO-8859-2');
        $reader->setInputEncoding(Csv::GUESS_ENCODING);
        $reader->setEscapeCharacter((version_compare(PHP_VERSION, '7.4') < 0) ? "\x0" : '');
    }

    public function testFallbackEncodingDefltIso2(): void
    {
        Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']);
        $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
        $reader = new Csv();
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        // The win1252 bytes are read with the ISO-8859-2 fallback, hence 'č' rather than 'è'.
        self::assertSame('premičre', $sheet->getCell('A1')->getValue());
        self::assertSame('sixičme', $sheet->getCell('C2')->getValue());
    }

    /**
     * The callback must also take effect when the reader is created by IOFactory.
     */
    public function testIOFactory(): void
    {
        Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']);
        $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
        $spreadsheet = IOFactory::load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('premičre', $sheet->getCell('A1')->getValue());
        self::assertSame('sixičme', $sheet->getCell('C2')->getValue());
    }

    /**
     * With the same callback, a UTF-16BE file is still read correctly.
     */
    public function testNonFallbackEncoding(): void
    {
        Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']);
        $filename = 'tests/data/Reader/CSV/premiere.utf16be.csv';
        $reader = new Csv();
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('première', $sheet->getCell('A1')->getValue());
        self::assertSame('sixième', $sheet->getCell('C2')->getValue());
    }

    public function testDefaultEscape(): void
    {
        self::assertNull(Csv::getConstructorCallback());
        $filename = 'tests/data/Reader/CSV/escape.csv';
        $spreadsheet = IOFactory::load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        // this is not how Excel views the file
        self::assertSame('a\"hello', $sheet->getCell('A1')->getValue());
    }

    public function testBetterEscape(): void
    {
        Csv::setConstructorCallback([$this, 'callbackSetFallbackEncoding']);
        $filename = 'tests/data/Reader/CSV/escape.csv';
        $spreadsheet = IOFactory::load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        // this is how Excel views the file
        self::assertSame('a\"hello;hello;hello;\"', $sheet->getCell('A1')->getValue());
    }
}

View File

@ -1,6 +1,6 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\IReadFilter;
@ -34,18 +34,18 @@ class CsvContiguousFilter implements IReadFilter
$this->endRow = $startRow + $chunkSize;
}
public function setFilterType($type): void
public function setFilterType(int $type): void
{
$this->filterType = $type;
}
public function filter1($row)
public function filter1(int $row): bool
{
// Include rows 1-10, followed by 100-110, etc.
return $row % 100 <= 10;
}
public function filter0($row)
public function filter0(int $row): bool
{
// Only read the heading row, and the rows that are configured in $this->_startRow and $this->_endRow
if (($row == 1) || ($row >= $this->startRow && $row < $this->endRow)) {

View File

@ -1,6 +1,6 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@ -49,12 +49,19 @@ class CsvContiguousTest extends TestCase
$spreadsheet->getActiveSheet()->setTitle('Country Data #' . (++$sheet));
}
$sheet = $spreadsheet->getSheetByName('Country Data #1');
self::assertEquals('Kabul', $sheet->getCell('A2')->getValue());
$sheet = $spreadsheet->getSheetByName('Country Data #2');
self::assertEquals('Lesotho', $sheet->getCell('B4')->getValue());
$sheet = $spreadsheet->getSheetByName('Country Data #3');
self::assertEquals(-20.1, $sheet->getCell('C6')->getValue());
self::assertSame('Kabul', self::getCellValue($spreadsheet, 'Country Data #1', 'A2'));
self::assertSame('Lesotho', self::getCellValue($spreadsheet, 'Country Data #2', 'B4'));
self::assertSame('-20.1', self::getCellValue($spreadsheet, 'Country Data #3', 'C6'));
}
/**
 * Fetch a cell's value as a string from the named sheet; a sheet that does
 * not exist yields an empty string.
 */
private static function getCellValue(Spreadsheet $spreadsheet, string $sheetName, string $cellAddress): string
{
    $worksheet = $spreadsheet->getSheetByName($sheetName);

    return ($worksheet === null)
        ? ''
        : (string) $worksheet->getCell($cellAddress)->getValue();
}
public function testContiguous2(): void

View File

@ -0,0 +1,122 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PHPUnit\Framework\TestCase;
/**
 * Tests for reading CSV files in various encodings, with the encoding given
 * explicitly, guessed via Csv::guessEncoding(), or guessed by the reader
 * itself through Csv::GUESS_ENCODING.
 */
class CsvEncodingTest extends TestCase
{
    /**
     * Loading a file with its explicitly stated encoding yields the expected cell.
     *
     * @dataProvider providerEncodings
     */
    public function testEncodings(string $filename, string $encoding): void
    {
        $reader = new Csv();
        $reader->setInputEncoding($encoding);
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('Å', $sheet->getCell('A1')->getValue());
    }

    /**
     * listWorksheetInfo() reports the same dimensions for every encoding.
     *
     * @dataProvider providerEncodings
     */
    public function testWorkSheetInfo(string $filename, string $encoding): void
    {
        $reader = new Csv();
        $reader->setInputEncoding($encoding);
        $info = $reader->listWorksheetInfo($filename);
        self::assertSame('Worksheet', $info[0]['worksheetName']);
        self::assertSame('B', $info[0]['lastColumnLetter']);
        self::assertSame(1, $info[0]['lastColumnIndex']);
        self::assertSame(2, $info[0]['totalRows']);
        self::assertSame(2, $info[0]['totalColumns']);
    }

    /**
     * @return array<int, array{0: string, 1: string}> filename / encoding pairs
     */
    public function providerEncodings(): array
    {
        return [
            ['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'],
            ['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'],
            ['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'],
            ['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'],
            ['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'],
            ['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'],
            ['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'],
        ];
    }

    /**
     * The caller guesses the encoding and passes the result to the reader.
     *
     * @dataProvider providerGuessEncoding
     */
    public function testGuessEncoding(string $filename): void
    {
        $reader = new Csv();
        $reader->setInputEncoding(Csv::guessEncoding($filename));
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('première', $sheet->getCell('A1')->getValue());
        self::assertSame('sixième', $sheet->getCell('C2')->getValue());
    }

    /**
     * The reader guesses the encoding itself, with the default fallback encoding.
     *
     * @dataProvider providerGuessEncoding
     */
    public function testFallbackEncoding(string $filename): void
    {
        $reader = new Csv();
        $reader->setInputEncoding(Csv::GUESS_ENCODING);
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('première', $sheet->getCell('A1')->getValue());
        self::assertSame('sixième', $sheet->getCell('C2')->getValue());
    }

    /**
     * @return array<int, array{0: string}> filenames whose encoding must be guessed
     */
    public function providerGuessEncoding(): array
    {
        return [
            ['tests/data/Reader/CSV/premiere.utf8.csv'],
            ['tests/data/Reader/CSV/premiere.utf8bom.csv'],
            ['tests/data/Reader/CSV/premiere.utf16be.csv'],
            ['tests/data/Reader/CSV/premiere.utf16bebom.csv'],
            ['tests/data/Reader/CSV/premiere.utf16le.csv'],
            ['tests/data/Reader/CSV/premiere.utf16lebom.csv'],
            ['tests/data/Reader/CSV/premiere.utf32be.csv'],
            ['tests/data/Reader/CSV/premiere.utf32bebom.csv'],
            ['tests/data/Reader/CSV/premiere.utf32le.csv'],
            ['tests/data/Reader/CSV/premiere.utf32lebom.csv'],
            ['tests/data/Reader/CSV/premiere.win1252.csv'],
        ];
    }

    public function testGuessEncodingDefltIso2(): void
    {
        $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
        $reader = new Csv();
        $reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2'));
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        // The win1252 bytes are read as ISO-8859-2, hence 'č' rather than 'è'.
        self::assertSame('premičre', $sheet->getCell('A1')->getValue());
        self::assertSame('sixičme', $sheet->getCell('C2')->getValue());
    }

    public function testFallbackEncodingDefltIso2(): void
    {
        $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
        $reader = new Csv();
        self::assertSame('CP1252', $reader->getFallbackEncoding());
        $reader->setInputEncoding(Csv::GUESS_ENCODING);
        $reader->setFallbackEncoding('ISO-8859-2');
        $spreadsheet = $reader->load($filename);
        $sheet = $spreadsheet->getActiveSheet();
        self::assertSame('premičre', $sheet->getCell('A1')->getValue());
        self::assertSame('sixičme', $sheet->getCell('C2')->getValue());
    }
}

View File

@ -1,6 +1,6 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader;
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Csv;
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
@ -19,7 +19,8 @@ class CsvTest extends TestCase
public function testDelimiterDetection($filename, $expectedDelimiter, $cell, $expectedValue): void
{
$reader = new Csv();
self::assertNull($reader->getDelimiter());
$delim1 = $reader->getDelimiter();
self::assertNull($delim1);
$spreadsheet = $reader->load($filename);
@ -132,21 +133,6 @@ class CsvTest extends TestCase
self::assertSame($expected, $worksheet->toArray());
}
/**
* @dataProvider providerEncodings
*
* @param string $filename
* @param string $encoding
*/
public function testEncodings($filename, $encoding): void
{
$reader = new Csv();
$reader->setInputEncoding($encoding);
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertEquals('Å', $sheet->getCell('A1')->getValue());
}
public function testInvalidWorkSheetInfo(): void
{
$this->expectException(ReaderException::class);
@ -154,37 +140,6 @@ class CsvTest extends TestCase
$reader->listWorksheetInfo('');
}
/**
* @dataProvider providerEncodings
*
* @param string $filename
* @param string $encoding
*/
public function testWorkSheetInfo($filename, $encoding): void
{
$reader = new Csv();
$reader->setInputEncoding($encoding);
$info = $reader->listWorksheetInfo($filename);
self::assertEquals('Worksheet', $info[0]['worksheetName']);
self::assertEquals('B', $info[0]['lastColumnLetter']);
self::assertEquals(1, $info[0]['lastColumnIndex']);
self::assertEquals(2, $info[0]['totalRows']);
self::assertEquals(2, $info[0]['totalColumns']);
}
public function providerEncodings(): array
{
return [
['tests/data/Reader/CSV/encoding.iso88591.csv', 'ISO-8859-1'],
['tests/data/Reader/CSV/encoding.utf8.csv', 'UTF-8'],
['tests/data/Reader/CSV/encoding.utf8bom.csv', 'UTF-8'],
['tests/data/Reader/CSV/encoding.utf16be.csv', 'UTF-16BE'],
['tests/data/Reader/CSV/encoding.utf16le.csv', 'UTF-16LE'],
['tests/data/Reader/CSV/encoding.utf32be.csv', 'UTF-32BE'],
['tests/data/Reader/CSV/encoding.utf32le.csv', 'UTF-32LE'],
];
}
public function testUtf16LineBreak(): void
{
$reader = new Csv();
@ -296,45 +251,4 @@ EOF;
[(version_compare(PHP_VERSION, '7.4') < 0) ? "\x0" : '', ','],
];
}
/**
* @dataProvider providerGuessEncoding
*/
public function testGuessEncoding(string $filename): void
{
$reader = new Csv();
$reader->setInputEncoding(Csv::guessEncoding($filename));
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertEquals('première', $sheet->getCell('A1')->getValue());
self::assertEquals('sixième', $sheet->getCell('C2')->getValue());
}
public function providerGuessEncoding(): array
{
return [
['tests/data/Reader/CSV/premiere.utf8.csv'],
['tests/data/Reader/CSV/premiere.utf8bom.csv'],
['tests/data/Reader/CSV/premiere.utf16be.csv'],
['tests/data/Reader/CSV/premiere.utf16bebom.csv'],
['tests/data/Reader/CSV/premiere.utf16le.csv'],
['tests/data/Reader/CSV/premiere.utf16lebom.csv'],
['tests/data/Reader/CSV/premiere.utf32be.csv'],
['tests/data/Reader/CSV/premiere.utf32bebom.csv'],
['tests/data/Reader/CSV/premiere.utf32le.csv'],
['tests/data/Reader/CSV/premiere.utf32lebom.csv'],
['tests/data/Reader/CSV/premiere.win1252.csv'],
];
}
public function testGuessEncodingDefltIso2(): void
{
$filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
$reader = new Csv();
$reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2'));
$spreadsheet = $reader->load($filename);
$sheet = $spreadsheet->getActiveSheet();
self::assertEquals('premičre', $sheet->getCell('A1')->getValue());
self::assertEquals('sixičme', $sheet->getCell('C2')->getValue());
}
}