Merge pull request #2757 from PHPOffice/Issue-2547_Narrow-down-Reader-format-identification
Allow Reader format identification to use a subset of possible Readers
This commit is contained in:
commit
eabe0ca68a
|
|
@ -80,7 +80,8 @@ semi-colon (`;`) are used as separators instead of a comma, although
|
||||||
other symbols can be used. Because CSV is a text-only format, it doesn't
|
other symbols can be used. Because CSV is a text-only format, it doesn't
|
||||||
support any data formatting options.
|
support any data formatting options.
|
||||||
|
|
||||||
"CSV" is not a single, well-defined format (although see RFC 4180 for
|
"CSV" is not a single, well-defined format (although see
|
||||||
|
[RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.html) for
|
||||||
one definition that is commonly used). Rather, in practice the term
|
one definition that is commonly used). Rather, in practice the term
|
||||||
"CSV" refers to any file that:
|
"CSV" refers to any file that:
|
||||||
|
|
||||||
|
|
@ -117,5 +118,5 @@ Wide Web Consortium (W3C). However, in 2000, HTML also became an
|
||||||
international standard (ISO/IEC 15445:2000). HTML 4.01 was published in
|
international standard (ISO/IEC 15445:2000). HTML 4.01 was published in
|
||||||
late 1999, with further errata published through 2001. In 2004
|
late 1999, with further errata published through 2001. In 2004
|
||||||
development began on HTML5 in the Web Hypertext Application Technology
|
development began on HTML5 in the Web Hypertext Application Technology
|
||||||
Working Group (WHATWG), which became a joint deliverable with the W3C in
|
Working Group (WHATWG), which became a joint deliverable with the W3C in 2008.
|
||||||
2008.
|
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,22 @@ practise), it will reject the Xls loader that it would normally use for
|
||||||
a .xls file; and test the file using the other loaders until it finds
|
a .xls file; and test the file using the other loaders until it finds
|
||||||
the appropriate loader, and then use that to read the file.
|
the appropriate loader, and then use that to read the file.
|
||||||
|
|
||||||
|
If you know that this is an `xls` file, but don't know whether it is a
|
||||||
|
genuine BIFF-format Excel file or HTML markup with an xls extension, you can
|
||||||
|
limit the loader to check only those two possibilities by passing in an
|
||||||
|
array of Readers to test against.
|
||||||
|
|
||||||
|
```php
|
||||||
|
$inputFileName = './sampleData/example1.xls';
|
||||||
|
$testAgainstFormats = [
|
||||||
|
\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS,
|
||||||
|
\PhpOffice\PhpSpreadsheet\IOFactory::READER_HTML,
|
||||||
|
];
|
||||||
|
|
||||||
|
/** Load $inputFileName to a Spreadsheet Object **/
|
||||||
|
$spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($inputFileName, 0, $testAgainstFormats);
|
||||||
|
```
|
||||||
|
|
||||||
While easy to implement in your code, and you don't need to worry about
|
While easy to implement in your code, and you don't need to worry about
|
||||||
the file type; this isn't the most efficient method to load a file; and
|
the file type; this isn't the most efficient method to load a file; and
|
||||||
it lacks the flexibility to configure the loader in any way before
|
it lacks the flexibility to configure the loader in any way before
|
||||||
|
|
@ -118,6 +134,34 @@ $spreadsheet = $reader->load($inputFileName);
|
||||||
See `samples/Reader/04_Simple_file_reader_using_the_IOFactory_to_identify_a_reader_to_use.php`
|
See `samples/Reader/04_Simple_file_reader_using_the_IOFactory_to_identify_a_reader_to_use.php`
|
||||||
for a working example of this code.
|
for a working example of this code.
|
||||||
|
|
||||||
|
As with the IOFactory `load()` method, you can also pass an array of formats
|
||||||
|
for the `identify()` method to check against if you know that the file will only
|
||||||
|
be in a subset of the possible formats that PhpSpreadsheet supports.
|
||||||
|
|
||||||
|
```php
|
||||||
|
$inputFileName = './sampleData/example1.xls';
|
||||||
|
$testAgainstFormats = [
|
||||||
|
\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS,
|
||||||
|
\PhpOffice\PhpSpreadsheet\IOFactory::READER_HTML,
|
||||||
|
];
|
||||||
|
|
||||||
|
/** Identify the type of $inputFileName **/
|
||||||
|
$inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($inputFileName, $testAgainstFormats);
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also use this to confirm that a file is what it claims to be:
|
||||||
|
|
||||||
|
```php
|
||||||
|
$inputFileName = './sampleData/example1.xls';
|
||||||
|
|
||||||
|
try {
|
||||||
|
/** Verify that $inputFileName really is an Xls file **/
|
||||||
|
$inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($inputFileName, [\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS]);
|
||||||
|
} catch (\PhpOffice\PhpSpreadsheet\Reader\Exception $e) {
|
||||||
|
// File isn't actually an Xls file, even though it has an xls extension
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Spreadsheet Reader Options
|
## Spreadsheet Reader Options
|
||||||
|
|
||||||
Once you have created a reader object for the workbook that you want to
|
Once you have created a reader object for the workbook that you want to
|
||||||
|
|
@ -146,7 +190,7 @@ $spreadsheet = $reader->load($inputFileName);
|
||||||
See `samples/Reader/05_Simple_file_reader_using_the_read_data_only_option.php`
|
See `samples/Reader/05_Simple_file_reader_using_the_read_data_only_option.php`
|
||||||
for a working example of this code.
|
for a working example of this code.
|
||||||
|
|
||||||
It is important to note that Workbooks (and PhpSpreadsheet) store dates
|
It is important to note that most Workbooks (and PhpSpreadsheet) store dates
|
||||||
and times as simple numeric values: they can only be distinguished from
|
and times as simple numeric values: they can only be distinguished from
|
||||||
other numeric values by the format mask that is applied to that cell.
|
other numeric values by the format mask that is applied to that cell.
|
||||||
When setting read data only to true, PhpSpreadsheet doesn't read the
|
When setting read data only to true, PhpSpreadsheet doesn't read the
|
||||||
|
|
@ -162,8 +206,8 @@ Reading Only Data from a Spreadsheet File applies to Readers:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | YES | Xls | YES | Xml | YES |
|
Xlsx | YES | Xls | YES | Xml | YES |
|
||||||
Ods | YES | SYLK | NO | Gnumeric | YES |
|
Ods | YES | SYLK | NO | Gnumeric | YES |
|
||||||
CSV | NO | HTML | NO
|
CSV | NO | HTML | NO
|
||||||
|
|
||||||
### Reading Only Named WorkSheets from a File
|
### Reading Only Named WorkSheets from a File
|
||||||
|
|
@ -233,8 +277,8 @@ Reading Only Named WorkSheets from a File applies to Readers:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | YES | Xls | YES | Xml | YES |
|
Xlsx | YES | Xls | YES | Xml | YES |
|
||||||
Ods | YES | SYLK | NO | Gnumeric | YES |
|
Ods | YES | SYLK | NO | Gnumeric | YES |
|
||||||
CSV | NO | HTML | NO
|
CSV | NO | HTML | NO
|
||||||
|
|
||||||
### Reading Only Specific Columns and Rows from a File (Read Filters)
|
### Reading Only Specific Columns and Rows from a File (Read Filters)
|
||||||
|
|
@ -381,7 +425,7 @@ Using Read Filters applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | YES | Xls | YES | Xml | YES |
|
Xlsx | YES | Xls | YES | Xml | YES |
|
||||||
Ods | YES | SYLK | NO | Gnumeric | YES |
|
Ods | YES | SYLK | NO | Gnumeric | YES |
|
||||||
CSV | YES | HTML | NO | | |
|
CSV | YES | HTML | NO | | |
|
||||||
|
|
||||||
|
|
@ -439,7 +483,7 @@ Combining Multiple Files into a Single Spreadsheet Object applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | NO | Xls | NO | Xml | NO |
|
Xlsx | NO | Xls | NO | Xml | NO |
|
||||||
Ods | NO | SYLK | YES | Gnumeric | NO |
|
Ods | NO | SYLK | YES | Gnumeric | NO |
|
||||||
CSV | YES | HTML | NO
|
CSV | YES | HTML | NO
|
||||||
|
|
||||||
|
|
@ -516,7 +560,7 @@ Splitting a single loaded file across multiple worksheets applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | NO | Xls | NO | Xml | NO |
|
Xlsx | NO | Xls | NO | Xml | NO |
|
||||||
Ods | NO | SYLK | NO | Gnumeric | NO |
|
Ods | NO | SYLK | NO | Gnumeric | NO |
|
||||||
CSV | YES | HTML | NO
|
CSV | YES | HTML | NO
|
||||||
|
|
||||||
|
|
@ -556,7 +600,7 @@ Setting CSV delimiter applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | NO | Xls | NO | Xml | NO |
|
Xlsx | NO | Xls | NO | Xml | NO |
|
||||||
Ods | NO | SYLK | NO | Gnumeric | NO |
|
Ods | NO | SYLK | NO | Gnumeric | NO |
|
||||||
CSV | YES | HTML | NO
|
CSV | YES | HTML | NO
|
||||||
|
|
||||||
|
|
@ -594,7 +638,7 @@ Applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
Reader | Y/N |Reader | Y/N |Reader | Y/N |
|
||||||
----------|:---:|--------|:---:|--------------|:---:|
|
----------|:---:|--------|:---:|--------------|:---:|
|
||||||
Xlsx | NO | Xls | NO | Xml | NO |
|
Xlsx | NO | Xls | NO | Xml | NO |
|
||||||
Ods | NO | SYLK | NO | Gnumeric | NO |
|
Ods | NO | SYLK | NO | Gnumeric | NO |
|
||||||
CSV | YES | HTML | NO
|
CSV | YES | HTML | NO
|
||||||
|
|
||||||
|
|
@ -646,7 +690,7 @@ Loading using a Value Binder applies to:
|
||||||
|
|
||||||
Reader | Y/N |Reader | Y/N |Reader | Y/N
|
Reader | Y/N |Reader | Y/N |Reader | Y/N
|
||||||
----------|:---:|--------|:---:|--------------|:---:
|
----------|:---:|--------|:---:|--------------|:---:
|
||||||
Xlsx | NO | Xls | NO | Xml | NO
|
Xlsx | NO | Xls | NO | Xml | NO
|
||||||
Ods | NO | SYLK | NO | Gnumeric | NO
|
Ods | NO | SYLK | NO | Gnumeric | NO
|
||||||
CSV | YES | HTML | YES
|
CSV | YES | HTML | YES
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,23 +14,39 @@ use PhpOffice\PhpSpreadsheet\Writer\IWriter;
|
||||||
*/
|
*/
|
||||||
abstract class IOFactory
|
abstract class IOFactory
|
||||||
{
|
{
|
||||||
|
public const READER_XLSX = 'Xlsx';
|
||||||
|
public const READER_XLS = 'Xls';
|
||||||
|
public const READER_XML = 'Xml';
|
||||||
|
public const READER_ODS = 'Ods';
|
||||||
|
public const READER_SYLK = 'Slk';
|
||||||
|
public const READER_SLK = 'Slk';
|
||||||
|
public const READER_GNUMERIC = 'Gnumeric';
|
||||||
|
public const READER_HTML = 'Html';
|
||||||
|
public const READER_CSV = 'Csv';
|
||||||
|
|
||||||
|
public const WRITER_XLSX = 'Xlsx';
|
||||||
|
public const WRITER_XLS = 'Xls';
|
||||||
|
public const WRITER_ODS = 'Ods';
|
||||||
|
public const WRITER_CSV = 'Csv';
|
||||||
|
public const WRITER_HTML = 'Html';
|
||||||
|
|
||||||
private static $readers = [
|
private static $readers = [
|
||||||
'Xlsx' => Reader\Xlsx::class,
|
self::READER_XLSX => Reader\Xlsx::class,
|
||||||
'Xls' => Reader\Xls::class,
|
self::READER_XLS => Reader\Xls::class,
|
||||||
'Xml' => Reader\Xml::class,
|
self::READER_XML => Reader\Xml::class,
|
||||||
'Ods' => Reader\Ods::class,
|
self::READER_ODS => Reader\Ods::class,
|
||||||
'Slk' => Reader\Slk::class,
|
self::READER_SLK => Reader\Slk::class,
|
||||||
'Gnumeric' => Reader\Gnumeric::class,
|
self::READER_GNUMERIC => Reader\Gnumeric::class,
|
||||||
'Html' => Reader\Html::class,
|
self::READER_HTML => Reader\Html::class,
|
||||||
'Csv' => Reader\Csv::class,
|
self::READER_CSV => Reader\Csv::class,
|
||||||
];
|
];
|
||||||
|
|
||||||
private static $writers = [
|
private static $writers = [
|
||||||
'Xls' => Writer\Xls::class,
|
self::WRITER_XLS => Writer\Xls::class,
|
||||||
'Xlsx' => Writer\Xlsx::class,
|
self::WRITER_XLSX => Writer\Xlsx::class,
|
||||||
'Ods' => Writer\Ods::class,
|
self::WRITER_ODS => Writer\Ods::class,
|
||||||
'Csv' => Writer\Csv::class,
|
self::WRITER_CSV => Writer\Csv::class,
|
||||||
'Html' => Writer\Html::class,
|
self::WRITER_HTML => Writer\Html::class,
|
||||||
'Tcpdf' => Writer\Pdf\Tcpdf::class,
|
'Tcpdf' => Writer\Pdf\Tcpdf::class,
|
||||||
'Dompdf' => Writer\Pdf\Dompdf::class,
|
'Dompdf' => Writer\Pdf\Dompdf::class,
|
||||||
'Mpdf' => Writer\Pdf\Mpdf::class,
|
'Mpdf' => Writer\Pdf\Mpdf::class,
|
||||||
|
|
@ -70,10 +86,18 @@ abstract class IOFactory
|
||||||
* Loads Spreadsheet from file using automatic Reader\IReader resolution.
|
* Loads Spreadsheet from file using automatic Reader\IReader resolution.
|
||||||
*
|
*
|
||||||
* @param string $filename The name of the spreadsheet file
|
* @param string $filename The name of the spreadsheet file
|
||||||
|
* @param int $flags the optional second parameter flags may be used to identify specific elements
|
||||||
|
* that should be loaded, but which won't be loaded by default, using these values:
|
||||||
|
* IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file
|
||||||
|
* @param string[] $readers An array of Readers to use to identify the file type. By default, load() will try
|
||||||
|
* all possible Readers until it finds a match; but this allows you to pass in a
|
||||||
|
* list of Readers so it will only try the subset that you specify here.
|
||||||
|
* Values in this list can be any of the constant values defined in the set
|
||||||
|
* IOFactory::READER_*.
|
||||||
*/
|
*/
|
||||||
public static function load(string $filename, int $flags = 0): Spreadsheet
|
public static function load(string $filename, int $flags = 0, ?array $readers = null): Spreadsheet
|
||||||
{
|
{
|
||||||
$reader = self::createReaderForFile($filename);
|
$reader = self::createReaderForFile($filename, $readers);
|
||||||
|
|
||||||
return $reader->load($filename, $flags);
|
return $reader->load($filename, $flags);
|
||||||
}
|
}
|
||||||
|
|
@ -81,9 +105,9 @@ abstract class IOFactory
|
||||||
/**
|
/**
|
||||||
* Identify file type using automatic IReader resolution.
|
* Identify file type using automatic IReader resolution.
|
||||||
*/
|
*/
|
||||||
public static function identify(string $filename): string
|
public static function identify(string $filename, ?array $readers = null): string
|
||||||
{
|
{
|
||||||
$reader = self::createReaderForFile($filename);
|
$reader = self::createReaderForFile($filename, $readers);
|
||||||
$className = get_class($reader);
|
$className = get_class($reader);
|
||||||
$classType = explode('\\', $className);
|
$classType = explode('\\', $className);
|
||||||
unset($reader);
|
unset($reader);
|
||||||
|
|
@ -93,14 +117,32 @@ abstract class IOFactory
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create Reader\IReader for file using automatic IReader resolution.
|
* Create Reader\IReader for file using automatic IReader resolution.
|
||||||
|
*
|
||||||
|
* @param string[] $readers An array of Readers to use to identify the file type. By default, this method will try
|
||||||
|
* all possible Readers until it finds a match; but this allows you to pass in a
|
||||||
|
* list of Readers so it will only try the subset that you specify here.
|
||||||
|
* Values in this list can be any of the constant values defined in the set
|
||||||
|
* IOFactory::READER_*.
|
||||||
*/
|
*/
|
||||||
public static function createReaderForFile(string $filename): IReader
|
public static function createReaderForFile(string $filename, ?array $readers = null): IReader
|
||||||
{
|
{
|
||||||
File::assertFile($filename);
|
File::assertFile($filename);
|
||||||
|
|
||||||
|
$testReaders = self::$readers;
|
||||||
|
if ($readers !== null) {
|
||||||
|
$readers = array_map('strtoupper', $readers);
|
||||||
|
$testReaders = array_filter(
|
||||||
|
self::$readers,
|
||||||
|
function (string $readerType) use ($readers) {
|
||||||
|
return in_array(strtoupper($readerType), $readers, true);
|
||||||
|
},
|
||||||
|
ARRAY_FILTER_USE_KEY
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// First, lucky guess by inspecting file extension
|
// First, lucky guess by inspecting file extension
|
||||||
$guessedReader = self::getReaderTypeFromExtension($filename);
|
$guessedReader = self::getReaderTypeFromExtension($filename);
|
||||||
if ($guessedReader !== null) {
|
if (($guessedReader !== null) && array_key_exists($guessedReader, $testReaders)) {
|
||||||
$reader = self::createReader($guessedReader);
|
$reader = self::createReader($guessedReader);
|
||||||
|
|
||||||
// Let's see if we are lucky
|
// Let's see if we are lucky
|
||||||
|
|
@ -110,11 +152,11 @@ abstract class IOFactory
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we reach here then "lucky guess" didn't give any result
|
// If we reach here then "lucky guess" didn't give any result
|
||||||
// Try walking through all the options in self::$autoResolveClasses
|
// Try walking through all the options in self::$readers (or the selected subset)
|
||||||
foreach (self::$readers as $type => $class) {
|
foreach ($testReaders as $readerType => $class) {
|
||||||
// Ignore our original guess, we know that won't work
|
// Ignore our original guess, we know that won't work
|
||||||
if ($type !== $guessedReader) {
|
if ($readerType !== $guessedReader) {
|
||||||
$reader = self::createReader($type);
|
$reader = self::createReader($readerType);
|
||||||
if ($reader->canRead($filename)) {
|
if ($reader->canRead($filename)) {
|
||||||
return $reader;
|
return $reader;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,10 @@ abstract class BaseReader implements IReader
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads Spreadsheet from file.
|
* Loads Spreadsheet from file.
|
||||||
|
*
|
||||||
|
* @param int $flags the optional second parameter flags may be used to identify specific elements
|
||||||
|
* that should be loaded, but which won't be loaded by default, using these values:
|
||||||
|
* IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file
|
||||||
*/
|
*/
|
||||||
public function load(string $filename, int $flags = 0): Spreadsheet
|
public function load(string $filename, int $flags = 0): Spreadsheet
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -108,6 +108,22 @@ class IOFactoryTest extends TestCase
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testFormatAsExpected(): void
|
||||||
|
{
|
||||||
|
$fileName = 'samples/templates/30template.xls';
|
||||||
|
|
||||||
|
$actual = IOFactory::identify($fileName, [IOFactory::READER_XLS]);
|
||||||
|
self::assertSame('Xls', $actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testFormatNotAsExpectedThrowsException(): void
|
||||||
|
{
|
||||||
|
$fileName = 'samples/templates/30template.xls';
|
||||||
|
|
||||||
|
$this->expectException(ReaderException::class);
|
||||||
|
IOFactory::identify($fileName, [IOFactory::READER_ODS]);
|
||||||
|
}
|
||||||
|
|
||||||
public function testIdentifyNonExistingFileThrowException(): void
|
public function testIdentifyNonExistingFileThrowException(): void
|
||||||
{
|
{
|
||||||
$this->expectException(ReaderException::class);
|
$this->expectException(ReaderException::class);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue