Allow Reader format identification to use a subset of possible Readers
This commit is contained in:
parent
c05fb6efa3
commit
4a65011a2f
|
|
@ -80,7 +80,8 @@ semi-colon (`;`) are used as separators instead of a comma, although
|
|||
other symbols can be used. Because CSV is a text-only format, it doesn't
|
||||
support any data formatting options.
|
||||
|
||||
"CSV" is not a single, well-defined format (although see RFC 4180 for
|
||||
"CSV" is not a single, well-defined format (although see
|
||||
[RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.html) for
|
||||
one definition that is commonly used). Rather, in practice the term
|
||||
"CSV" refers to any file that:
|
||||
|
||||
|
|
@ -117,5 +118,5 @@ Wide Web Consortium (W3C). However, in 2000, HTML also became an
|
|||
international standard (ISO/IEC 15445:2000). HTML 4.01 was published in
|
||||
late 1999, with further errata published through 2001. In 2004
|
||||
development began on HTML5 in the Web Hypertext Application Technology
|
||||
Working Group (WHATWG), which became a joint deliverable with the W3C in
|
||||
2008.
|
||||
Working Group (WHATWG), which became a joint deliverable with the W3C in 2008.
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,22 @@ practise), it will reject the Xls loader that it would normally use for
|
|||
a .xls file; and test the file using the other loaders until it finds
|
||||
the appropriate loader, and then use that to read the file.
|
||||
|
||||
If you know that this is an `xls` file, but don't know whether it is a
|
||||
genuine BIFF-format Excel file or HTML markup with an xls extension, you can
|
||||
limit the loader to check only those two possibilities by passing in an
|
||||
array of Readers to test against.
|
||||
|
||||
```php
|
||||
$inputFileName = './sampleData/example1.xls';
|
||||
$testAgainstFormats = [
|
||||
\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS,
|
||||
\PhpOffice\PhpSpreadsheet\IOFactory::READER_HTML,
|
||||
];
|
||||
|
||||
/** Load $inputFileName to a Spreadsheet Object **/
|
||||
$spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($inputFileName, 0, $testAgainstFormats);
|
||||
```
|
||||
|
||||
While this is easy to implement in your code, and you don't need to worry about
|
||||
the file type, this isn't the most efficient method to load a file; and
|
||||
it lacks the flexibility to configure the loader in any way before
|
||||
|
|
@ -118,6 +134,34 @@ $spreadsheet = $reader->load($inputFileName);
|
|||
See `samples/Reader/04_Simple_file_reader_using_the_IOFactory_to_identify_a_reader_to_use.php`
|
||||
for a working example of this code.
|
||||
|
||||
As with the IOFactory `load()` method, you can also pass an array of formats
|
||||
for the `identify()` method to check against if you know that the file will only
|
||||
be in a subset of the possible formats that PhpSpreadsheet supports.
|
||||
|
||||
```php
|
||||
$inputFileName = './sampleData/example1.xls';
|
||||
$testAgainstFormats = [
|
||||
\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS,
|
||||
\PhpOffice\PhpSpreadsheet\IOFactory::READER_HTML,
|
||||
];
|
||||
|
||||
/** Identify the type of $inputFileName **/
|
||||
$inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($inputFileName, $testAgainstFormats);
|
||||
```
|
||||
|
||||
You can also use this to confirm that a file is what it claims to be:
|
||||
|
||||
```php
|
||||
$inputFileName = './sampleData/example1.xls';
|
||||
|
||||
try {
|
||||
/** Verify that $inputFileName really is an Xls file **/
|
||||
$inputFileType = \PhpOffice\PhpSpreadsheet\IOFactory::identify($inputFileName, [\PhpOffice\PhpSpreadsheet\IOFactory::READER_XLS]);
|
||||
} catch (\PhpOffice\PhpSpreadsheet\Reader\Exception $e) {
|
||||
// File isn't actually an Xls file, even though it has an xls extension
|
||||
}
|
||||
```
|
||||
|
||||
## Spreadsheet Reader Options
|
||||
|
||||
Once you have created a reader object for the workbook that you want to
|
||||
|
|
@ -146,7 +190,7 @@ $spreadsheet = $reader->load($inputFileName);
|
|||
See `samples/Reader/05_Simple_file_reader_using_the_read_data_only_option.php`
|
||||
for a working example of this code.
|
||||
|
||||
It is important to note that Workbooks (and PhpSpreadsheet) store dates
|
||||
It is important to note that most Workbooks (and PhpSpreadsheet) store dates
|
||||
and times as simple numeric values: they can only be distinguished from
|
||||
other numeric values by the format mask that is applied to that cell.
|
||||
When setting read data only to true, PhpSpreadsheet doesn't read the
|
||||
|
|
|
|||
|
|
@ -14,23 +14,39 @@ use PhpOffice\PhpSpreadsheet\Writer\IWriter;
|
|||
*/
|
||||
abstract class IOFactory
|
||||
{
|
||||
public const READER_XLSX = 'Xlsx';
|
||||
public const READER_XLS = 'Xls';
|
||||
public const READER_XML = 'Xml';
|
||||
public const READER_ODS = 'Ods';
|
||||
public const READER_SYLK = 'Slk';
|
||||
public const READER_SLK = 'Slk';
|
||||
public const READER_GNUMERIC = 'Gnumeric';
|
||||
public const READER_HTML = 'Html';
|
||||
public const READER_CSV = 'Csv';
|
||||
|
||||
public const WRITER_XLSX = 'Xlsx';
|
||||
public const WRITER_XLS = 'Xls';
|
||||
public const WRITER_ODS = 'Ods';
|
||||
public const WRITER_CSV = 'Csv';
|
||||
public const WRITER_HTML = 'Html';
|
||||
|
||||
private static $readers = [
|
||||
'Xlsx' => Reader\Xlsx::class,
|
||||
'Xls' => Reader\Xls::class,
|
||||
'Xml' => Reader\Xml::class,
|
||||
'Ods' => Reader\Ods::class,
|
||||
'Slk' => Reader\Slk::class,
|
||||
'Gnumeric' => Reader\Gnumeric::class,
|
||||
'Html' => Reader\Html::class,
|
||||
'Csv' => Reader\Csv::class,
|
||||
self::READER_XLSX => Reader\Xlsx::class,
|
||||
self::READER_XLS => Reader\Xls::class,
|
||||
self::READER_XML => Reader\Xml::class,
|
||||
self::READER_ODS => Reader\Ods::class,
|
||||
self::READER_SLK => Reader\Slk::class,
|
||||
self::READER_GNUMERIC => Reader\Gnumeric::class,
|
||||
self::READER_HTML => Reader\Html::class,
|
||||
self::READER_CSV => Reader\Csv::class,
|
||||
];
|
||||
|
||||
private static $writers = [
|
||||
'Xls' => Writer\Xls::class,
|
||||
'Xlsx' => Writer\Xlsx::class,
|
||||
'Ods' => Writer\Ods::class,
|
||||
'Csv' => Writer\Csv::class,
|
||||
'Html' => Writer\Html::class,
|
||||
self::WRITER_XLS => Writer\Xls::class,
|
||||
self::WRITER_XLSX => Writer\Xlsx::class,
|
||||
self::WRITER_ODS => Writer\Ods::class,
|
||||
self::WRITER_CSV => Writer\Csv::class,
|
||||
self::WRITER_HTML => Writer\Html::class,
|
||||
'Tcpdf' => Writer\Pdf\Tcpdf::class,
|
||||
'Dompdf' => Writer\Pdf\Dompdf::class,
|
||||
'Mpdf' => Writer\Pdf\Mpdf::class,
|
||||
|
|
@ -70,10 +86,18 @@ abstract class IOFactory
|
|||
* Loads Spreadsheet from file using automatic Reader\IReader resolution.
|
||||
*
|
||||
* @param string $filename The name of the spreadsheet file
|
||||
* @param int $flags the optional second parameter flags may be used to identify specific elements
|
||||
* that should be loaded, but which won't be loaded by default, using these values:
|
||||
* IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file
|
||||
* @param string[] $readers An array of Readers to use to identify the file type. By default, load() will try
|
||||
* all possible Readers until it finds a match; but this allows you to pass in a
|
||||
* list of Readers so it will only try the subset that you specify here.
|
||||
* Values in this list can be any of the constant values defined in the set
|
||||
* IOFactory::READER_*.
|
||||
*/
|
||||
public static function load(string $filename, int $flags = 0): Spreadsheet
|
||||
public static function load(string $filename, int $flags = 0, ?array $readers = null): Spreadsheet
|
||||
{
|
||||
$reader = self::createReaderForFile($filename);
|
||||
$reader = self::createReaderForFile($filename, $readers);
|
||||
|
||||
return $reader->load($filename, $flags);
|
||||
}
|
||||
|
|
@ -81,9 +105,9 @@ abstract class IOFactory
|
|||
/**
|
||||
* Identify file type using automatic IReader resolution.
|
||||
*/
|
||||
public static function identify(string $filename): string
|
||||
public static function identify(string $filename, ?array $readers = null): string
|
||||
{
|
||||
$reader = self::createReaderForFile($filename);
|
||||
$reader = self::createReaderForFile($filename, $readers);
|
||||
$className = get_class($reader);
|
||||
$classType = explode('\\', $className);
|
||||
unset($reader);
|
||||
|
|
@ -93,14 +117,32 @@ abstract class IOFactory
|
|||
|
||||
/**
|
||||
* Create Reader\IReader for file using automatic IReader resolution.
|
||||
*
|
||||
* @param string[] $readers An array of Readers to use to identify the file type. By default, this method will try
|
||||
* all possible Readers until it finds a match; but this allows you to pass in a
|
||||
* list of Readers so it will only try the subset that you specify here.
|
||||
* Values in this list can be any of the constant values defined in the set
|
||||
* IOFactory::READER_*.
|
||||
*/
|
||||
public static function createReaderForFile(string $filename): IReader
|
||||
public static function createReaderForFile(string $filename, ?array $readers = null): IReader
|
||||
{
|
||||
File::assertFile($filename);
|
||||
|
||||
$testReaders = self::$readers;
|
||||
if ($readers !== null) {
|
||||
$readers = array_map('strtoupper', $readers);
|
||||
$testReaders = array_filter(
|
||||
self::$readers,
|
||||
function (string $readerType) use ($readers) {
|
||||
return in_array(strtoupper($readerType), $readers, true);
|
||||
},
|
||||
ARRAY_FILTER_USE_KEY
|
||||
);
|
||||
}
|
||||
|
||||
// First, lucky guess by inspecting file extension
|
||||
$guessedReader = self::getReaderTypeFromExtension($filename);
|
||||
if ($guessedReader !== null) {
|
||||
if (($guessedReader !== null) && array_key_exists($guessedReader, $testReaders)) {
|
||||
$reader = self::createReader($guessedReader);
|
||||
|
||||
// Let's see if we are lucky
|
||||
|
|
@ -110,11 +152,11 @@ abstract class IOFactory
|
|||
}
|
||||
|
||||
// If we reach here then "lucky guess" didn't give any result
|
||||
// Try walking through all the options in self::$autoResolveClasses
|
||||
foreach (self::$readers as $type => $class) {
|
||||
// Try walking through all the options in self::$readers (or the selected subset)
|
||||
foreach ($testReaders as $readerType => $class) {
|
||||
// Ignore our original guess, we know that won't work
|
||||
if ($type !== $guessedReader) {
|
||||
$reader = self::createReader($type);
|
||||
if ($readerType !== $guessedReader) {
|
||||
$reader = self::createReader($readerType);
|
||||
if ($reader->canRead($filename)) {
|
||||
return $reader;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -153,6 +153,10 @@ abstract class BaseReader implements IReader
|
|||
|
||||
/**
|
||||
* Loads Spreadsheet from file.
|
||||
*
|
||||
* @param int $flags the optional second parameter flags may be used to identify specific elements
|
||||
* that should be loaded, but which won't be loaded by default, using these values:
|
||||
* IReader::LOAD_WITH_CHARTS - Include any charts that are defined in the loaded file
|
||||
*/
|
||||
public function load(string $filename, int $flags = 0): Spreadsheet
|
||||
{
|
||||
|
|
|
|||
|
|
@ -108,6 +108,22 @@ class IOFactoryTest extends TestCase
|
|||
];
|
||||
}
|
||||
|
||||
public function testFormatAsExpected(): void
|
||||
{
|
||||
$fileName = 'samples/templates/30template.xls';
|
||||
|
||||
$actual = IOFactory::identify($fileName, [IOFactory::READER_XLS]);
|
||||
self::assertSame('Xls', $actual);
|
||||
}
|
||||
|
||||
public function testFormatNotAsExpectedThrowsException(): void
|
||||
{
|
||||
$fileName = 'samples/templates/30template.xls';
|
||||
|
||||
$this->expectException(ReaderException::class);
|
||||
IOFactory::identify($fileName, [IOFactory::READER_ODS]);
|
||||
}
|
||||
|
||||
public function testIdentifyNonExistingFileThrowException(): void
|
||||
{
|
||||
$this->expectException(ReaderException::class);
|
||||
|
|
|
|||
Loading…
Reference in New Issue