Copy from Xls(x) to Html/Pdf Loses Drawings (#2788)

Drawings in an Xlsx file are stored in such a way that PHP can read their contents using the `zip://` stream wrapper. This does not work, however, when the file is read by PhpSpreadsheet and then saved as Html or Pdf, since a browser will not recognize that protocol even if the file is available. Such drawings need to be written to the Html as embedded images in order for the copy to display them properly. This is true even when the writer is set not to embed images (the default).

An additional problem arises when an Html file with an embedded image is read, because `Worksheet\Drawing::setPath` attempts to validate the path, which it cannot do for the `data:image` Url which embedded images use.

And yet another problem: Writer/Html writes out a MemoryDrawing as a png using the `imagepng` function, but then declares it as jpeg in the Html. This is now corrected.

And a fourth problem: Writer/Html ignores the last row if it contains nothing but a MemoryDrawing, which can happen when copying an Xls file.

These changes are testable (it's how I discovered the second part of this parlay). I think it is also useful to add a sample to see the results of this type of copy.
This commit is contained in:
oleibman 2022-05-07 08:10:24 -07:00 committed by GitHub
parent 9776efc226
commit a32861a0a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 182 additions and 3 deletions

View File

@ -0,0 +1,39 @@
<?php
use PhpOffice\PhpSpreadsheet\IOFactory;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Worksheet\PageSetup;
use PhpOffice\PhpSpreadsheet\Writer\Pdf\Mpdf;
use PhpOffice\PhpSpreadsheet\Writer\Xlsx as XlsxWriter;
require __DIR__ . '/../Header.php';

// Sample: copy a workbook that contains drawings from Xls to Xlsx, reload the
// Xlsx copy, and render it as Pdf (via Mpdf) and as Html.
// NOTE(review): $helper is not defined in this script; presumably Header.php provides it.

// Read from Xls (.xls) template
$helper->log('Load Xls template file');
$reader = IOFactory::createReader('Xls');
$initialSpreadsheet = $reader->load(__DIR__ . '/../templates/27template.xls');

// Intermediate Xlsx copy; removed in the finally block below so that it does
// not linger when any later step throws.
$xlsxFile = File::temporaryFilename();

try {
    $writer = new XlsxWriter($initialSpreadsheet);
    $helper->log('Save as Xlsx');
    $writer->save($xlsxFile);
    $initialSpreadsheet->disconnectWorksheets();

    $reader2 = new XlsxReader();
    $helper->log('Load Xlsx');
    $spreadsheet = $reader2->load($xlsxFile);

    $helper->log('Hide grid lines');
    $spreadsheet->getActiveSheet()->setShowGridLines(false);

    $helper->log('Set orientation to landscape');
    $spreadsheet->getActiveSheet()->getPageSetup()->setOrientation(PageSetup::ORIENTATION_LANDSCAPE);

    $className = Mpdf::class;
    $helper->log("Write to PDF format using {$className}, and to Html");
    IOFactory::registerWriter('Pdf', $className);

    // Save
    $helper->write($spreadsheet, __FILE__, ['Pdf', 'Html']);
    $spreadsheet->disconnectWorksheets();
} finally {
    // Guarded so that cleanup itself cannot raise a warning if the file is already gone.
    if (is_file($xlsxFile)) {
        unlink($xlsxFile);
    }
}

View File

@ -106,7 +106,7 @@ class Drawing extends BaseDrawing
*/
public function setPath($path, $verifyFile = true, $zip = null)
{
if ($verifyFile) {
if ($verifyFile && preg_match('~^data:image/[a-z]+;base64,~', $path) !== 1) {
// Check if a URL has been passed. https://stackoverflow.com/a/2058596/1252979
if (filter_var($path, FILTER_VALIDATE_URL)) {
$this->path = $path;

View File

@ -485,6 +485,7 @@ class Html extends BaseWriter
$html .= $endTag;
}
--$row;
$html .= $this->extendRowsForChartsAndImages($sheet, $row);
// Write table footer
@ -675,7 +676,7 @@ class Html extends BaseWriter
$html .= PHP_EOL;
$imageData = self::winFileToUrl($filename);
if ($this->embedImages && !$this->isPdf) {
if (($this->embedImages && !$this->isPdf) || substr($imageData, 0, 6) === 'zip://') {
$picture = @file_get_contents($filename);
if ($picture !== false) {
$imageDetails = getimagesize($filename);
@ -699,7 +700,7 @@ class Html extends BaseWriter
ob_end_clean(); // End the output buffer.
/** @phpstan-ignore-next-line */
$dataUri = 'data:image/jpeg;base64,' . base64_encode($contents);
$dataUri = 'data:image/png;base64,' . base64_encode($contents);
// Because of the nature of tables, width is more important than height.
// max-width: 100% ensures that image doesnt overflow containing cell

View File

@ -0,0 +1,70 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Writer\Html;
use PhpOffice\PhpSpreadsheet\Reader\Xls as XlsReader;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Writer\Html;
use PhpOffice\PhpSpreadsheet\Writer\Xlsx as XlsxWriter;
use PhpOffice\PhpSpreadsheetTests\Functional;
class ImageCopyTest extends Functional\AbstractFunctional
{
    /**
     * Path of the temporary Xlsx copy recorded by a test; empty string when none is pending.
     *
     * @var string
     */
    private $xlsxFile = '';

    /**
     * Remove the temporary Xlsx file, if the test that just ran recorded one.
     */
    protected function tearDown(): void
    {
        if ($this->xlsxFile === '') {
            return;
        }

        unlink($this->xlsxFile);
        $this->xlsxFile = '';
    }

    /**
     * Load the Xls template and check the image tags in the Html generated from it.
     */
    public function testImageCopyXls(): void
    {
        $xlsReader = new XlsReader();
        $workbook = $xlsReader->load('samples/templates/27template.xls');

        $htmlWriter = new Html($workbook);
        $htmlWriter->writeAllSheets();
        // Images must appear as data URIs below even though embedding is off.
        self::assertFalse($htmlWriter->getEmbedImages());

        $markup = $htmlWriter->generateHTMLAll();
        self::assertSame(4, substr_count($markup, '<img'));
        self::assertSame(0, substr_count($markup, 'zip://'));
        // every one of the 4 images is expected as an embedded png
        self::assertSame(4, substr_count($markup, 'data:image/png;base64'));

        $this->writeAndReload($workbook, 'Html');
        $workbook->disconnectWorksheets();
    }

    /**
     * Copy the Xls template to a temporary Xlsx file, reload that copy, and
     * check the image tags in the Html generated from it.
     */
    public function testImageCopyXlsx(): void
    {
        $xlsReader = new XlsReader();
        $original = $xlsReader->load('samples/templates/27template.xls');

        // Recorded on the instance so tearDown() can delete it afterwards.
        $this->xlsxFile = File::temporaryFilename();
        $xlsxWriter = new XlsxWriter($original);
        $xlsxWriter->save($this->xlsxFile);
        $original->disconnectWorksheets();

        $xlsxReader = new XlsxReader();
        $workbook = $xlsxReader->load($this->xlsxFile);

        $htmlWriter = new Html($workbook);
        $htmlWriter->writeAllSheets();
        self::assertFalse($htmlWriter->getEmbedImages());

        $markup = $htmlWriter->generateHTMLAll();
        self::assertSame(4, substr_count($markup, '<img'));
        self::assertSame(0, substr_count($markup, 'zip://'));
        // The "gif" is actually stored as png in this file, so no
        // data:image/gif URI is expected: 2 png + 2 jpeg.
        self::assertSame(2, substr_count($markup, 'data:image/png;base64'));
        self::assertSame(2, substr_count($markup, 'data:image/jpeg;base64'));

        $this->writeAndReload($workbook, 'Html');
        $workbook->disconnectWorksheets();
    }
}

View File

@ -0,0 +1,69 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Writer\Html;
use PhpOffice\PhpSpreadsheet\Reader\Xls as XlsReader;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Writer\Pdf\Mpdf;
use PhpOffice\PhpSpreadsheet\Writer\Xlsx as XlsxWriter;
use PhpOffice\PhpSpreadsheetTests\Functional;
class ImageCopyPdfTest extends Functional\AbstractFunctional
{
    /**
     * Path of the temporary Xlsx copy recorded by a test; empty string when none is pending.
     *
     * @var string
     */
    private $xlsxFile = '';

    /**
     * Remove the temporary Xlsx file, if the test that just ran recorded one.
     */
    protected function tearDown(): void
    {
        if ($this->xlsxFile !== '') {
            unlink($this->xlsxFile);
            $this->xlsxFile = '';
        }
    }

    /**
     * Load the Xls template and check the image tags in the Html that the
     * Mpdf writer generates from it.
     */
    public function testImageCopyXls(): void
    {
        $file = 'samples/templates/27template.xls';
        $reader = new XlsReader();
        $reloadedSpreadsheet = $reader->load($file);
        // No temporary file is requested here: this test never writes an
        // intermediate Xlsx copy, so there is nothing for tearDown() to remove.
        $writer = new Mpdf($reloadedSpreadsheet);
        // Images must appear as data URIs below even though embedding is off.
        self::assertFalse($writer->getEmbedImages());
        $html = $writer->generateHTMLAll();
        self::assertSame(4, substr_count($html, '<img'));
        self::assertSame(0, substr_count($html, 'zip://'));
        // all 4 images converted to png
        self::assertSame(4, substr_count($html, 'data:image/png;base64'));
        $this->writeAndReload($reloadedSpreadsheet, 'Html');
        $reloadedSpreadsheet->disconnectWorksheets();
    }

    /**
     * Copy the Xls template to a temporary Xlsx file, reload that copy, and
     * check the image tags in the Html that the Mpdf writer generates from it.
     */
    public function testImageCopyXlsx(): void
    {
        $file = 'samples/templates/27template.xls';
        $reader = new XlsReader();
        $spreadsheet = $reader->load($file);
        // Recorded on the instance so tearDown() can delete it afterwards.
        $this->xlsxFile = File::temporaryFilename();
        $writer = new XlsxWriter($spreadsheet);
        $writer->save($this->xlsxFile);
        $spreadsheet->disconnectWorksheets();
        $reader2 = new XlsxReader();
        $reloadedSpreadsheet = $reader2->load($this->xlsxFile);
        $writer = new Mpdf($reloadedSpreadsheet);
        self::assertFalse($writer->getEmbedImages());
        $html = $writer->generateHTMLAll();
        self::assertSame(4, substr_count($html, '<img'));
        self::assertSame(0, substr_count($html, 'zip://'));
        // The "gif" is actually stored as png in this file, so no
        // data:image/gif URI is expected: 2 png + 2 jpeg.
        self::assertSame(2, substr_count($html, 'data:image/png;base64'));
        self::assertSame(2, substr_count($html, 'data:image/jpeg;base64'));
        $this->writeAndReload($reloadedSpreadsheet, 'Html');
        $reloadedSpreadsheet->disconnectWorksheets();
    }
}