When image source is a URL, store the URL for use during extraction. (#2072)

When the image source is a link, store the link.
Add url mutator.

Update section in documentation on image extraction.
This commit is contained in:
jarrett jordaan 2021-06-24 10:50:44 +02:00 committed by GitHub
parent d0dd5b4594
commit 795992835f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 359 additions and 19 deletions

View File

@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
## Unreleased - TBD ## Unreleased - TBD
### Added ### Added
- Add ability to extract images if source is a URL. [Issue #1997](https://github.com/PHPOffice/PhpSpreadsheet/issues/1997) [PR #2072](https://github.com/PHPOffice/PhpSpreadsheet/pull/2072)
- Support for passing flags in the Reader `load()` and Writer `save()`methods, and through the IOFactory, to set behaviours. [PR #2136](https://github.com/PHPOffice/PhpSpreadsheet/pull/2136) - Support for passing flags in the Reader `load()` and Writer `save()`methods, and through the IOFactory, to set behaviours. [PR #2136](https://github.com/PHPOffice/PhpSpreadsheet/pull/2136)
- See [documentation](https://phpspreadsheet.readthedocs.io/en/latest/topics/reading-and-writing-to-file/) for details - See [documentation](https://phpspreadsheet.readthedocs.io/en/latest/topics/reading-and-writing-to-file/) for details

View File

@ -1372,9 +1372,11 @@ The following code extracts images from the current active worksheet,
and writes each as a separate file. and writes each as a separate file.
```php ```php
use PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing;
$i = 0; $i = 0;
foreach ($spreadsheet->getActiveSheet()->getDrawingCollection() as $drawing) { foreach ($spreadsheet->getActiveSheet()->getDrawingCollection() as $drawing) {
if ($drawing instanceof \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing) { if ($drawing instanceof MemoryDrawing) {
ob_start(); ob_start();
call_user_func( call_user_func(
$drawing->getRenderingFunction(), $drawing->getRenderingFunction(),
@ -1383,24 +1385,39 @@ foreach ($spreadsheet->getActiveSheet()->getDrawingCollection() as $drawing) {
$imageContents = ob_get_contents(); $imageContents = ob_get_contents();
ob_end_clean(); ob_end_clean();
switch ($drawing->getMimeType()) { switch ($drawing->getMimeType()) {
case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_PNG : case MemoryDrawing::MIMETYPE_PNG :
$extension = 'png'; $extension = 'png';
break; break;
case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_GIF: case MemoryDrawing::MIMETYPE_GIF:
$extension = 'gif'; $extension = 'gif';
break; break;
case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_JPEG : case MemoryDrawing::MIMETYPE_JPEG :
$extension = 'jpg'; $extension = 'jpg';
break; break;
} }
} else { } else {
$zipReader = fopen($drawing->getPath(),'r'); if ($drawing->getPath()) {
$imageContents = ''; // Check if the source is a URL or a file path
while (!feof($zipReader)) { if ($drawing->getIsURL()) {
$imageContents .= fread($zipReader,1024); $imageContents = file_get_contents($drawing->getPath());
$filePath = tempnam(sys_get_temp_dir(), 'Drawing');
file_put_contents($filePath , $imageContents);
$mimeType = mime_content_type($filePath);
// You could use the below to find the extension from mime type.
// https://gist.github.com/alexcorvi/df8faecb59e86bee93411f6a7967df2c#gistcomment-2722664
$extension = File::mime2ext($mimeType);
unlink($filePath);
}
else {
$zipReader = fopen($drawing->getPath(),'r');
$imageContents = '';
while (!feof($zipReader)) {
$imageContents .= fread($zipReader,1024);
}
fclose($zipReader);
$extension = $drawing->getExtension();
}
} }
fclose($zipReader);
$extension = $drawing->getExtension();
} }
$myFileName = '00_Image_'.++$i.'.'.$extension; $myFileName = '00_Image_'.++$i.'.'.$extension;
file_put_contents($myFileName,$imageContents); file_put_contents($myFileName,$imageContents);

View File

@ -1149,17 +1149,25 @@ class Xlsx extends BaseReader
$objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing(); $objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing();
$objDrawing->setName((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name')); $objDrawing->setName((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name'));
$objDrawing->setDescription((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr')); $objDrawing->setDescription((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr'));
$imageKey = (string) self::getArrayItem( $embedImageKey = (string) self::getArrayItem(
$blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'),
'embed' 'embed'
); );
if (isset($images[$embedImageKey])) {
if (isset($images[$imageKey])) {
$objDrawing->setPath( $objDrawing->setPath(
'zip://' . File::realpath($pFilename) . '#' . 'zip://' . File::realpath($pFilename) . '#' .
$images[$imageKey], $images[$embedImageKey],
false false
); );
} else {
$linkImageKey = (string) self::getArrayItem(
$blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'),
'link'
);
if (isset($images[$linkImageKey])) {
$url = str_replace('xl/drawings/', '', $images[$linkImageKey]);
$objDrawing->setPath($url);
}
} }
$objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $oneCellAnchor->from->col) + 1) . ($oneCellAnchor->from->row + 1)); $objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $oneCellAnchor->from->col) + 1) . ($oneCellAnchor->from->row + 1));
@ -1220,16 +1228,25 @@ class Xlsx extends BaseReader
$objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing(); $objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing();
$objDrawing->setName((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name')); $objDrawing->setName((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name'));
$objDrawing->setDescription((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr')); $objDrawing->setDescription((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr'));
$imageKey = (string) self::getArrayItem( $embedImageKey = (string) self::getArrayItem(
$blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'),
'embed' 'embed'
); );
if (isset($images[$imageKey])) { if (isset($images[$embedImageKey])) {
$objDrawing->setPath( $objDrawing->setPath(
'zip://' . File::realpath($pFilename) . '#' . 'zip://' . File::realpath($pFilename) . '#' .
$images[$imageKey], $images[$embedImageKey],
false false
); );
} else {
$linkImageKey = (string) self::getArrayItem(
$blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'),
'link'
);
if (isset($images[$linkImageKey])) {
$url = str_replace('xl/drawings/', '', $images[$linkImageKey]);
$objDrawing->setPath($url);
}
} }
$objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $twoCellAnchor->from->col) + 1) . ($twoCellAnchor->from->row + 1)); $objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $twoCellAnchor->from->col) + 1) . ($twoCellAnchor->from->row + 1));

View File

@ -13,6 +13,13 @@ class Drawing extends BaseDrawing
*/ */
private $path; private $path;
/**
* Whether or not we are dealing with a URL.
*
* @var bool
*/
private $isUrl;
/** /**
* Create a new Drawing. * Create a new Drawing.
*/ */
@ -20,6 +27,7 @@ class Drawing extends BaseDrawing
{ {
// Initialise values // Initialise values
$this->path = ''; $this->path = '';
$this->isUrl = false;
// Initialize parent // Initialize parent
parent::__construct(); parent::__construct();
@ -81,9 +89,25 @@ class Drawing extends BaseDrawing
public function setPath($pValue, $pVerifyFile = true) public function setPath($pValue, $pVerifyFile = true)
{ {
if ($pVerifyFile) { if ($pVerifyFile) {
if (file_exists($pValue)) { // Check if a URL has been passed. https://stackoverflow.com/a/2058596/1252979
if (filter_var($pValue, FILTER_VALIDATE_URL)) {
$this->path = $pValue;
// Implicit that it is a URL, rather store info than running check above on value in other places.
$this->isUrl = true;
$imageContents = file_get_contents($pValue);
$filePath = tempnam(sys_get_temp_dir(), 'Drawing');
if ($filePath) {
file_put_contents($filePath, $imageContents);
if (file_exists($filePath)) {
if ($this->width == 0 && $this->height == 0) {
// Get width/height
[$this->width, $this->height] = getimagesize($filePath);
}
unlink($filePath);
}
}
} elseif (file_exists($pValue)) {
$this->path = $pValue; $this->path = $pValue;
if ($this->width == 0 && $this->height == 0) { if ($this->width == 0 && $this->height == 0) {
// Get width/height // Get width/height
[$this->width, $this->height] = getimagesize($pValue); [$this->width, $this->height] = getimagesize($pValue);
@ -98,6 +122,26 @@ class Drawing extends BaseDrawing
return $this; return $this;
} }
/**
 * Report whether this drawing's path has been flagged as a URL.
 *
 * The flag starts out false in the constructor and is switched on by
 * setPath() when the supplied value passes FILTER_VALIDATE_URL, or
 * explicitly through setIsURL().
 *
 * @return bool the current value of the URL flag
 */
public function getIsURL(): bool
{
return $this->isUrl;
}
/**
 * Explicitly mark (or unmark) this drawing's path as a URL.
 *
 * Only the flag is stored; the path itself is neither inspected nor
 * modified by this mutator.
 *
 * @param bool $isUrl true when the path should be treated as a URL
 *
 * @return $this for fluent chaining
 */
public function setIsURL(bool $isUrl): self
{
$this->isUrl = $isUrl;
return $this;
}
/** /**
* Get hash code. * Get hash code.
* *

View File

@ -0,0 +1,199 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Utility;
/**
 * Test utility that maps MIME types to conventional file extensions.
 */
class File
{
    /**
     * Lookup table from lower-case MIME type to file extension.
     *
     * Source: https://stackoverflow.com/questions/16511021/convert-mime-type-to-file-extension-php
     *
     * NOTE(review): a few entries look questionable (e.g. 'application/octet-stream' => 'pdf',
     * 'application/vnd.ms-excel' => 'xlsx'); they are kept as-is to preserve existing results.
     */
    private const MIME_MAP = [
        'video/3gpp2' => '3g2',
        'video/3gp' => '3gp',
        'video/3gpp' => '3gp',
        'application/x-compressed' => '7zip',
        'audio/x-acc' => 'aac',
        'audio/ac3' => 'ac3',
        'application/postscript' => 'ai',
        'audio/x-aiff' => 'aif',
        'audio/aiff' => 'aif',
        'audio/x-au' => 'au',
        'video/x-msvideo' => 'avi',
        'video/msvideo' => 'avi',
        'video/avi' => 'avi',
        'application/x-troff-msvideo' => 'avi',
        'application/macbinary' => 'bin',
        'application/mac-binary' => 'bin',
        'application/x-binary' => 'bin',
        'application/x-macbinary' => 'bin',
        'image/bmp' => 'bmp',
        'image/x-bmp' => 'bmp',
        'image/x-bitmap' => 'bmp',
        'image/x-xbitmap' => 'bmp',
        'image/x-win-bitmap' => 'bmp',
        'image/x-windows-bmp' => 'bmp',
        'image/ms-bmp' => 'bmp',
        'image/x-ms-bmp' => 'bmp',
        'application/bmp' => 'bmp',
        'application/x-bmp' => 'bmp',
        'application/x-win-bitmap' => 'bmp',
        'application/cdr' => 'cdr',
        'application/coreldraw' => 'cdr',
        'application/x-cdr' => 'cdr',
        'application/x-coreldraw' => 'cdr',
        'image/cdr' => 'cdr',
        'image/x-cdr' => 'cdr',
        'zz-application/zz-winassoc-cdr' => 'cdr',
        'application/mac-compactpro' => 'cpt',
        'application/pkix-crl' => 'crl',
        'application/pkcs-crl' => 'crl',
        'application/x-x509-ca-cert' => 'crt',
        'application/pkix-cert' => 'crt',
        'text/css' => 'css',
        'text/x-comma-separated-values' => 'csv',
        'text/comma-separated-values' => 'csv',
        'application/vnd.msexcel' => 'csv',
        'application/x-director' => 'dcr',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
        'application/x-dvi' => 'dvi',
        'message/rfc822' => 'eml',
        'application/x-msdownload' => 'exe',
        'video/x-f4v' => 'f4v',
        'audio/x-flac' => 'flac',
        'video/x-flv' => 'flv',
        'image/gif' => 'gif',
        'application/gpg-keys' => 'gpg',
        'application/x-gtar' => 'gtar',
        'application/x-gzip' => 'gzip',
        'application/mac-binhex40' => 'hqx',
        'application/mac-binhex' => 'hqx',
        'application/x-binhex40' => 'hqx',
        'application/x-mac-binhex40' => 'hqx',
        'text/html' => 'html',
        'image/x-icon' => 'ico',
        'image/x-ico' => 'ico',
        'image/vnd.microsoft.icon' => 'ico',
        'text/calendar' => 'ics',
        'application/java-archive' => 'jar',
        'application/x-java-application' => 'jar',
        'application/x-jar' => 'jar',
        'image/jp2' => 'jp2',
        'video/mj2' => 'jp2',
        'image/jpx' => 'jp2',
        'image/jpm' => 'jp2',
        'image/jpeg' => 'jpeg',
        'image/pjpeg' => 'jpeg',
        'application/x-javascript' => 'js',
        'application/json' => 'json',
        'text/json' => 'json',
        'application/vnd.google-earth.kml+xml' => 'kml',
        'application/vnd.google-earth.kmz' => 'kmz',
        'text/x-log' => 'log',
        'audio/x-m4a' => 'm4a',
        'audio/mp4' => 'm4a',
        'application/vnd.mpegurl' => 'm4u',
        'audio/midi' => 'mid',
        'application/vnd.mif' => 'mif',
        'video/quicktime' => 'mov',
        'video/x-sgi-movie' => 'movie',
        'audio/mpeg' => 'mp3',
        'audio/mpg' => 'mp3',
        'audio/mpeg3' => 'mp3',
        'audio/mp3' => 'mp3',
        'video/mp4' => 'mp4',
        'video/mpeg' => 'mpeg',
        'application/oda' => 'oda',
        'audio/ogg' => 'ogg',
        'video/ogg' => 'ogg',
        'application/ogg' => 'ogg',
        'font/otf' => 'otf',
        'application/x-pkcs10' => 'p10',
        'application/pkcs10' => 'p10',
        'application/x-pkcs12' => 'p12',
        'application/x-pkcs7-signature' => 'p7a',
        'application/pkcs7-mime' => 'p7c',
        'application/x-pkcs7-mime' => 'p7c',
        'application/x-pkcs7-certreqresp' => 'p7r',
        'application/pkcs7-signature' => 'p7s',
        'application/pdf' => 'pdf',
        'application/octet-stream' => 'pdf',
        'application/x-x509-user-cert' => 'pem',
        'application/x-pem-file' => 'pem',
        'application/pgp' => 'pgp',
        'application/x-httpd-php' => 'php',
        'application/php' => 'php',
        'application/x-php' => 'php',
        'text/php' => 'php',
        'text/x-php' => 'php',
        'application/x-httpd-php-source' => 'php',
        'image/png' => 'png',
        'image/x-png' => 'png',
        'application/powerpoint' => 'ppt',
        'application/vnd.ms-powerpoint' => 'ppt',
        'application/vnd.ms-office' => 'ppt',
        'application/msword' => 'doc',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
        'application/x-photoshop' => 'psd',
        'image/vnd.adobe.photoshop' => 'psd',
        'audio/x-realaudio' => 'ra',
        'audio/x-pn-realaudio' => 'ram',
        'application/x-rar' => 'rar',
        'application/rar' => 'rar',
        'application/x-rar-compressed' => 'rar',
        'audio/x-pn-realaudio-plugin' => 'rpm',
        'application/x-pkcs7' => 'rsa',
        'text/rtf' => 'rtf',
        'text/richtext' => 'rtx',
        'video/vnd.rn-realvideo' => 'rv',
        'application/x-stuffit' => 'sit',
        'application/smil' => 'smil',
        'text/srt' => 'srt',
        'image/svg+xml' => 'svg',
        'application/x-shockwave-flash' => 'swf',
        'application/x-tar' => 'tar',
        'application/x-gzip-compressed' => 'tgz',
        'image/tiff' => 'tiff',
        'font/ttf' => 'ttf',
        'text/plain' => 'txt',
        'text/x-vcard' => 'vcf',
        'application/videolan' => 'vlc',
        'text/vtt' => 'vtt',
        'audio/x-wav' => 'wav',
        'audio/wave' => 'wav',
        'audio/wav' => 'wav',
        'application/wbxml' => 'wbxml',
        'video/webm' => 'webm',
        'image/webp' => 'webp',
        'audio/x-ms-wma' => 'wma',
        'application/wmlc' => 'wmlc',
        'video/x-ms-wmv' => 'wmv',
        'video/x-ms-asf' => 'wmv',
        'font/woff' => 'woff',
        'font/woff2' => 'woff2',
        'application/xhtml+xml' => 'xhtml',
        'application/excel' => 'xl',
        'application/msexcel' => 'xls',
        'application/x-msexcel' => 'xls',
        'application/x-ms-excel' => 'xls',
        'application/x-excel' => 'xls',
        'application/x-dos_ms_excel' => 'xls',
        'application/xls' => 'xls',
        'application/x-xls' => 'xls',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
        'application/vnd.ms-excel' => 'xlsx',
        'application/xml' => 'xml',
        'text/xml' => 'xml',
        'text/xsl' => 'xsl',
        'application/xspf+xml' => 'xspf',
        'application/x-compress' => 'z',
        'application/x-zip' => 'zip',
        'application/zip' => 'zip',
        'application/x-zip-compressed' => 'zip',
        'application/s-compressed' => 'zip',
        'multipart/x-zip' => 'zip',
        'text/x-scriptzsh' => 'zsh',
    ];

    /**
     * Translate a MIME type into a file extension (without the leading dot).
     *
     * The input is normalised before the lookup: any parameters after the
     * first ';' are dropped (e.g. "image/jpeg; charset=binary"), surrounding
     * whitespace is trimmed and the remainder is lower-cased, because media
     * type names are case-insensitive. Inputs that already matched a table
     * key exactly resolve to the same extension as before.
     *
     * @param string $mime MIME type, optionally followed by parameters
     *
     * @return string the mapped extension, or an empty string when the type is unknown
     */
    public static function mime2ext(string $mime): string
    {
        // Keep only the "type/subtype" part; explode() always yields at least one element.
        $mediaType = strtolower(trim(explode(';', $mime, 2)[0]));

        return self::MIME_MAP[$mediaType] ?? '';
    }
}

View File

@ -0,0 +1,62 @@
<?php
namespace PhpOffice\PhpSpreadsheetTests\Reader\Xlsx;
use PhpOffice\PhpSpreadsheet\IOFactory;
use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
use PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing;
use PhpOffice\PhpSpreadsheetTests\Reader\Utility\File;
use PHPUnit\Framework\TestCase;
/**
 * Verifies that an XLSX whose picture is linked by URL (rather than embedded)
 * is loaded as a Drawing flagged as a URL, and that the linked image can be
 * fetched and identified.
 */
class URLImageTest extends TestCase
{
    /**
     * Load the fixture workbook, fetch every URL-sourced drawing and assert
     * that its detected MIME type maps to the "jpeg" extension.
     *
     * NOTE(review): this test downloads the image referenced by the fixture,
     * so it presumably needs network access to pass - confirm for CI.
     */
    public function testURLImageSource(): void
    {
        $filename = realpath(__DIR__ . '/../../../data/Reader/XLSX/urlImage.xlsx');
        if (!$filename) {
            self::fail('No test file found.');
        }

        $reader = IOFactory::createReader('Xlsx');
        $spreadsheet = $reader->load($filename);
        $worksheet = $spreadsheet->getActiveSheet();
        $drawings = $worksheet->getDrawingCollection();

        // empty() is always false for an object, so the previous emptiness check could
        // never fail; count() works for arrays and Countable collections alike.
        // Checking up front also stops the test passing with zero assertions.
        self::assertGreaterThan(0, count($drawings), 'No image found in file.');

        foreach ($drawings as $drawing) {
            if ($drawing instanceof MemoryDrawing) {
                // Memory drawings have no path/URL to verify.
                continue;
            }
            if (!$drawing instanceof Drawing) {
                self::fail('No image path found.');
            }
            if (!$drawing->getPath() || !$drawing->getIsURL()) {
                self::fail('Could not assert that the file contains an image that is URL sourced.');
            }

            $imageContents = file_get_contents($drawing->getPath());
            if ($imageContents === false) {
                self::fail('Could not read image from URL.');
            }

            $filePath = tempnam(sys_get_temp_dir(), 'Drawing');
            if (!$filePath) {
                self::fail('Could not create file path.');
            }

            try {
                // tempnam() already created the file, so its existence proves nothing;
                // check the write result itself.
                if (file_put_contents($filePath, $imageContents) === false) {
                    self::fail('Could not write file to disk.');
                }

                $mimeType = mime_content_type($filePath);
                if (!$mimeType) {
                    self::fail('Could not establish mime type.');
                }

                self::assertSame('jpeg', File::mime2ext($mimeType));
            } finally {
                // Remove the temp file even when an assertion above fails.
                unlink($filePath);
            }
        }
    }
}

Binary file not shown.