diff --git a/CHANGELOG.md b/CHANGELOG.md index ad529aac..040e1a47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). ## Unreleased - TBD ### Added +- Add ability to extract images if source is a URL. [Issue #1997](https://github.com/PHPOffice/PhpSpreadsheet/issues/1997) [PR #2072](https://github.com/PHPOffice/PhpSpreadsheet/pull/2072) - Support for passing flags in the Reader `load()` and Writer `save()`methods, and through the IOFactory, to set behaviours. [PR #2136](https://github.com/PHPOffice/PhpSpreadsheet/pull/2136) - See [documentation](https://phpspreadsheet.readthedocs.io/en/latest/topics/reading-and-writing-to-file/) for details diff --git a/docs/topics/recipes.md b/docs/topics/recipes.md index 471d1dda..ddf315be 100644 --- a/docs/topics/recipes.md +++ b/docs/topics/recipes.md @@ -1372,9 +1372,11 @@ The following code extracts images from the current active worksheet, and writes each as a separate file. 
```php +use PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing; $i = 0; + foreach ($spreadsheet->getActiveSheet()->getDrawingCollection() as $drawing) { - if ($drawing instanceof \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing) { + if ($drawing instanceof MemoryDrawing) { ob_start(); call_user_func( $drawing->getRenderingFunction(), @@ -1383,24 +1385,39 @@ foreach ($spreadsheet->getActiveSheet()->getDrawingCollection() as $drawing) { $imageContents = ob_get_contents(); ob_end_clean(); switch ($drawing->getMimeType()) { - case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_PNG : + case MemoryDrawing::MIMETYPE_PNG : $extension = 'png'; break; - case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_GIF: + case MemoryDrawing::MIMETYPE_GIF: $extension = 'gif'; break; - case \PhpOffice\PhpSpreadsheet\Worksheet\MemoryDrawing::MIMETYPE_JPEG : + case MemoryDrawing::MIMETYPE_JPEG : $extension = 'jpg'; break; } } else { - $zipReader = fopen($drawing->getPath(),'r'); - $imageContents = ''; - while (!feof($zipReader)) { - $imageContents .= fread($zipReader,1024); + if ($drawing->getPath()) { + // Check if the source is a URL or a file path + if ($drawing->getIsURL()) { + $imageContents = file_get_contents($drawing->getPath()); + $filePath = tempnam(sys_get_temp_dir(), 'Drawing'); + file_put_contents($filePath , $imageContents); + $mimeType = mime_content_type($filePath); + // You could use the below to find the extension from mime type. 
+ // https://gist.github.com/alexcorvi/df8faecb59e86bee93411f6a7967df2c#gistcomment-2722664 + $extension = File::mime2ext($mimeType); + unlink($filePath); + } + else { + $zipReader = fopen($drawing->getPath(),'r'); + $imageContents = ''; + while (!feof($zipReader)) { + $imageContents .= fread($zipReader,1024); + } + fclose($zipReader); + $extension = $drawing->getExtension(); + } } - fclose($zipReader); - $extension = $drawing->getExtension(); } $myFileName = '00_Image_'.++$i.'.'.$extension; file_put_contents($myFileName,$imageContents); diff --git a/src/PhpSpreadsheet/Reader/Xlsx.php b/src/PhpSpreadsheet/Reader/Xlsx.php index da4a80d1..50b2a709 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx.php +++ b/src/PhpSpreadsheet/Reader/Xlsx.php @@ -1149,17 +1149,25 @@ class Xlsx extends BaseReader $objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing(); $objDrawing->setName((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name')); $objDrawing->setDescription((string) self::getArrayItem($oneCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr')); - $imageKey = (string) self::getArrayItem( + $embedImageKey = (string) self::getArrayItem( $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'embed' ); - - if (isset($images[$imageKey])) { + if (isset($images[$embedImageKey])) { $objDrawing->setPath( 'zip://' . File::realpath($pFilename) . '#' . - $images[$imageKey], + $images[$embedImageKey], false ); + } else { + $linkImageKey = (string) self::getArrayItem( + $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), + 'link' + ); + if (isset($images[$linkImageKey])) { + $url = str_replace('xl/drawings/', '', $images[$linkImageKey]); + $objDrawing->setPath($url); + } } $objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $oneCellAnchor->from->col) + 1) . 
($oneCellAnchor->from->row + 1)); @@ -1220,16 +1228,25 @@ class Xlsx extends BaseReader $objDrawing = new \PhpOffice\PhpSpreadsheet\Worksheet\Drawing(); $objDrawing->setName((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'name')); $objDrawing->setDescription((string) self::getArrayItem($twoCellAnchor->pic->nvPicPr->cNvPr->attributes(), 'descr')); - $imageKey = (string) self::getArrayItem( + $embedImageKey = (string) self::getArrayItem( $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), 'embed' ); - if (isset($images[$imageKey])) { + if (isset($images[$embedImageKey])) { $objDrawing->setPath( 'zip://' . File::realpath($pFilename) . '#' . - $images[$imageKey], + $images[$embedImageKey], false ); + } else { + $linkImageKey = (string) self::getArrayItem( + $blip->attributes('http://schemas.openxmlformats.org/officeDocument/2006/relationships'), + 'link' + ); + if (isset($images[$linkImageKey])) { + $url = str_replace('xl/drawings/', '', $images[$linkImageKey]); + $objDrawing->setPath($url); + } } $objDrawing->setCoordinates(Coordinate::stringFromColumnIndex(((int) $twoCellAnchor->from->col) + 1) . ($twoCellAnchor->from->row + 1)); diff --git a/src/PhpSpreadsheet/Worksheet/Drawing.php b/src/PhpSpreadsheet/Worksheet/Drawing.php index 1f1dae93..a8cbb79d 100644 --- a/src/PhpSpreadsheet/Worksheet/Drawing.php +++ b/src/PhpSpreadsheet/Worksheet/Drawing.php @@ -13,6 +13,13 @@ class Drawing extends BaseDrawing */ private $path; + /** + * Whether or not we are dealing with a URL. + * + * @var bool + */ + private $isUrl; + /** * Create a new Drawing. */ @@ -20,6 +27,7 @@ class Drawing extends BaseDrawing { // Initialise values $this->path = ''; + $this->isUrl = false; // Initialize parent parent::__construct(); @@ -81,9 +89,25 @@ class Drawing extends BaseDrawing public function setPath($pValue, $pVerifyFile = true) { if ($pVerifyFile) { - if (file_exists($pValue)) { + // Check if a URL has been passed. 
https://stackoverflow.com/a/2058596/1252979
+            // NOTE(review): FILTER_VALIDATE_URL also accepts non-HTTP schemes - confirm that is intended.
+            if (filter_var($pValue, FILTER_VALIDATE_URL)) {
+                $this->path = $pValue;
+                // Record that the path is a URL so other code need not repeat the check above.
+                $this->isUrl = true;
+                if ($this->width == 0 && $this->height == 0) {
+                    // Download only while the dimensions are still unknown.
+                    $imageContents = file_get_contents($pValue);
+                    // A failed download or a non-image payload must leave width/height untouched;
+                    // destructuring a false result would overwrite both of them with null.
+                    $imageSize = ($imageContents === false) ? false : getimagesizefromstring($imageContents);
+                    if ($imageSize !== false) {
+                        // Get width/height
+                        [$this->width, $this->height] = $imageSize;
+                    }
+                }
+            } elseif (file_exists($pValue)) {
                 $this->path = $pValue;
-
                 if ($this->width == 0 && $this->height == 0) {
                     // Get width/height
                     [$this->width, $this->height] = getimagesize($pValue);
@@ -98,6 +122,26 @@ class Drawing extends BaseDrawing
         return $this;
     }
 
+    /**
+     * Whether the path of this drawing is a URL.
+     */
+    public function getIsURL(): bool
+    {
+        return $this->isUrl;
+    }
+
+    /**
+     * Mark whether the path of this drawing is a URL.
+     *
+     * @return $this
+     */
+    public function setIsURL(bool $isUrl): self
+    {
+        $this->isUrl = $isUrl;
+
+        return $this;
+    }
+
     /**
      * Get hash code.
* diff --git a/tests/PhpSpreadsheetTests/Reader/Utility/File.php b/tests/PhpSpreadsheetTests/Reader/Utility/File.php new file mode 100644 index 00000000..f2283326 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Utility/File.php @@ -0,0 +1,199 @@ + '3g2', + 'video/3gp' => '3gp', + 'video/3gpp' => '3gp', + 'application/x-compressed' => '7zip', + 'audio/x-acc' => 'aac', + 'audio/ac3' => 'ac3', + 'application/postscript' => 'ai', + 'audio/x-aiff' => 'aif', + 'audio/aiff' => 'aif', + 'audio/x-au' => 'au', + 'video/x-msvideo' => 'avi', + 'video/msvideo' => 'avi', + 'video/avi' => 'avi', + 'application/x-troff-msvideo' => 'avi', + 'application/macbinary' => 'bin', + 'application/mac-binary' => 'bin', + 'application/x-binary' => 'bin', + 'application/x-macbinary' => 'bin', + 'image/bmp' => 'bmp', + 'image/x-bmp' => 'bmp', + 'image/x-bitmap' => 'bmp', + 'image/x-xbitmap' => 'bmp', + 'image/x-win-bitmap' => 'bmp', + 'image/x-windows-bmp' => 'bmp', + 'image/ms-bmp' => 'bmp', + 'image/x-ms-bmp' => 'bmp', + 'application/bmp' => 'bmp', + 'application/x-bmp' => 'bmp', + 'application/x-win-bitmap' => 'bmp', + 'application/cdr' => 'cdr', + 'application/coreldraw' => 'cdr', + 'application/x-cdr' => 'cdr', + 'application/x-coreldraw' => 'cdr', + 'image/cdr' => 'cdr', + 'image/x-cdr' => 'cdr', + 'zz-application/zz-winassoc-cdr' => 'cdr', + 'application/mac-compactpro' => 'cpt', + 'application/pkix-crl' => 'crl', + 'application/pkcs-crl' => 'crl', + 'application/x-x509-ca-cert' => 'crt', + 'application/pkix-cert' => 'crt', + 'text/css' => 'css', + 'text/x-comma-separated-values' => 'csv', + 'text/comma-separated-values' => 'csv', + 'application/vnd.msexcel' => 'csv', + 'application/x-director' => 'dcr', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx', + 'application/x-dvi' => 'dvi', + 'message/rfc822' => 'eml', + 'application/x-msdownload' => 'exe', + 'video/x-f4v' => 'f4v', + 'audio/x-flac' => 'flac', + 'video/x-flv' => 'flv', + 'image/gif' 
=> 'gif', + 'application/gpg-keys' => 'gpg', + 'application/x-gtar' => 'gtar', + 'application/x-gzip' => 'gzip', + 'application/mac-binhex40' => 'hqx', + 'application/mac-binhex' => 'hqx', + 'application/x-binhex40' => 'hqx', + 'application/x-mac-binhex40' => 'hqx', + 'text/html' => 'html', + 'image/x-icon' => 'ico', + 'image/x-ico' => 'ico', + 'image/vnd.microsoft.icon' => 'ico', + 'text/calendar' => 'ics', + 'application/java-archive' => 'jar', + 'application/x-java-application' => 'jar', + 'application/x-jar' => 'jar', + 'image/jp2' => 'jp2', + 'video/mj2' => 'jp2', + 'image/jpx' => 'jp2', + 'image/jpm' => 'jp2', + 'image/jpeg' => 'jpeg', + 'image/pjpeg' => 'jpeg', + 'application/x-javascript' => 'js', + 'application/json' => 'json', + 'text/json' => 'json', + 'application/vnd.google-earth.kml+xml' => 'kml', + 'application/vnd.google-earth.kmz' => 'kmz', + 'text/x-log' => 'log', + 'audio/x-m4a' => 'm4a', + 'audio/mp4' => 'm4a', + 'application/vnd.mpegurl' => 'm4u', + 'audio/midi' => 'mid', + 'application/vnd.mif' => 'mif', + 'video/quicktime' => 'mov', + 'video/x-sgi-movie' => 'movie', + 'audio/mpeg' => 'mp3', + 'audio/mpg' => 'mp3', + 'audio/mpeg3' => 'mp3', + 'audio/mp3' => 'mp3', + 'video/mp4' => 'mp4', + 'video/mpeg' => 'mpeg', + 'application/oda' => 'oda', + 'audio/ogg' => 'ogg', + 'video/ogg' => 'ogg', + 'application/ogg' => 'ogg', + 'font/otf' => 'otf', + 'application/x-pkcs10' => 'p10', + 'application/pkcs10' => 'p10', + 'application/x-pkcs12' => 'p12', + 'application/x-pkcs7-signature' => 'p7a', + 'application/pkcs7-mime' => 'p7c', + 'application/x-pkcs7-mime' => 'p7c', + 'application/x-pkcs7-certreqresp' => 'p7r', + 'application/pkcs7-signature' => 'p7s', + 'application/pdf' => 'pdf', + 'application/octet-stream' => 'pdf', + 'application/x-x509-user-cert' => 'pem', + 'application/x-pem-file' => 'pem', + 'application/pgp' => 'pgp', + 'application/x-httpd-php' => 'php', + 'application/php' => 'php', + 'application/x-php' => 'php', + 'text/php' => 'php', 
+ 'text/x-php' => 'php', + 'application/x-httpd-php-source' => 'php', + 'image/png' => 'png', + 'image/x-png' => 'png', + 'application/powerpoint' => 'ppt', + 'application/vnd.ms-powerpoint' => 'ppt', + 'application/vnd.ms-office' => 'ppt', + 'application/msword' => 'doc', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx', + 'application/x-photoshop' => 'psd', + 'image/vnd.adobe.photoshop' => 'psd', + 'audio/x-realaudio' => 'ra', + 'audio/x-pn-realaudio' => 'ram', + 'application/x-rar' => 'rar', + 'application/rar' => 'rar', + 'application/x-rar-compressed' => 'rar', + 'audio/x-pn-realaudio-plugin' => 'rpm', + 'application/x-pkcs7' => 'rsa', + 'text/rtf' => 'rtf', + 'text/richtext' => 'rtx', + 'video/vnd.rn-realvideo' => 'rv', + 'application/x-stuffit' => 'sit', + 'application/smil' => 'smil', + 'text/srt' => 'srt', + 'image/svg+xml' => 'svg', + 'application/x-shockwave-flash' => 'swf', + 'application/x-tar' => 'tar', + 'application/x-gzip-compressed' => 'tgz', + 'image/tiff' => 'tiff', + 'font/ttf' => 'ttf', + 'text/plain' => 'txt', + 'text/x-vcard' => 'vcf', + 'application/videolan' => 'vlc', + 'text/vtt' => 'vtt', + 'audio/x-wav' => 'wav', + 'audio/wave' => 'wav', + 'audio/wav' => 'wav', + 'application/wbxml' => 'wbxml', + 'video/webm' => 'webm', + 'image/webp' => 'webp', + 'audio/x-ms-wma' => 'wma', + 'application/wmlc' => 'wmlc', + 'video/x-ms-wmv' => 'wmv', + 'video/x-ms-asf' => 'wmv', + 'font/woff' => 'woff', + 'font/woff2' => 'woff2', + 'application/xhtml+xml' => 'xhtml', + 'application/excel' => 'xl', + 'application/msexcel' => 'xls', + 'application/x-msexcel' => 'xls', + 'application/x-ms-excel' => 'xls', + 'application/x-excel' => 'xls', + 'application/x-dos_ms_excel' => 'xls', + 'application/xls' => 'xls', + 'application/x-xls' => 'xls', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx', + 'application/vnd.ms-excel' => 'xlsx', + 'application/xml' => 'xml', + 'text/xml' => 'xml', + 
'text/xsl' => 'xsl', + 'application/xspf+xml' => 'xspf', + 'application/x-compress' => 'z', + 'application/x-zip' => 'zip', + 'application/zip' => 'zip', + 'application/x-zip-compressed' => 'zip', + 'application/s-compressed' => 'zip', + 'multipart/x-zip' => 'zip', + 'text/x-scriptzsh' => 'zsh', + ]; + + return $mime_map[$mime] ?? ''; + } +} diff --git a/tests/PhpSpreadsheetTests/Reader/Xlsx/URLImageTest.php b/tests/PhpSpreadsheetTests/Reader/Xlsx/URLImageTest.php new file mode 100644 index 00000000..b8e81501 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Xlsx/URLImageTest.php @@ -0,0 +1,62 @@ +load($filename); + $worksheet = $spreadsheet->getActiveSheet(); + + foreach ($worksheet->getDrawingCollection() as $drawing) { + if ($drawing instanceof MemoryDrawing) { + // Skip memory drawings + } elseif ($drawing instanceof Drawing) { + // Check if the source is a URL or a file path + if ($drawing->getPath() && $drawing->getIsURL()) { + $imageContents = file_get_contents($drawing->getPath()); + $filePath = tempnam(sys_get_temp_dir(), 'Drawing'); + if ($filePath) { + file_put_contents($filePath, $imageContents); + if (file_exists($filePath)) { + $mimeType = mime_content_type($filePath); + // You could use the below to find the extension from mime type. 
+                            if ($mimeType) {
+                                $extension = File::mime2ext($mimeType);
+                                unlink($filePath); // clean up first so a failed assertion cannot leak the temp file
+                                self::assertEquals('jpeg', $extension);
+                            } else {
+                                self::fail('Could not establish mime type.');
+                            }
+                        } else {
+                            self::fail('Could not write file to disk.');
+                        }
+                    } else {
+                        self::fail('Could not create file path.');
+                    }
+                } else {
+                    self::fail('Could not assert that the file contains an image that is URL sourced.');
+                }
+            } else {
+                self::fail('No image path found.');
+            }
+        }
+
+        if (count($worksheet->getDrawingCollection()) === 0) { // empty() is never true for a collection object
+            self::fail('No image found in file.');
+        }
+    }
+}
diff --git a/tests/data/Reader/XLSX/urlImage.xlsx b/tests/data/Reader/XLSX/urlImage.xlsx
new file mode 100644
index 00000000..01e8e24d
Binary files /dev/null and b/tests/data/Reader/XLSX/urlImage.xlsx differ