From cb23cca3ecbc6177dbcd91c4f0a2a141166c722a Mon Sep 17 00:00:00 2001 From: oleibman Date: Sat, 27 Feb 2021 06:10:04 -0800 Subject: [PATCH] Avoid Duplicate Titles When Reading Multiple HTML Files (#1829) This issue arose while researching issue #1823. The issue was not a bug; it just required clarification to the author of how to use the software. But, while researching, I discovered that loading html into 2 sheets of a spreadsheet has a problem if the html title tag is the same for the 2 sheets. PhpSpreadsheet would be able to save the resulting file, but Excel would not be able to read it properly because of the duplicate title. The worksheet setTitle method allows for disambiguation in such a circumstance. The html reader passed a parameter indicating "don't disambiguate", but I can't see any harm in changing that to "disambiguate". An extremely simple fix, with tests to back it up. --- src/PhpSpreadsheet/Reader/Html.php | 2 +- .../Reader/Html/HtmlLoadStringTest.php | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index e1139015..09148d9f 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -320,7 +320,7 @@ class Html extends BaseReader { if ($child->nodeName === 'title') { $this->processDomElement($child, $sheet, $row, $column, $cellContent); - $sheet->setTitle($cellContent, true, false); + $sheet->setTitle($cellContent, true, true); $cellContent = ''; } else { $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray); diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php index e1041507..bc4c30ff 100644 --- a/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlLoadStringTest.php @@ -89,4 +89,33 @@ class HtmlLoadStringTest extends TestCase 
$spreadsheet = $reader->loadFromString($html, $spreadsheet); self::assertEquals(2, $spreadsheet->getSheetCount()); } + + public function testCanLoadDuplicateTitle(): void + { + $html = <<<'EOF' + + +Sheet + + +
1
+ + +EOF; + $reader = new \PhpOffice\PhpSpreadsheet\Reader\Html(); + $spreadsheet = $reader->loadFromString($html); + $reader->setSheetIndex(1); + $reader->loadFromString($html, $spreadsheet); + $reader->setSheetIndex(2); + $reader->loadFromString($html, $spreadsheet); + $sheet = $spreadsheet->getSheet(0); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(1); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 1', $sheet->getTitle()); + $sheet = $spreadsheet->getSheet(2); + self::assertEquals(1, $sheet->getCell('A1')->getValue()); + self::assertEquals('Sheet 2', $sheet->getTitle()); + } }