From 0869bdc8f78d584b6091a1dcdc5caf507e637cca Mon Sep 17 00:00:00 2001 From: Damjan Cvetko Date: Thu, 1 Mar 2018 01:24:59 +0100 Subject: [PATCH 1/3] Add support for reading the w:tab element in runs. Internally encoding it as "\t". --- src/PhpWord/Reader/Word2007/AbstractPart.php | 6 ++++-- tests/PhpWord/Reader/Word2007/ElementTest.php | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index 1d610516..70d3d960 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php @@ -241,9 +241,11 @@ abstract class AbstractPart if ($xmlReader->elementExists('w:br', $domNode)) { $parent->addTextBreak(); } - if ($xmlReader->elementExists('w:t', $domNode)) { + if ($xmlReader->elementExists('w:t', $domNode) || $xmlReader->elementExists('w:tab', $domNode)) { // TextRun - if ($domNode->parentNode->nodeName == 'w:del') { + if ($xmlReader->elementExists('w:tab', $domNode)) { + $textContent = "\t"; + } elseif ($domNode->parentNode->nodeName == 'w:del') { $textContent = $xmlReader->getValue('w:delText', $domNode); } else { $textContent = $xmlReader->getValue('w:t', $domNode); diff --git a/tests/PhpWord/Reader/Word2007/ElementTest.php b/tests/PhpWord/Reader/Word2007/ElementTest.php index c2648b68..6804b172 100644 --- a/tests/PhpWord/Reader/Word2007/ElementTest.php +++ b/tests/PhpWord/Reader/Word2007/ElementTest.php @@ -83,4 +83,22 @@ class ElementTest extends AbstractTestReader $this->assertEquals('bold', $listElements[2]->getText()); $this->assertTrue($listElements[2]->getFontStyle()->getBold()); } + + /** + * Test reading of tab + */ + public function testReadTab() + { + $documentXml = ' + + + + '; + + $phpWord = $this->getDocumentFromString($documentXml); + + $elements = $this->get($phpWord->getSections(), 0)->getElements(); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]); 
$this->assertEquals("\t", $elements[0]->getText()); + } } From 8a2cba22926242b6ec9433781b8878d301ae1e0e Mon Sep 17 00:00:00 2001 From: Damjan Cvetko Date: Sun, 4 Mar 2018 17:13:06 +0100 Subject: [PATCH 2/3] Support multiple elements (w:t, w:delText, w:tab) in w:r. --- src/PhpWord/Reader/Word2007/AbstractPart.php | 18 +++++++++++------- tests/PhpWord/Reader/Word2007/ElementTest.php | 4 +++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index 70d3d960..c69f636a 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php @@ -241,14 +241,18 @@ abstract class AbstractPart if ($xmlReader->elementExists('w:br', $domNode)) { $parent->addTextBreak(); } - if ($xmlReader->elementExists('w:t', $domNode) || $xmlReader->elementExists('w:tab', $domNode)) { + if ($xmlReader->elementExists('w:t', $domNode) || $xmlReader->elementExists('w:tab', $domNode) || $xmlReader->elementExists('w:delText', $domNode)) { // TextRun - if ($xmlReader->elementExists('w:tab', $domNode)) { - $textContent = "\t"; - } elseif ($domNode->parentNode->nodeName == 'w:del') { - $textContent = $xmlReader->getValue('w:delText', $domNode); - } else { - $textContent = $xmlReader->getValue('w:t', $domNode); + $textContent = ''; + $nodes = $xmlReader->getElements('w:t|w:delText|w:tab', $domNode); + foreach ($nodes as $node) { + if ($node->nodeName == 'w:t') { + $textContent .= $node->nodeValue; + } elseif ($node->nodeName == 'w:delText') { + $textContent .= $node->nodeValue; + } elseif ($node->nodeName == 'w:tab') { + $textContent .= "\t"; + } } /** @var AbstractElement $element */ $element = $parent->addText($textContent, $fontStyle, $paragraphStyle); diff --git a/tests/PhpWord/Reader/Word2007/ElementTest.php b/tests/PhpWord/Reader/Word2007/ElementTest.php index 6804b172..aad4a543 100644 --- a/tests/PhpWord/Reader/Word2007/ElementTest.php +++ 
b/tests/PhpWord/Reader/Word2007/ElementTest.php @@ -91,7 +91,9 @@ class ElementTest extends AbstractTestReader { $documentXml = ' + One + Two '; @@ -99,6 +101,6 @@ class ElementTest extends AbstractTestReader $elements = $this->get($phpWord->getSections(), 0)->getElements(); $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]); - $this->assertEquals("\t", $elements[0]->getText()); + $this->assertEquals("One\tTwo", $elements[0]->getText()); } } From bb70eb0b4c37fa059d53c9714da88150f1ed3cfc Mon Sep 17 00:00:00 2001 From: troosan Date: Sun, 18 Mar 2018 12:37:39 +0100 Subject: [PATCH 3/3] fix docx parsing --- src/PhpWord/Reader/Word2007/AbstractPart.php | 143 +++++++++--------- src/PhpWord/Writer/HTML/Element/Title.php | 14 +- src/PhpWord/Writer/ODText/Element/Title.php | 8 +- src/PhpWord/Writer/RTF/Element/Text.php | 2 +- tests/PhpWord/Reader/Word2007/ElementTest.php | 91 ++++++++++- tests/PhpWord/Reader/Word2007/StyleTest.php | 9 +- tests/PhpWord/Writer/Word2007Test.php | 2 +- 7 files changed, 180 insertions(+), 89 deletions(-) diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index 9d002623..7509a382 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php @@ -18,6 +18,7 @@ namespace PhpOffice\PhpWord\Reader\Word2007; use PhpOffice\Common\XMLReader; +use PhpOffice\PhpWord\Element\AbstractContainer; use PhpOffice\PhpWord\Element\TextRun; use PhpOffice\PhpWord\Element\TrackChange; use PhpOffice\PhpWord\PhpWord; @@ -161,20 +162,14 @@ abstract class AbstractPart $parent->addTitle($textContent, $headingDepth); } else { // Text and TextRun - $runCount = $xmlReader->countElements('w:r', $domNode); - $insCount = $xmlReader->countElements('w:ins', $domNode); - $delCount = $xmlReader->countElements('w:del', $domNode); - $linkCount = $xmlReader->countElements('w:hyperlink', $domNode); - $runLinkCount = $runCount + $insCount + $delCount + $linkCount; - 
if (0 == $runLinkCount) { + $textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag', $domNode); + if (0 === $textRunContainers) { $parent->addTextBreak(null, $paragraphStyle); } else { $nodes = $xmlReader->getElements('*', $domNode); - if ($runLinkCount > 1) { - $parent = $parent->addTextRun($paragraphStyle); - } + $paragraph = $parent->addTextRun($paragraphStyle); foreach ($nodes as $node) { - $this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle); + $this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle); } } } @@ -216,81 +211,85 @@ abstract class AbstractPart */ protected function readRun(XMLReader $xmlReader, \DOMElement $domNode, $parent, $docPart, $paragraphStyle = null) { - if (in_array($domNode->nodeName, array('w:ins', 'w:del'))) { + if (in_array($domNode->nodeName, array('w:ins', 'w:del', 'w:smartTag', 'w:hyperlink'))) { $nodes = $xmlReader->getElements('*', $domNode); foreach ($nodes as $node) { - return $this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle); + $this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle); + } + } elseif ($domNode->nodeName == 'w:r') { + $fontStyle = $this->readFontStyle($xmlReader, $domNode); + $nodes = $xmlReader->getElements('*', $domNode); + foreach ($nodes as $node) { + $this->readRunChild($xmlReader, $node, $parent, $docPart, $paragraphStyle, $fontStyle); } } + } - if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'))) { - return; - } - $fontStyle = $this->readFontStyle($xmlReader, $domNode); - - // Link - if ('w:hyperlink' == $domNode->nodeName) { - $rId = $xmlReader->getAttribute('r:id', $domNode); - $textContent = $xmlReader->getValue('w:r/w:t', $domNode); + /** + * Parses nodes under w:r + * + * @param XMLReader $xmlReader + * @param \DOMElement $node + * @param AbstractContainer $parent + * @param string $docPart + * @param mixed $paragraphStyle + * @param mixed $fontStyle + */ + protected function 
readRunChild(XMLReader $xmlReader, \DOMElement $node, AbstractContainer $parent, $docPart, $paragraphStyle = null, $fontStyle = null) + { + $runParent = $node->parentNode->parentNode; + if ($node->nodeName == 'w:footnoteReference') { + // Footnote + $wId = $xmlReader->getAttribute('w:id', $node); + $footnote = $parent->addFootnote(); + $footnote->setRelationId($wId); + } elseif ($node->nodeName == 'w:endnoteReference') { + // Endnote + $wId = $xmlReader->getAttribute('w:id', $node); + $endnote = $parent->addEndnote(); + $endnote->setRelationId($wId); + } elseif ($node->nodeName == 'w:pict') { + // Image + $rId = $xmlReader->getAttribute('r:id', $node, 'v:shape/v:imagedata'); $target = $this->getMediaTarget($docPart, $rId); if (!is_null($target)) { - $parent->addLink($target, $textContent, $fontStyle, $paragraphStyle); + if ('External' == $this->getTargetMode($docPart, $rId)) { + $imageSource = $target; + } else { + $imageSource = "zip://{$this->docFile}#{$target}"; + } + $parent->addImage($imageSource); } - } else { - if ($xmlReader->elementExists('w:footnoteReference', $domNode)) { - // Footnote - $wId = $xmlReader->getAttribute('w:id', $domNode, 'w:footnoteReference'); - $footnote = $parent->addFootnote(); - $footnote->setRelationId($wId); - } elseif ($xmlReader->elementExists('w:endnoteReference', $domNode)) { - // Endnote - $wId = $xmlReader->getAttribute('w:id', $domNode, 'w:endnoteReference'); - $endnote = $parent->addEndnote(); - $endnote->setRelationId($wId); - } elseif ($xmlReader->elementExists('w:pict', $domNode)) { - // Image - $rId = $xmlReader->getAttribute('r:id', $domNode, 'w:pict/v:shape/v:imagedata'); + } elseif ($node->nodeName == 'w:object') { + // Object + $rId = $xmlReader->getAttribute('r:id', $node, 'o:OLEObject'); + // $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata'); + $target = $this->getMediaTarget($docPart, $rId); + if (!is_null($target)) { + $textContent = "<Object: {$target}>"; + 
$parent->addText($textContent, $fontStyle, $paragraphStyle); + } + } elseif ($node->nodeName == 'w:br') { + $parent->addTextBreak(); + } elseif ($node->nodeName == 'w:tab') { + $parent->addText("\t"); + } elseif ($node->nodeName == 'w:t' || $node->nodeName == 'w:delText') { + // TextRun + $textContent = $xmlReader->getValue('.', $node); + + if ($runParent->nodeName == 'w:hyperlink') { + $rId = $xmlReader->getAttribute('r:id', $runParent); $target = $this->getMediaTarget($docPart, $rId); if (!is_null($target)) { - if ('External' == $this->getTargetMode($docPart, $rId)) { - $imageSource = $target; - } else { - $imageSource = "zip://{$this->docFile}#{$target}"; - } - $parent->addImage($imageSource); - } - } elseif ($xmlReader->elementExists('w:object', $domNode)) { - // Object - $rId = $xmlReader->getAttribute('r:id', $domNode, 'w:object/o:OLEObject'); - // $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata'); - $target = $this->getMediaTarget($docPart, $rId); - if (!is_null($target)) { - $textContent = "<Object: {$target}>"; - $parent->addText($textContent, $fontStyle, $paragraphStyle); - } - } - if ($xmlReader->elementExists('w:br', $domNode)) { - $parent->addTextBreak(); - } - if ($xmlReader->elementExists('w:t', $domNode) || $xmlReader->elementExists('w:tab', $domNode) || $xmlReader->elementExists('w:delText', $domNode)) { - // TextRun - $textContent = ''; - $nodes = $xmlReader->getElements('w:t|w:delText|w:tab', $domNode); - foreach ($nodes as $node) { - if ($node->nodeName == 'w:t') { - $textContent .= $node->nodeValue; - } elseif ($node->nodeName == 'w:delText') { - $textContent .= $node->nodeValue; - } elseif ($node->nodeName == 'w:tab') { - $textContent .= "\t"; - } + $parent->addLink($target, $textContent, $fontStyle, $paragraphStyle); } + } else { /** @var AbstractElement $element */ $element = $parent->addText($textContent, $fontStyle, $paragraphStyle); - if (in_array($domNode->parentNode->nodeName, array('w:ins', 
'w:del'))) { - $type = ($domNode->parentNode->nodeName == 'w:del') ? TrackChange::DELETED : TrackChange::INSERTED; - $author = $domNode->parentNode->getAttribute('w:author'); - $date = \DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $domNode->parentNode->getAttribute('w:date')); + if (in_array($runParent->nodeName, array('w:ins', 'w:del'))) { + $type = ($runParent->nodeName == 'w:del') ? TrackChange::DELETED : TrackChange::INSERTED; + $author = $runParent->getAttribute('w:author'); + $date = \DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $runParent->getAttribute('w:date')); $element->setChangeInfo($type, $author, $date); } } diff --git a/src/PhpWord/Writer/HTML/Element/Title.php b/src/PhpWord/Writer/HTML/Element/Title.php index 3a802018..7307ce0c 100644 --- a/src/PhpWord/Writer/HTML/Element/Title.php +++ b/src/PhpWord/Writer/HTML/Element/Title.php @@ -38,11 +38,17 @@ class Title extends AbstractElement } $tag = 'h' . $this->element->getDepth(); - if (Settings::isOutputEscapingEnabled()) { - $text = $this->escaper->escapeHtml($this->element->getText()); - } else { - $text = $this->element->getText(); + + $text = $this->element->getText(); + if (is_string($text)) { + if (Settings::isOutputEscapingEnabled()) { + $text = $this->escaper->escapeHtml($text); + } + } elseif ($text instanceof \PhpOffice\PhpWord\Element\AbstractContainer) { + $writer = new Container($this->parentWriter, $this->element); + $text = $writer->write(); } + $content = "<{$tag}>{$text}" . 
PHP_EOL; return $content; diff --git a/src/PhpWord/Writer/ODText/Element/Title.php b/src/PhpWord/Writer/ODText/Element/Title.php index 343949a2..8b9440ab 100644 --- a/src/PhpWord/Writer/ODText/Element/Title.php +++ b/src/PhpWord/Writer/ODText/Element/Title.php @@ -37,7 +37,13 @@ class Title extends AbstractElement $xmlWriter->startElement('text:h'); $xmlWriter->writeAttribute('text:outline-level', $element->getDepth()); - $this->writeText($element->getText()); + $text = $element->getText(); + if (is_string($text)) { + $this->writeText($text); + } elseif ($text instanceof \PhpOffice\PhpWord\Element\AbstractContainer) { + $containerWriter = new Container($xmlWriter, $text); + $containerWriter->write(); + } $xmlWriter->endElement(); // text:h } } diff --git a/src/PhpWord/Writer/RTF/Element/Text.php b/src/PhpWord/Writer/RTF/Element/Text.php index f80e7935..b9e56e89 100644 --- a/src/PhpWord/Writer/RTF/Element/Text.php +++ b/src/PhpWord/Writer/RTF/Element/Text.php @@ -34,7 +34,7 @@ class Text extends AbstractElement /** @var \PhpOffice\PhpWord\Element\Text $element Type hint */ $element = $this->element; $elementClass = str_replace('\\Writer\\RTF', '', get_class($this)); - if (!$element instanceof $elementClass) { + if (!$element instanceof $elementClass || !is_string($element->getText())) { return ''; } diff --git a/tests/PhpWord/Reader/Word2007/ElementTest.php b/tests/PhpWord/Reader/Word2007/ElementTest.php index 46780278..75060625 100644 --- a/tests/PhpWord/Reader/Word2007/ElementTest.php +++ b/tests/PhpWord/Reader/Word2007/ElementTest.php @@ -18,6 +18,7 @@ namespace PhpOffice\PhpWord\Reader\Word2007; use PhpOffice\PhpWord\AbstractTestReader; +use PhpOffice\PhpWord\Element\TrackChange; /** * Test class for PhpOffice\PhpWord\Reader\Word2007\Element subnamespace @@ -39,9 +40,35 @@ class ElementTest extends AbstractTestReader $phpWord = $this->getDocumentFromString(array('document' => $documentXml)); $elements = $phpWord->getSection(0)->getElements(); - 
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextBreak', $elements[0]); - $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[1]); - $this->assertEquals('test string', $elements[1]->getText()); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]); + /** @var \PhpOffice\PhpWord\Element\TextRun $textRun */ + $textRun = $elements[0]; + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextBreak', $textRun->getElement(0)); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(1)); + $this->assertEquals('test string', $textRun->getElement(1)->getText()); + } + + /** + * Test reading content inside w:smartTag + */ + public function testSmartTag() + { + $documentXml = ' + + + test string + + + '; + + $phpWord = $this->getDocumentFromString(array('document' => $documentXml)); + + $elements = $phpWord->getSection(0)->getElements(); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]); + /** @var \PhpOffice\PhpWord\Element\TextRun $textRun */ + $textRun = $elements[0]; + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0)); + $this->assertEquals('test string', $textRun->getElement(0)->getText()); } /** @@ -85,6 +112,49 @@ class ElementTest extends AbstractTestReader $this->assertTrue($listElements[2]->getFontStyle()->getBold()); } + /** + * Test reading track changes + */ + public function testReadTrackChange() + { + $documentXml = ' + + One + + + + two + + + + + three + + + '; + + $phpWord = $this->getDocumentFromString(array('document' => $documentXml)); + + $elements = $phpWord->getSection(0)->getElements(); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]); + /** @var \PhpOffice\PhpWord\Element\TextRun $elements */ + $textRun = $elements[0]; + + $this->assertEquals('One ', $textRun->getElement(0)->getText()); + + $this->assertEquals('two', $textRun->getElement(1)->getText()); + 
$this->assertNotNull($textRun->getElement(1)->getTrackChange()); + /** @var \PhpOffice\PhpWord\Element\TrackChange $trackChange */ + $trackChange = $textRun->getElement(1)->getTrackChange(); + $this->assertEquals(TrackChange::DELETED, $trackChange->getChangeType()); + + $this->assertEquals('three', $textRun->getElement(2)->getText()); + $this->assertNotNull($textRun->getElement(2)->getTrackChange()); + /** @var \PhpOffice\PhpWord\Element\TrackChange $trackChange */ + $trackChange = $textRun->getElement(2)->getTrackChange(); + $this->assertEquals(TrackChange::INSERTED, $trackChange->getChangeType()); + } + /** * Test reading of tab */ @@ -98,11 +168,18 @@ class ElementTest extends AbstractTestReader '; - $phpWord = $this->getDocumentFromString($documentXml); + $phpWord = $this->getDocumentFromString(array('document' => $documentXml)); - $elements = $this->get($phpWord->getSections(), 0)->getElements(); - $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]); - $this->assertEquals("One\tTwo", $elements[0]->getText()); + $elements = $phpWord->getSection(0)->getElements(); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]); + /** @var \PhpOffice\PhpWord\Element\TextRun $textRun */ + $textRun = $elements[0]; + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0)); + $this->assertEquals('One', $textRun->getElement(0)->getText()); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(1)); + $this->assertEquals("\t", $textRun->getElement(1)->getText()); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(2)); + $this->assertEquals('Two', $textRun->getElement(2)->getText()); } /** diff --git a/tests/PhpWord/Reader/Word2007/StyleTest.php b/tests/PhpWord/Reader/Word2007/StyleTest.php index 46421d97..9bb6d3bd 100644 --- a/tests/PhpWord/Reader/Word2007/StyleTest.php +++ b/tests/PhpWord/Reader/Word2007/StyleTest.php @@ -117,10 +117,13 @@ class 
StyleTest extends AbstractTestReader $phpWord = $this->getDocumentFromString(array('document' => $documentXml)); $elements = $phpWord->getSection(0)->getElements(); - $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]); - $this->assertInstanceOf('PhpOffice\PhpWord\Style\Font', $elements[0]->getFontStyle()); + /** @var \PhpOffice\PhpWord\Element\TextRun $elements */ + $textRun = $elements[0]; + $this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $textRun); + $this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0)); + $this->assertInstanceOf('PhpOffice\PhpWord\Style\Font', $textRun->getElement(0)->getFontStyle()); /** @var \PhpOffice\PhpWord\Style\Font $fontStyle */ - $fontStyle = $elements[0]->getFontStyle(); + $fontStyle = $textRun->getElement(0)->getFontStyle(); $this->assertEquals(15, $fontStyle->getPosition()); } } diff --git a/tests/PhpWord/Writer/Word2007Test.php b/tests/PhpWord/Writer/Word2007Test.php index 22a0e6df..0db36fc1 100644 --- a/tests/PhpWord/Writer/Word2007Test.php +++ b/tests/PhpWord/Writer/Word2007Test.php @@ -75,7 +75,7 @@ class Word2007Test extends \PHPUnit\Framework\TestCase public function testSave() { $localImage = __DIR__ . '/../_files/images/earth.jpg'; - $remoteImage = 'http://php.net//images/logos/php-med-trans-light.gif'; + $remoteImage = 'http://php.net/images/logos/new-php-logo.png'; $phpWord = new PhpWord(); $phpWord->addFontStyle('Font', array('size' => 11)); $phpWord->addParagraphStyle('Paragraph', array('alignment' => Jc::CENTER));