fix docx parsing
This commit is contained in:
parent
17680f7aec
commit
bb70eb0b4c
|
|
@ -18,6 +18,7 @@
|
|||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\Common\XMLReader;
|
||||
use PhpOffice\PhpWord\Element\AbstractContainer;
|
||||
use PhpOffice\PhpWord\Element\TextRun;
|
||||
use PhpOffice\PhpWord\Element\TrackChange;
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
|
|
@ -161,20 +162,14 @@ abstract class AbstractPart
|
|||
$parent->addTitle($textContent, $headingDepth);
|
||||
} else {
|
||||
// Text and TextRun
|
||||
$runCount = $xmlReader->countElements('w:r', $domNode);
|
||||
$insCount = $xmlReader->countElements('w:ins', $domNode);
|
||||
$delCount = $xmlReader->countElements('w:del', $domNode);
|
||||
$linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
|
||||
$runLinkCount = $runCount + $insCount + $delCount + $linkCount;
|
||||
if (0 == $runLinkCount) {
|
||||
$textRunContainers = $xmlReader->countElements('w:r|w:ins|w:del|w:hyperlink|w:smartTag', $domNode);
|
||||
if (0 === $textRunContainers) {
|
||||
$parent->addTextBreak(null, $paragraphStyle);
|
||||
} else {
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
if ($runLinkCount > 1) {
|
||||
$parent = $parent->addTextRun($paragraphStyle);
|
||||
}
|
||||
$paragraph = $parent->addTextRun($paragraphStyle);
|
||||
foreach ($nodes as $node) {
|
||||
$this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle);
|
||||
$this->readRun($xmlReader, $node, $paragraph, $docPart, $paragraphStyle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -216,81 +211,85 @@ abstract class AbstractPart
|
|||
*/
|
||||
protected function readRun(XMLReader $xmlReader, \DOMElement $domNode, $parent, $docPart, $paragraphStyle = null)
|
||||
{
|
||||
if (in_array($domNode->nodeName, array('w:ins', 'w:del'))) {
|
||||
if (in_array($domNode->nodeName, array('w:ins', 'w:del', 'w:smartTag', 'w:hyperlink'))) {
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
return $this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle);
|
||||
$this->readRun($xmlReader, $node, $parent, $docPart, $paragraphStyle);
|
||||
}
|
||||
} elseif ($domNode->nodeName == 'w:r') {
|
||||
$fontStyle = $this->readFontStyle($xmlReader, $domNode);
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
$this->readRunChild($xmlReader, $node, $parent, $docPart, $paragraphStyle, $fontStyle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'))) {
|
||||
return;
|
||||
}
|
||||
$fontStyle = $this->readFontStyle($xmlReader, $domNode);
|
||||
|
||||
// Link
|
||||
if ('w:hyperlink' == $domNode->nodeName) {
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode);
|
||||
$textContent = $xmlReader->getValue('w:r/w:t', $domNode);
|
||||
/**
|
||||
* Parses nodes under w:r
|
||||
*
|
||||
* @param XMLReader $xmlReader
|
||||
* @param \DOMElement $node
|
||||
* @param AbstractContainer $parent
|
||||
* @param string $docPart
|
||||
* @param mixed $paragraphStyle
|
||||
* @param mixed $fontStyle
|
||||
*/
|
||||
protected function readRunChild(XMLReader $xmlReader, \DOMElement $node, AbstractContainer $parent, $docPart, $paragraphStyle = null, $fontStyle = null)
|
||||
{
|
||||
$runParent = $node->parentNode->parentNode;
|
||||
if ($node->nodeName == 'w:footnoteReference') {
|
||||
// Footnote
|
||||
$wId = $xmlReader->getAttribute('w:id', $node);
|
||||
$footnote = $parent->addFootnote();
|
||||
$footnote->setRelationId($wId);
|
||||
} elseif ($node->nodeName == 'w:endnoteReference') {
|
||||
// Endnote
|
||||
$wId = $xmlReader->getAttribute('w:id', $node);
|
||||
$endnote = $parent->addEndnote();
|
||||
$endnote->setRelationId($wId);
|
||||
} elseif ($node->nodeName == 'w:pict') {
|
||||
// Image
|
||||
$rId = $xmlReader->getAttribute('r:id', $node, 'v:shape/v:imagedata');
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$parent->addLink($target, $textContent, $fontStyle, $paragraphStyle);
|
||||
if ('External' == $this->getTargetMode($docPart, $rId)) {
|
||||
$imageSource = $target;
|
||||
} else {
|
||||
$imageSource = "zip://{$this->docFile}#{$target}";
|
||||
}
|
||||
$parent->addImage($imageSource);
|
||||
}
|
||||
} else {
|
||||
if ($xmlReader->elementExists('w:footnoteReference', $domNode)) {
|
||||
// Footnote
|
||||
$wId = $xmlReader->getAttribute('w:id', $domNode, 'w:footnoteReference');
|
||||
$footnote = $parent->addFootnote();
|
||||
$footnote->setRelationId($wId);
|
||||
} elseif ($xmlReader->elementExists('w:endnoteReference', $domNode)) {
|
||||
// Endnote
|
||||
$wId = $xmlReader->getAttribute('w:id', $domNode, 'w:endnoteReference');
|
||||
$endnote = $parent->addEndnote();
|
||||
$endnote->setRelationId($wId);
|
||||
} elseif ($xmlReader->elementExists('w:pict', $domNode)) {
|
||||
// Image
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode, 'w:pict/v:shape/v:imagedata');
|
||||
} elseif ($node->nodeName == 'w:object') {
|
||||
// Object
|
||||
$rId = $xmlReader->getAttribute('r:id', $node, 'o:OLEObject');
|
||||
// $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata');
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$textContent = "<Object: {$target}>";
|
||||
$parent->addText($textContent, $fontStyle, $paragraphStyle);
|
||||
}
|
||||
} elseif ($node->nodeName == 'w:br') {
|
||||
$parent->addTextBreak();
|
||||
} elseif ($node->nodeName == 'w:tab') {
|
||||
$parent->addText("\t");
|
||||
} elseif ($node->nodeName == 'w:t' || $node->nodeName == 'w:delText') {
|
||||
// TextRun
|
||||
$textContent = $xmlReader->getValue('.', $node);
|
||||
|
||||
if ($runParent->nodeName == 'w:hyperlink') {
|
||||
$rId = $xmlReader->getAttribute('r:id', $runParent);
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
if ('External' == $this->getTargetMode($docPart, $rId)) {
|
||||
$imageSource = $target;
|
||||
} else {
|
||||
$imageSource = "zip://{$this->docFile}#{$target}";
|
||||
}
|
||||
$parent->addImage($imageSource);
|
||||
}
|
||||
} elseif ($xmlReader->elementExists('w:object', $domNode)) {
|
||||
// Object
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode, 'w:object/o:OLEObject');
|
||||
// $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata');
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$textContent = "<Object: {$target}>";
|
||||
$parent->addText($textContent, $fontStyle, $paragraphStyle);
|
||||
}
|
||||
}
|
||||
if ($xmlReader->elementExists('w:br', $domNode)) {
|
||||
$parent->addTextBreak();
|
||||
}
|
||||
if ($xmlReader->elementExists('w:t', $domNode) || $xmlReader->elementExists('w:tab', $domNode) || $xmlReader->elementExists('w:delText', $domNode)) {
|
||||
// TextRun
|
||||
$textContent = '';
|
||||
$nodes = $xmlReader->getElements('w:t|w:delText|w:tab', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if ($node->nodeName == 'w:t') {
|
||||
$textContent .= $node->nodeValue;
|
||||
} elseif ($node->nodeName == 'w:delText') {
|
||||
$textContent .= $node->nodeValue;
|
||||
} elseif ($node->nodeName == 'w:tab') {
|
||||
$textContent .= "\t";
|
||||
}
|
||||
$parent->addLink($target, $textContent, $fontStyle, $paragraphStyle);
|
||||
}
|
||||
} else {
|
||||
/** @var AbstractElement $element */
|
||||
$element = $parent->addText($textContent, $fontStyle, $paragraphStyle);
|
||||
if (in_array($domNode->parentNode->nodeName, array('w:ins', 'w:del'))) {
|
||||
$type = ($domNode->parentNode->nodeName == 'w:del') ? TrackChange::DELETED : TrackChange::INSERTED;
|
||||
$author = $domNode->parentNode->getAttribute('w:author');
|
||||
$date = \DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $domNode->parentNode->getAttribute('w:date'));
|
||||
if (in_array($runParent->nodeName, array('w:ins', 'w:del'))) {
|
||||
$type = ($runParent->nodeName == 'w:del') ? TrackChange::DELETED : TrackChange::INSERTED;
|
||||
$author = $runParent->getAttribute('w:author');
|
||||
$date = \DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $runParent->getAttribute('w:date'));
|
||||
$element->setChangeInfo($type, $author, $date);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,11 +38,17 @@ class Title extends AbstractElement
|
|||
}
|
||||
|
||||
$tag = 'h' . $this->element->getDepth();
|
||||
if (Settings::isOutputEscapingEnabled()) {
|
||||
$text = $this->escaper->escapeHtml($this->element->getText());
|
||||
} else {
|
||||
$text = $this->element->getText();
|
||||
|
||||
$text = $this->element->getText();
|
||||
if (is_string($text)) {
|
||||
if (Settings::isOutputEscapingEnabled()) {
|
||||
$text = $this->escaper->escapeHtml($text);
|
||||
}
|
||||
} elseif ($text instanceof \PhpOffice\PhpWord\Element\AbstractContainer) {
|
||||
$writer = new Container($this->parentWriter, $this->element);
|
||||
$text = $writer->write();
|
||||
}
|
||||
|
||||
$content = "<{$tag}>{$text}</{$tag}>" . PHP_EOL;
|
||||
|
||||
return $content;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,13 @@ class Title extends AbstractElement
|
|||
|
||||
$xmlWriter->startElement('text:h');
|
||||
$xmlWriter->writeAttribute('text:outline-level', $element->getDepth());
|
||||
$this->writeText($element->getText());
|
||||
$text = $element->getText();
|
||||
if (is_string($text)) {
|
||||
$this->writeText($text);
|
||||
} elseif ($text instanceof \PhpOffice\PhpWord\Element\AbstractContainer) {
|
||||
$containerWriter = new Container($xmlWriter, $text);
|
||||
$containerWriter->write();
|
||||
}
|
||||
$xmlWriter->endElement(); // text:h
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class Text extends AbstractElement
|
|||
/** @var \PhpOffice\PhpWord\Element\Text $element Type hint */
|
||||
$element = $this->element;
|
||||
$elementClass = str_replace('\\Writer\\RTF', '', get_class($this));
|
||||
if (!$element instanceof $elementClass) {
|
||||
if (!$element instanceof $elementClass || !is_string($element->getText())) {
|
||||
return '';
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\AbstractTestReader;
|
||||
use PhpOffice\PhpWord\Element\TrackChange;
|
||||
|
||||
/**
|
||||
* Test class for PhpOffice\PhpWord\Reader\Word2007\Element subnamespace
|
||||
|
|
@ -39,9 +40,35 @@ class ElementTest extends AbstractTestReader
|
|||
$phpWord = $this->getDocumentFromString(array('document' => $documentXml));
|
||||
|
||||
$elements = $phpWord->getSection(0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextBreak', $elements[0]);
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[1]);
|
||||
$this->assertEquals('test string', $elements[1]->getText());
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]);
|
||||
/** @var \PhpOffice\PhpWord\Element\TextRun $textRun */
|
||||
$textRun = $elements[0];
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextBreak', $textRun->getElement(0));
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(1));
|
||||
$this->assertEquals('test string', $textRun->getElement(1)->getText());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test reading content inside w:smartTag
|
||||
*/
|
||||
public function testSmartTag()
|
||||
{
|
||||
$documentXml = '<w:p>
|
||||
<w:smartTag>
|
||||
<w:r>
|
||||
<w:t xml:space="preserve">test string</w:t>
|
||||
</w:r>
|
||||
</w:smartTag>
|
||||
</w:p>';
|
||||
|
||||
$phpWord = $this->getDocumentFromString(array('document' => $documentXml));
|
||||
|
||||
$elements = $phpWord->getSection(0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]);
|
||||
/** @var \PhpOffice\PhpWord\Element\TextRun $textRun */
|
||||
$textRun = $elements[0];
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0));
|
||||
$this->assertEquals('test string', $textRun->getElement(0)->getText());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -85,6 +112,49 @@ class ElementTest extends AbstractTestReader
|
|||
$this->assertTrue($listElements[2]->getFontStyle()->getBold());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test reading track changes
|
||||
*/
|
||||
public function testReadTrackChange()
|
||||
{
|
||||
$documentXml = '<w:p>
|
||||
<w:r>
|
||||
<w:t>One </w:t>
|
||||
</w:r>
|
||||
<w:del w:author="Barney" w:date="2018-03-14T10:57:05Z">
|
||||
<w:r>
|
||||
<w:delText>two</w:delText>
|
||||
</w:r>
|
||||
</w:del>
|
||||
<w:ins w:author="Fred" w:date="2018-03-14T10:57:05Z">
|
||||
<w:r>
|
||||
<w:t>three</w:t>
|
||||
</w:r>
|
||||
</w:ins>
|
||||
</w:p>';
|
||||
|
||||
$phpWord = $this->getDocumentFromString(array('document' => $documentXml));
|
||||
|
||||
$elements = $phpWord->getSection(0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]);
|
||||
/** @var \PhpOffice\PhpWord\Element\TextRun $textRun */
|
||||
$textRun = $elements[0];
|
||||
|
||||
$this->assertEquals('One ', $textRun->getElement(0)->getText());
|
||||
|
||||
$this->assertEquals('two', $textRun->getElement(1)->getText());
|
||||
$this->assertNotNull($textRun->getElement(1)->getTrackChange());
|
||||
/** @var \PhpOffice\PhpWord\Element\TrackChange $trackChange */
|
||||
$trackChange = $textRun->getElement(1)->getTrackChange();
|
||||
$this->assertEquals(TrackChange::DELETED, $trackChange->getChangeType());
|
||||
|
||||
$this->assertEquals('three', $textRun->getElement(2)->getText());
|
||||
$this->assertNotNull($textRun->getElement(2)->getTrackChange());
|
||||
/** @var \PhpOffice\PhpWord\Element\TrackChange $trackChange */
|
||||
$trackChange = $textRun->getElement(2)->getTrackChange();
|
||||
$this->assertEquals(TrackChange::INSERTED, $trackChange->getChangeType());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test reading of tab
|
||||
*/
|
||||
|
|
@ -98,11 +168,18 @@ class ElementTest extends AbstractTestReader
|
|||
</w:r>
|
||||
</w:p>';
|
||||
|
||||
$phpWord = $this->getDocumentFromString($documentXml);
|
||||
$phpWord = $this->getDocumentFromString(array('document' => $documentXml));
|
||||
|
||||
$elements = $this->get($phpWord->getSections(), 0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]);
|
||||
$this->assertEquals("One\tTwo", $elements[0]->getText());
|
||||
$elements = $phpWord->getSection(0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $elements[0]);
|
||||
/** @var \PhpOffice\PhpWord\Element\TextRun $textRun */
|
||||
$textRun = $elements[0];
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0));
|
||||
$this->assertEquals('One', $textRun->getElement(0)->getText());
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(1));
|
||||
$this->assertEquals("\t", $textRun->getElement(1)->getText());
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(2));
|
||||
$this->assertEquals('Two', $textRun->getElement(2)->getText());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -117,10 +117,13 @@ class StyleTest extends AbstractTestReader
|
|||
$phpWord = $this->getDocumentFromString(array('document' => $documentXml));
|
||||
|
||||
$elements = $phpWord->getSection(0)->getElements();
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $elements[0]);
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Style\Font', $elements[0]->getFontStyle());
|
||||
/** @var \PhpOffice\PhpWord\Element\TextRun $textRun */
|
||||
$textRun = $elements[0];
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\TextRun', $textRun);
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Element\Text', $textRun->getElement(0));
|
||||
$this->assertInstanceOf('PhpOffice\PhpWord\Style\Font', $textRun->getElement(0)->getFontStyle());
|
||||
/** @var \PhpOffice\PhpWord\Style\Font $fontStyle */
|
||||
$fontStyle = $elements[0]->getFontStyle();
|
||||
$fontStyle = $textRun->getElement(0)->getFontStyle();
|
||||
$this->assertEquals(15, $fontStyle->getPosition());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class Word2007Test extends \PHPUnit\Framework\TestCase
|
|||
public function testSave()
|
||||
{
|
||||
$localImage = __DIR__ . '/../_files/images/earth.jpg';
|
||||
$remoteImage = 'http://php.net//images/logos/php-med-trans-light.gif';
|
||||
$remoteImage = 'http://php.net/images/logos/new-php-logo.png';
|
||||
$phpWord = new PhpWord();
|
||||
$phpWord->addFontStyle('Font', array('size' => 11));
|
||||
$phpWord->addParagraphStyle('Paragraph', array('alignment' => Jc::CENTER));
|
||||
|
|
|
|||
Loading…
Reference in New Issue