diff --git a/src/PhpWord/Reader/ODText.php b/src/PhpWord/Reader/ODText.php index 0b58dc50..d0aa9138 100644 --- a/src/PhpWord/Reader/ODText.php +++ b/src/PhpWord/Reader/ODText.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Reader for ODText diff --git a/src/PhpWord/Reader/ODText/Content.php b/src/PhpWord/Reader/ODText/Content.php index 9dfd6453..cec06418 100644 --- a/src/PhpWord/Reader/ODText/Content.php +++ b/src/PhpWord/Reader/ODText/Content.php @@ -17,9 +17,9 @@ namespace PhpOffice\PhpWord\Reader\ODText; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\Element\TrackChange; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Content reader diff --git a/src/PhpWord/Reader/ODText/Meta.php b/src/PhpWord/Reader/ODText/Meta.php index 8801a543..9a3d8341 100644 --- a/src/PhpWord/Reader/ODText/Meta.php +++ b/src/PhpWord/Reader/ODText/Meta.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader\ODText; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Meta reader diff --git a/src/PhpWord/Reader/Word2007.php b/src/PhpWord/Reader/Word2007.php index 52030ef8..699a4ead 100644 --- a/src/PhpWord/Reader/Word2007.php +++ b/src/PhpWord/Reader/Word2007.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; use PhpOffice\PhpWord\Shared\ZipArchive; /** diff --git a/src/PhpWord/Reader/Word2007/AbstractPart.php b/src/PhpWord/Reader/Word2007/AbstractPart.php index eab659fa..083161d0 100644 --- a/src/PhpWord/Reader/Word2007/AbstractPart.php +++ b/src/PhpWord/Reader/Word2007/AbstractPart.php @@ -17,12 +17,12 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\ComplexType\TblWidth as TblWidthComplexType; use PhpOffice\PhpWord\Element\AbstractContainer; use PhpOffice\PhpWord\Element\TextRun; use PhpOffice\PhpWord\Element\TrackChange; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Abstract part reader @@ -95,7 +95,7 @@ abstract class AbstractPart /** * Read w:p. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @param \PhpOffice\PhpWord\Element\AbstractContainer $parent * @param string $docPart @@ -202,7 +202,7 @@ abstract class AbstractPart /** * Read w:r. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @param \PhpOffice\PhpWord\Element\AbstractContainer $parent * @param string $docPart @@ -320,7 +320,7 @@ abstract class AbstractPart /** * Read w:tbl. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @param mixed $parent * @param string $docPart @@ -378,7 +378,7 @@ abstract class AbstractPart /** * Read w:pPr. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return array|null */ @@ -413,7 +413,7 @@ abstract class AbstractPart /** * Read w:rPr * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return array|null */ @@ -459,7 +459,7 @@ abstract class AbstractPart /** * Read w:tblPr * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return string|array|null * @todo Capture w:tblStylePr w:type="firstRow" @@ -509,7 +509,7 @@ abstract class AbstractPart /** * Read w:tblpPr * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return array */ @@ -534,7 +534,7 @@ abstract class AbstractPart /** * Read w:tblInd * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return TblWidthComplexType */ @@ -552,7 +552,7 @@ abstract class AbstractPart /** * Read w:tcPr * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @return array */ @@ -620,7 +620,7 @@ abstract class AbstractPart /** * Read style definition * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $parentNode * @param array $styleDefs * @ignoreScrutinizerPatch diff --git a/src/PhpWord/Reader/Word2007/DocPropsCore.php b/src/PhpWord/Reader/Word2007/DocPropsCore.php index 36eecebe..d241df18 100644 --- a/src/PhpWord/Reader/Word2007/DocPropsCore.php +++ b/src/PhpWord/Reader/Word2007/DocPropsCore.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Core properties reader diff --git a/src/PhpWord/Reader/Word2007/DocPropsCustom.php b/src/PhpWord/Reader/Word2007/DocPropsCustom.php index a6835aac..feb41006 100644 --- a/src/PhpWord/Reader/Word2007/DocPropsCustom.php +++ b/src/PhpWord/Reader/Word2007/DocPropsCustom.php @@ -17,9 +17,9 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\Metadata\DocInfo; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Custom properties reader diff --git a/src/PhpWord/Reader/Word2007/Document.php b/src/PhpWord/Reader/Word2007/Document.php index f0d1194a..13a92e47 100644 --- a/src/PhpWord/Reader/Word2007/Document.php +++ b/src/PhpWord/Reader/Word2007/Document.php @@ -17,9 +17,9 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\Element\Section; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Document reader @@ -97,7 +97,7 @@ class Document extends AbstractPart /** * Read w:sectPr * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $domNode * @ignoreScrutinizerPatch * @return array @@ -141,7 +141,7 @@ class Document extends AbstractPart /** * Read w:p node. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $node * @param \PhpOffice\PhpWord\Element\Section &$section * @@ -170,7 +170,7 @@ class Document extends AbstractPart /** * Read w:sectPr node. * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $node * @param \PhpOffice\PhpWord\Element\Section &$section */ diff --git a/src/PhpWord/Reader/Word2007/Footnotes.php b/src/PhpWord/Reader/Word2007/Footnotes.php index 634f4739..a8829d0b 100644 --- a/src/PhpWord/Reader/Word2007/Footnotes.php +++ b/src/PhpWord/Reader/Word2007/Footnotes.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Footnotes reader diff --git a/src/PhpWord/Reader/Word2007/Numbering.php b/src/PhpWord/Reader/Word2007/Numbering.php index 3f57cbf8..dea8f3ee 100644 --- a/src/PhpWord/Reader/Word2007/Numbering.php +++ b/src/PhpWord/Reader/Word2007/Numbering.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; /** * Numbering reader @@ -89,7 +89,7 @@ class Numbering extends AbstractPart /** * Read numbering level definition from w:abstractNum and w:num * - * @param \PhpOffice\Common\XMLReader $xmlReader + * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader * @param \DOMElement $subnode * @param int $levelId * @return array diff --git a/src/PhpWord/Reader/Word2007/Settings.php b/src/PhpWord/Reader/Word2007/Settings.php index 3084943b..0a59e045 100644 --- a/src/PhpWord/Reader/Word2007/Settings.php +++ b/src/PhpWord/Reader/Word2007/Settings.php @@ -17,9 +17,9 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\ComplexType\TrackChangesView; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; use PhpOffice\PhpWord\Style\Language; /** diff --git a/src/PhpWord/Reader/Word2007/Styles.php b/src/PhpWord/Reader/Word2007/Styles.php index 97f29b43..37ce4909 100644 --- a/src/PhpWord/Reader/Word2007/Styles.php +++ b/src/PhpWord/Reader/Word2007/Styles.php @@ -17,8 +17,8 @@ namespace PhpOffice\PhpWord\Reader\Word2007; -use PhpOffice\Common\XMLReader; use PhpOffice\PhpWord\PhpWord; +use PhpOffice\PhpWord\Shared\XMLReader; use PhpOffice\PhpWord\Style\Language; /** diff --git a/src/PhpWord/Shared/XMLReader.php b/src/PhpWord/Shared/XMLReader.php new file mode 100644 index 00000000..77013f6c --- /dev/null +++ b/src/PhpWord/Shared/XMLReader.php @@ -0,0 +1,211 @@ +open($zipFile); + $content = $zip->getFromName($xmlFile); + $zip->close(); + + if ($content === false) { + return false; + } + + return $this->getDomFromString($content); + } + + /** + * Get DOMDocument from content string + * + * @param string $content + * @return \DOMDocument + */ + public function getDomFromString($content) + { + $originalLibXMLEntityValue = libxml_disable_entity_loader(true); + $this->dom = new \DOMDocument(); + $this->dom->loadXML($content); + libxml_disable_entity_loader($originalLibXMLEntityValue); + + return $this->dom; + } + + /** + * Get elements + * + * @param string $path + * @param \DOMElement $contextNode + * @return \DOMNodeList + */ + public function getElements($path, \DOMElement $contextNode = null) + { + if ($this->dom === null) { + return array(); + } + if ($this->xpath === null) { + $this->xpath = new \DOMXpath($this->dom); + } + + if (is_null($contextNode)) { + return $this->xpath->query($path); + } + + return $this->xpath->query($path, $contextNode); + } + + /** + * Registers the namespace with the DOMXPath object + * + * @param string $prefix The prefix + * @param string $namespaceURI The URI of the namespace + * @return bool true on success or false on failure + * @throws \InvalidArgumentException If called before having loaded the DOM document + */ + public function registerNamespace($prefix, $namespaceURI) + { + if ($this->dom === null) { + throw new \InvalidArgumentException('Dom needs to be loaded before registering a namespace'); + } + if ($this->xpath === null) { + $this->xpath = new \DOMXpath($this->dom); + } + return $this->xpath->registerNamespace($prefix, $namespaceURI); + } + + /** + * Get element + * + * @param string $path + * @param \DOMElement $contextNode + * @return \DOMElement|null + */ + public function getElement($path, \DOMElement $contextNode = null) + { + $elements = $this->getElements($path, $contextNode); + if ($elements->length > 0) { + return $elements->item(0); + } + + return null; + } + + /** + * Get element attribute + * + * @param string $attribute + * @param \DOMElement $contextNode + * @param string $path + * @return string|null + */ + public function getAttribute($attribute, \DOMElement $contextNode = null, $path = null) + { + $return = null; + if ($path !== null) { + $elements = $this->getElements($path, $contextNode); + if ($elements->length > 0) { + /** @var \DOMElement $node Type hint */ + $node = $elements->item(0); + $return = $node->getAttribute($attribute); + } + } else { + if ($contextNode !== null) { + $return = $contextNode->getAttribute($attribute); + } + } + + return ($return == '') ? null : $return; + } + + /** + * Get element value + * + * @param string $path + * @param \DOMElement $contextNode + * @return string|null + */ + public function getValue($path, \DOMElement $contextNode = null) + { + $elements = $this->getElements($path, $contextNode); + if ($elements->length > 0) { + return $elements->item(0)->nodeValue; + } + + return null; + } + + /** + * Count elements + * + * @param string $path + * @param \DOMElement $contextNode + * @return integer + */ + public function countElements($path, \DOMElement $contextNode = null) + { + $elements = $this->getElements($path, $contextNode); + + return $elements->length; + } + + /** + * Element exists + * + * @param string $path + * @param \DOMElement $contextNode + * @return boolean + */ + public function elementExists($path, \DOMElement $contextNode = null) + { + return $this->getElements($path, $contextNode)->length > 0; + } +} diff --git a/tests/PhpWord/Shared/XMLReaderTest.php b/tests/PhpWord/Shared/XMLReaderTest.php new file mode 100644 index 00000000..a34d650a --- /dev/null +++ b/tests/PhpWord/Shared/XMLReaderTest.php @@ -0,0 +1,134 @@ +getDomFromString('AAA'); + + $this->assertTrue($reader->elementExists('/element/child')); + $this->assertEquals('AAA', $reader->getElement('/element/child')->textContent); + $this->assertEquals('AAA', $reader->getValue('/element/child')); + $this->assertEquals('test', $reader->getAttribute('attr', $reader->getElement('/element'))); + $this->assertEquals('subtest', $reader->getAttribute('attr', $reader->getElement('/element'), 'child')); + } + + /** + * Test reading XML from zip + */ + public function testDomFromZip() + { + $archiveFile = __DIR__ . '/../_files/xml/reader.zip'; + + $reader = new XMLReader(); + $reader->getDomFromZip($archiveFile, 'test.xml'); + + $this->assertTrue($reader->elementExists('/element/child')); + + $this->assertFalse($reader->getDomFromZip($archiveFile, 'non_existing_xml_file.xml')); + } + + /** + * Test that read from non existing archive throws exception + * + * @expectedException Exception + */ + public function testThrowsExceptionOnNonExistingArchive() + { + $archiveFile = __DIR__ . '/../_files/xml/readers.zip'; + + $reader = new XMLReader(); + $reader->getDomFromZip($archiveFile, 'test.xml'); + } + + /** + * Test elements count + */ + public function testCountElements() + { + $reader = new XMLReader(); + $reader->getDomFromString('AAABBB'); + + $this->assertEquals(2, $reader->countElements('/element/child')); + } + + /** + * Test read non existing elements + */ + public function testReturnNullOnNonExistingNode() + { + $reader = new XMLReader(); + $this->assertEmpty($reader->getElements('/element/children')); + $reader->getDomFromString('AAA'); + + $this->assertNull($reader->getElement('/element/children')); + $this->assertNull($reader->getValue('/element/children')); + } + + /** + * Test that xpath fails if custom namespace is not registered + */ + public function testShouldThrowExceptionIfNamespaceIsNotKnown() + { + try { + $reader = new XMLReader(); + $reader->getDomFromString('AAA'); + + $this->assertTrue($reader->elementExists('/element/test:child')); + $this->assertEquals('AAA', $reader->getElement('/element/test:child')->textContent); + $this->fail(); + } catch (\Exception $e) { + $this->assertTrue(true); + } + } + + /** + * Test reading XML with manually registered namespace + */ + public function testShouldParseXmlWithCustomNamespace() + { + $reader = new XMLReader(); + $reader->getDomFromString('AAA'); + $reader->registerNamespace('test', 'http://phpword.com/my/custom/namespace'); + + $this->assertTrue($reader->elementExists('/element/test:child')); + $this->assertEquals('AAA', $reader->getElement('/element/test:child')->textContent); + } + + /** + * Test that xpath fails if custom namespace is not registered + * + * @expectedException InvalidArgumentException + */ + public function testShouldThowExceptionIfTryingToRegisterNamespaceBeforeReadingDoc() + { + $reader = new XMLReader(); + $reader->registerNamespace('test', 'http://phpword.com/my/custom/namespace'); + } +} diff --git a/tests/PhpWord/_files/xml/reader.zip b/tests/PhpWord/_files/xml/reader.zip new file mode 100644 index 00000000..dbe69cbb Binary files /dev/null and b/tests/PhpWord/_files/xml/reader.zip differ