From d1d1e6b4df8c7a60b31781baafd30efa22c8f8fc Mon Sep 17 00:00:00 2001 From: Ivan Lanin Date: Mon, 7 Apr 2014 18:06:42 +0700 Subject: [PATCH] Reader: Ability to read tables, links, preserve text, and text break --- src/PhpWord/Container/Section.php | 3 + src/PhpWord/Container/Settings.php | 5 +- src/PhpWord/Reader/Word2007.php | 756 ++++++++++++++++++++--------- src/PhpWord/Shared/XMLReader.php | 163 +++++++ src/PhpWord/Style.php | 10 +- 5 files changed, 694 insertions(+), 243 deletions(-) create mode 100644 src/PhpWord/Shared/XMLReader.php diff --git a/src/PhpWord/Container/Section.php b/src/PhpWord/Container/Section.php index dae4f26c..0b55f991 100644 --- a/src/PhpWord/Container/Section.php +++ b/src/PhpWord/Container/Section.php @@ -66,6 +66,9 @@ class Section extends Container { if (!is_null($settings) && is_array($settings)) { foreach ($settings as $key => $value) { + if (is_null($value)) { + continue; + } if (substr($key, 0, 1) == '_') { $key = substr($key, 1); } diff --git a/src/PhpWord/Container/Settings.php b/src/PhpWord/Container/Settings.php index 762d45c3..40c20d8c 100644 --- a/src/PhpWord/Container/Settings.php +++ b/src/PhpWord/Container/Settings.php @@ -229,7 +229,10 @@ class Settings } elseif ($key == 'borderColor') { $this->setBorderColor($value); } else { - $this->$key = $value; + $method = 'set' . $key; + if (method_exists($this, $method)) { + $this->$method($value); + } } } diff --git a/src/PhpWord/Reader/Word2007.php b/src/PhpWord/Reader/Word2007.php index 2cdb7f9a..9cc4f330 100644 --- a/src/PhpWord/Reader/Word2007.php +++ b/src/PhpWord/Reader/Word2007.php @@ -13,31 +13,54 @@ use PhpOffice\PhpWord\PhpWord; use PhpOffice\PhpWord\Settings; use PhpOffice\PhpWord\DocumentProperties; use PhpOffice\PhpWord\Exception\Exception; +use PhpOffice\PhpWord\Shared\XMLReader; +use PhpOffice\PhpWord\Container\Section; /** * Reader for Word2007 */ class Word2007 extends Reader implements IReader { + /** + * PhpWord object + * + * @var PhpWord + */ + private $phpWord; + + /** + * Part relationships + * + * @var array + */ + private $partRels = array('document' => array(), 'footnotes' => array()); + + /** + * Current active part document|footnotes|headerx|footerx + * + * @var string + */ + private $activePart = 'document'; + /** * Can the current IReader read the file? * - * @param string $pFilename + * @param string $fileName * @return bool - * @throws \PhpOffice\PhpWord\Exception\Exception + * @throws Exception */ - public function canRead($pFilename) + public function canRead($fileName) { // Check if file exists - if (!file_exists($pFilename)) { - throw new Exception("Could not open {$pFilename} for reading! File does not exist."); + if (!file_exists($fileName)) { + throw new Exception("Could not open {$fileName} for reading! File does not exist."); } $return = false; // Load file $zipClass = Settings::getZipClass(); $zip = new $zipClass(); - if ($zip->open($pFilename) === true) { + if ($zip->open($fileName) === true) { // check if it is an OOXML archive $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); if ($rels !== false) { @@ -91,21 +114,24 @@ class Word2007 extends Reader implements IReader /** * Loads PhpWord from file * - * @param string $pFilename - * @return \PhpOffice\PhpWord\PhpWord|null + * @param string $fileName + * @return PhpWord|null */ - public function load($pFilename) + public function load($fileName) { // Check if file exists and can be read - if (!$this->canRead($pFilename)) { + if (!$this->canRead($fileName)) { return null; } // Initialisations - $word = new PhpWord(); + $this->phpWord = new PhpWord(); $zipClass = Settings::getZipClass(); $zip = new $zipClass(); - $zip->open($pFilename); + $zip->open($fileName); + + // Read document relationships + $this->readPartRels($fileName, 'document'); // Read properties and documents $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); @@ -118,7 +144,7 @@ class Word2007 extends Reader implements IReader $xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/"); $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"); - $docProps = $word->getDocumentProperties(); + $docProps = $this->phpWord->getDocumentProperties(); $docProps->setCreator((string)self::arrayItem($xmlCore->xpath("dc:creator"))); $docProps->setLastModifiedBy((string)self::arrayItem($xmlCore->xpath("cp:lastModifiedBy"))); $docProps->setCreated(strtotime(self::arrayItem($xmlCore->xpath("dcterms:created")))); @@ -134,7 +160,7 @@ class Word2007 extends Reader implements IReader case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties": $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); if (is_object($xmlCore)) { - $docProps = $word->getDocumentProperties(); + $docProps = $this->phpWord->getDocumentProperties(); if (isset($xmlCore->Company)) { $docProps->setCompany((string)$xmlCore->Company); } @@ -147,7 +173,7 @@ class Word2007 extends Reader implements IReader case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties": $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); if (is_object($xmlCore)) { - $docProps = $word->getDocumentProperties(); + $docProps = $this->phpWord->getDocumentProperties(); foreach ($xmlCore as $xmlProperty) { $cellDataOfficeAttributes = $xmlProperty->attributes(); if (isset($cellDataOfficeAttributes['name'])) { @@ -162,266 +188,520 @@ class Word2007 extends Reader implements IReader } } break; - // Document - case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument": - $dir = dirname($rel["Target"]); - $archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels"; - $relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive)); - $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships"); - $xpath = self::arrayItem( - $relsDoc->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']") - ); - $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true)); - if (is_object($xmlDoc)) { - $section = $word->addSection(); - - foreach ($xmlDoc->body->children() as $elm) { - $elmName = $elm->getName(); - if ($elmName == 'p') { // Paragraph/section - // Create new section if section setting found - if ($elm->pPr->sectPr) { - $section->setSettings($this->loadSectionSettings($elm->pPr)); - $section = $word->addSection(); - continue; - } - // Has w:r? It's either text or textrun - if ($elm->r) { - // w:r = 1? It's a plain paragraph - if (count($elm->r) == 1) { - $section->addText( - $elm->r->t, - $this->loadFontStyle($elm->r) - ); - // w:r more than 1? It's a textrun - } else { - $textRun = $section->addTextRun(); - foreach ($elm->r as $r) { - $textRun->addText( - $r->t, - $this->loadFontStyle($r) - ); - } - } - // No, it's a textbreak - } else { - $section->addTextBreak(); - } - } elseif ($elmName == 'sectPr') { - // Last section setting - $section->setSettings($this->loadSectionSettings($xmlDoc->body)); - } - } - } - break; } } - // Read styles - $docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels")); - foreach ($docRels->Relationship as $rel) { - switch ($rel["Type"]) { - case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles": - $xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true)); - if (is_object($xmlStyle)) { - foreach ($xmlStyle->children() as $elm) { - if ($elm->getName() != 'style') { - continue; - } - $pStyle = null; - $fStyle = null; - $hasParagraphStyle = isset($elm->pPr); - $hasFontStyle = isset($elm->rPr); - $styleName = (string)$elm->name['val']; - if ($hasParagraphStyle) { - $pStyle = $this->loadParagraphStyle($elm); - if (is_array($pStyle) && !$hasFontStyle) { - $word->addParagraphStyle($styleName, $pStyle); - } - } - if ($hasFontStyle) { - $fStyle = $this->loadFontStyle($elm); - $word->addFontStyle($styleName, $fStyle, $pStyle); - } - } - } - break; + // Read document + $this->readDocument($fileName, 'word/document.xml'); + + // Read document relationships + foreach ($this->partRels['document'] as $rId => $rel) { + if ($rel['type'] == 'styles') { + $this->readStyles($fileName, 'word/' . $rel['target']); } } $zip->close(); - return $word; + return $this->phpWord; } /** - * Load section settings from SimpleXMLElement + * Read _rels/$partName.xml.rels * - * @param \SimpleXMLElement $elm - * @return array|string|null - * - * @todo Implement gutter + * @param string $fileName + * @param string $partName document|footnotes|headerx|footerx */ - private function loadSectionSettings($elm) + private function readPartRels($fileName, $partName) { - if ($xml = $elm->sectPr) { - $setting = array(); - if ($xml->type) { - $setting['breakType'] = (string)$xml->type['val']; + $relPrefix = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/'; + $xmlReader = new XMLReader(); + $response = $xmlReader->getDomFromZip($fileName, "word/_rels/{$partName}.xml.rels"); + if ($response) { + $rels = $xmlReader->getElements('*'); + foreach ($rels as $rel) { + $this->partRels[$partName][$rel->getAttribute('Id')] = array( + 'type' => str_replace($relPrefix, '', $rel->getAttribute('Type')), + 'target' => $rel->getAttribute('Target'), + ); } - if ($xml->pgSz) { - if (isset($xml->pgSz['w'])) { - $setting['pageSizeW'] = (int)$xml->pgSz['w']; - } - if (isset($xml->pgSz['h'])) { - $setting['pageSizeH'] = (int)$xml->pgSz['h']; - } - if (isset($xml->pgSz['orient'])) { - $setting['orientation'] = (string)$xml->pgSz['orient']; - } - } - if ($xml->pgMar) { - if (isset($xml->pgMar['top'])) { - $setting['topMargin'] = (int)$xml->pgMar['top']; - } - if (isset($xml->pgMar['left'])) { - $setting['leftMargin'] = (int)$xml->pgMar['left']; - } - if (isset($xml->pgMar['bottom'])) { - $setting['bottomMargin'] = (int)$xml->pgMar['bottom']; - } - if (isset($xml->pgMar['right'])) { - $setting['rightMargin'] = (int)$xml->pgMar['right']; - } - if (isset($xml->pgMar['header'])) { - $setting['headerHeight'] = (int)$xml->pgMar['header']; - } - if (isset($xml->pgMar['footer'])) { - $setting['footerHeight'] = (int)$xml->pgMar['footer']; - } - if (isset($xml->pgMar['gutter'])) { - // $setting['gutter'] = (int)$xml->pgMar['gutter']; - } - } - if ($xml->cols) { - if (isset($xml->cols['num'])) { - $setting['colsNum'] = (int)$xml->cols['num']; - } - if (isset($xml->cols['space'])) { - $setting['colsSpace'] = (int)$xml->cols['space']; - } - } - return $setting; } - return null; } /** - * Load paragraph style from SimpleXMLElement + * Read styles.xml * - * @param \SimpleXMLElement $elm - * @return array|string|null + * @param string $fileName + * @param string $xmlFile */ - private function loadParagraphStyle($elm) + private function readStyles($fileName, $xmlFile) { - if ($xml = $elm->pPr) { - if ($xml->pStyle) { - return (string)$xml->pStyle['val']; - } - $style = array(); - if ($xml->jc) { - $style['align'] = (string)$xml->jc['val']; - } - if ($xml->ind) { - if (isset($xml->ind->left)) { - $style['indent'] = (int)$xml->ind->left; + $xmlReader = new XMLReader(); + $xmlReader->getDomFromZip($fileName, $xmlFile); + + $nodes = $xmlReader->getElements('w:style'); + if ($nodes->length > 0) { + foreach ($nodes as $node) { + $type = $xmlReader->getAttribute($node, 'w:type'); + $name = $xmlReader->getAttribute($node, 'w:styleId'); + if (is_null($name)) { + $name = $xmlReader->getAttribute('w:name', 'w:val', $node); } - if (isset($xml->ind->hanging)) { - $style['hanging'] = (int)$xml->ind->hanging; - } - if (isset($xml->ind->line)) { - $style['spacing'] = (int)$xml->ind->line; + $default = ($xmlReader->getAttribute($node, 'w:default') == 1); + if ($type == 'paragraph') { + $pStyle = $this->readWpPr($xmlReader, $node); + $fStyle = $this->readWrPr($xmlReader, $node); + if (empty($fStyle)) { + $this->phpWord->addParagraphStyle($name, $pStyle); + } else { + $this->phpWord->addFontStyle($name, $fStyle, $pStyle); + } + } elseif ($type == 'character') { + $fStyle = $this->readWrPr($xmlReader, $node); + if (!empty($fStyle)) { + $this->phpWord->addFontStyle($name, $fStyle); + } + } elseif ($type == 'table') { + $tStyle = $this->readWtblPr($xmlReader, $node); + if (!empty($tStyle)) { + $this->phpWord->addTableStyle($name, $tStyle); + } } } - if ($xml->spacing) { - if (isset($xml->spacing['after'])) { - $style['spaceAfter'] = (int)$xml->spacing['after']; - } - if (isset($xml->spacing['before'])) { - $style['spaceBefore'] = (int)$xml->spacing['before']; - } - if (isset($xml->spacing['line'])) { - $style['spacing'] = (int)$xml->spacing['line']; - } - } - if ($xml->basedOn) { - $style['basedOn'] = (string)$xml->basedOn['val']; - } - if ($xml->next) { - $style['next'] = (string)$xml->next['val']; - } - if ($xml->widowControl) { - $style['widowControl'] = false; - } - if ($xml->keepNext) { - $style['keepNext'] = true; - } - if ($xml->keepLines) { - $style['keepLines'] = true; - } - if ($xml->pageBreakBefore) { - $style['pageBreakBefore'] = true; - } - return $style; } - return null; } /** - * Load font style from SimpleXMLElement + * Read document.xml * - * @param \SimpleXMLElement $elm - * @return array|string|null + * @param string $fileName + * @param string $xmlFile */ - private function loadFontStyle($elm) + private function readDocument($fileName, $xmlFile) { - if ($xml = $elm->rPr) { - if ($xml->rStyle) { - return (string)$xml->rStyle['val']; + $xmlReader = new XMLReader(); + $xmlReader->getDomFromZip($fileName, $xmlFile); + + $nodes = $xmlReader->getElements('w:body/*'); + if ($nodes->length > 0) { + $section = $this->phpWord->addSection(); + foreach ($nodes as $node) { + if ($node->nodeName == 'w:p') { // Paragraph + if ($xmlReader->getAttribute('w:r/w:br', 'w:type', $node) == 'page') { + $section->addPageBreak(); // PageBreak + } else { + $this->readWp($xmlReader, $node, $section); + } + // Section properties + if ($xmlReader->elementExists('w:pPr/w:sectPr', $node)) { + $settingsNode = $xmlReader->getElement('w:pPr/w:sectPr', $node); + $settings = $this->readWsectPr($xmlReader, $settingsNode); + $section->setSettings($settings); + $this->readHeaderFooter($fileName, $settings, $section); + $section = $this->phpWord->addSection(); + } + } elseif ($node->nodeName == 'w:tbl') { // Table + $this->readWtbl($xmlReader, $node, $section); + } elseif ($node->nodeName == 'w:sectPr') { // Last section + $settings = $this->readWsectPr($xmlReader, $node); + $section->setSettings($settings); + $this->readHeaderFooter($fileName, $settings, $section); + } } - $style = array(); - if ($xml->rFonts) { - $style['name'] = (string)$xml->rFonts['ascii']; + } + } + + /** + * Read header footer + * + * @param string $fileName + * @param array $settings + * @param Section $section + */ + private function readHeaderFooter($fileName, $settings, &$section) + { + if (is_array($settings) && array_key_exists('headerFooter', $settings)) { + foreach ($settings['headerFooter'] as $rId => $headerFooter) { + if (array_key_exists($rId, $this->partRels['document'])) { + $target = $this->partRels['document'][$rId]['target']; + $xmlFile = 'word/' . $target; + $method = 'add' . $headerFooter['method']; + $type = $headerFooter['type']; + $object = $section->$method($type); + + $this->activePart = str_replace('.xml', '', $target); + $this->readPartRels($fileName, $this->activePart); + + $xmlReader = new XMLReader(); + $xmlReader->getDomFromZip($fileName, $xmlFile); + $nodes = $xmlReader->getElements('*'); + if ($nodes->length > 0) { + foreach ($nodes as $node) { + if ($node->nodeName == 'w:p') { // Paragraph + $this->readWp($xmlReader, $node, $object); + } elseif ($node->nodeName == 'w:tbl') { // Table + $this->readWtbl($xmlReader, $node, $object); + } + } + } + } } - if ($xml->sz) { - $style['size'] = (int)$xml->sz['val'] / 2; - } - if ($xml->color) { - $style['color'] = (string)$xml->color['val']; - } - if ($xml->b) { - $style['bold'] = true; - } - if ($xml->i) { - $style['italic'] = true; - } - if ($xml->u) { - $style['underline'] = (string)$xml->u['val']; - } - if ($xml->strike) { - $style['strikethrough'] = true; - } - if ($xml->highlight) { - $style['fgColor'] = (string)$xml->highlight['val']; - } - if ($xml->vertAlign) { - if ($xml->vertAlign['val'] == 'superscript') { - $style['superScript'] = true; + } + $this->activePart = 'document'; + } + + /** + * Read w:p + * + * @param mixed $container + * @todo Get font style for preserve text + */ + private function readWp(XMLReader $xmlReader, \DOMNode $domNode, &$container) + { + // Paragraph style + $pStyle = null; + if ($xmlReader->elementExists('w:pPr', $domNode)) { + $pStyle = $this->readWpPr($xmlReader, $domNode); + } + + // Content + if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) { // Preserve text + $textContent = ''; + $fStyle = $this->readWrPr($xmlReader, $domNode); + $nodes = $xmlReader->getElements('w:r', $domNode); + foreach ($nodes as $node) { + $instrText = $xmlReader->getValue('w:instrText', $node); + if (!is_null($instrText)) { + $textContent .= '{' . $instrText . '}'; } else { - $style['subScript'] = true; + $textContent .= $xmlReader->getValue('w:t', $node); + } + } + $container->addPreserveText($textContent, $fStyle, $pStyle); + } else { // Text and TextRun + $runCount = $xmlReader->countElements('w:r', $domNode); + $linkCount = $xmlReader->countElements('w:hyperlink', $domNode); + $runLinkCount = $runCount + $linkCount; + if ($runLinkCount == 0) { + $container->addTextBreak(null, $pStyle); + } else { + if ($runLinkCount > 1) { + $textContainer = &$container->addTextRun($pStyle); + $pStyle = null; + } else { + $textContainer = &$container; + } + $nodes = $xmlReader->getElements('*', $domNode); + foreach ($nodes as $node) { + $this->readWr($xmlReader, $node, $textContainer, $pStyle); } } - return $style; } - return null; + } + + /** + * Read w:r + * + * @param mixed $container + * @param mixed $pStyle + */ + private function readWr(XMLReader $xmlReader, \DOMNode $domNode, &$container, $pStyle = null) + { + if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'))) { + return; + } + $fStyle = $this->readWrPr($xmlReader, $domNode); + if ($domNode->nodeName == 'w:hyperlink') { + $rId = $xmlReader->getAttribute($domNode, 'r:id'); + $textContent = $xmlReader->getValue('w:r/w:t', $domNode); + if (array_key_exists($this->activePart, $this->partRels)) { + if (array_key_exists($rId, $this->partRels[$this->activePart])) { + $linkSource = $this->partRels[$this->activePart][$rId]['target']; + } + } + $container->addLink($linkSource, $textContent, $fStyle, $pStyle); + } else { + $textContent = $xmlReader->getValue('w:t', $domNode); + $container->addText($textContent, $fStyle, $pStyle); + } + } + + /** + * Read w:tbl + * + * @param mixed $container + */ + private function readWtbl(XMLReader $xmlReader, \DOMNode $domNode, &$container) + { + // Table style + $tblStyle = null; + if ($xmlReader->elementExists('w:tblPr', $domNode)) { + $tblStyle = $this->readWtblPr($xmlReader, $domNode); + } + + $table = $container->addTable($tblStyle); + $tblNodes = $xmlReader->getElements('*', $domNode); + foreach ($tblNodes as $tblNode) { + $tblNodeName = $tblNode->nodeName; + if ($tblNode->nodeName == 'w:tblGrid') { // Column + // @todo Do something with table columns + } elseif ($tblNode->nodeName == 'w:tr') { // Row + $rowHeight = $xmlReader->getAttribute('w:trPr/w:trHeight', 'w:val', $tblNode); + $rowHRule = $xmlReader->getAttribute('w:trPr/w:trHeight', 'w:hRule', $tblNode); + $rowHRule = $rowHRule == 'exact' ? true : false; + $rowStyle = array( + 'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode), + 'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode), + 'exactHeight' => $rowHRule, + ); + + $row = $table->addRow($rowHeight, $rowStyle); + $rowNodes = $xmlReader->getElements('*', $tblNode); + foreach ($rowNodes as $rowNode) { + if ($rowNode->nodeName == 'w:trPr') { // Row style + // @todo Do something with row style + } elseif ($rowNode->nodeName == 'w:tc') { // Cell + $cellWidth = $xmlReader->getAttribute('w:tcPr/w:tcW', 'w:w', $rowNode); + $cellStyle = null; + if ($xmlReader->elementExists('w:tcPr', $rowNode)) { + $cellStyle = $this->readWtcPr( + $xmlReader, + $xmlReader->getElement('w:tcPr', $rowNode) + ); + } + + $cell = $row->addCell($cellWidth, $cellStyle); + $cellNodes = $xmlReader->getElements('*', $rowNode); + foreach ($cellNodes as $cellNode) { + if ($cellNode->nodeName == 'w:p') { // Paragraph + $this->readWp($xmlReader, $cellNode, $cell); + } + } + } + } + } + } + } + + /** + * Read w:sectPr + * + * @return array|null + */ + private function readWsectPr(XMLReader $xmlReader, \DOMNode $domNode) + { + $ret = null; + $mapping = array( + 'w:type' => 'breakType', 'w:pgSz' => 'pageSize', + 'w:pgMar' => 'pageMargin', 'w:cols' => 'columns', + 'w:headerReference' => 'header', 'w:footerReference' => 'footer', + ); + $nodes = $xmlReader->getElements('*', $domNode); + foreach ($nodes as $node) { + $nodeName = $node->nodeName; + if (!array_key_exists($nodeName, $mapping)) { + continue; + } + $retKey = $mapping[$nodeName]; + if ($nodeName == 'w:type') { + $ret['breakType'] = $xmlReader->getAttribute($node, 'w:val'); + } elseif ($nodeName == 'w:pgSz') { + $ret['pageSizeW'] = $xmlReader->getAttribute($node, 'w:w'); + $ret['pageSizeH'] = $xmlReader->getAttribute($node, 'w:h'); + $ret['orientation'] = $xmlReader->getAttribute($node, 'w:orient'); + } elseif ($nodeName == 'w:pgMar') { + $ret['topMargin'] = $xmlReader->getAttribute($node, 'w:top'); + $ret['leftMargin'] = $xmlReader->getAttribute($node, 'w:left'); + $ret['bottomMargin'] = $xmlReader->getAttribute($node, 'w:bottom'); + $ret['rightMargin'] = $xmlReader->getAttribute($node, 'w:right'); + $ret['headerHeight'] = $xmlReader->getAttribute($node, 'w:header'); + $ret['footerHeight'] = $xmlReader->getAttribute($node, 'w:footer'); + $ret['gutter'] = $xmlReader->getAttribute($node, 'w:gutter'); + } elseif ($nodeName == 'w:cols') { + $ret['colsNum'] = $xmlReader->getAttribute($node, 'w:num'); + $ret['colsSpace'] = $xmlReader->getAttribute($node, 'w:space'); + } elseif (in_array($nodeName, array('w:headerReference', 'w:footerReference'))) { + $id = $xmlReader->getAttribute($node, 'r:id'); + $ret['headerFooter'][$id] = array( + 'method' => $retKey, + 'type' => $xmlReader->getAttribute($node, 'w:type'), + ); + } + } + + return $ret; + } + + /** + * Read w:pPr + * + * @return string|array|null + */ + private function readWpPr(XMLReader $xmlReader, \DOMNode $domNode) + { + $ret = null; + if ($xmlReader->elementExists('w:pPr', $domNode)) { + if ($xmlReader->elementExists('w:pPr/w:pStyle', $domNode)) { + $ret = $xmlReader->getAttribute('w:pPr/w:pStyle', 'w:val', $domNode); + } else { + $ret = array(); + $mapping = array( + 'w:jc' => 'align', 'w:ind' => 'indent', 'w:spacing' => 'spacing', + 'w:basedOn' => 'basedOn', 'w:next' => 'next', + 'w:widowControl' => 'widowControl', 'w:keepNext' => 'keepNext', + 'w:keepLines' => 'keepLines', 'w:pageBreakBefore' => 'pageBreakBefore', + ); + $nodes = $xmlReader->getElements('w:pPr/*', $domNode); + foreach ($nodes as $node) { + $nodeName = $node->nodeName; + if (!array_key_exists($nodeName, $mapping)) { + continue; + } + $retKey = $mapping[$nodeName]; + if ($nodeName == 'w:ind') { + $ret['indent'] = $xmlReader->getAttribute($node, 'w:left'); + $ret['hanging'] = $xmlReader->getAttribute($node, 'w:hanging'); + } elseif ($nodeName == 'w:spacing') { + $ret['spaceAfter'] = $xmlReader->getAttribute($node, 'w:after'); + $ret['spaceBefore'] = $xmlReader->getAttribute($node, 'w:before'); + $ret['line'] = $xmlReader->getAttribute($node, 'w:line'); + } elseif (in_array($nodeName, array('w:keepNext', 'w:keepLines', 'w:pageBreakBefore'))) { + $ret[$retKey] = true; + } elseif (in_array($nodeName, array('w:widowControl'))) { + $ret[$retKey] = false; + } elseif (in_array($nodeName, array('w:jc', 'w:basedOn', 'w:next'))) { + $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val'); + } + } + } + } + + return $ret; + } + + /** + * Read w:rPr + * + * @return string|array|null + */ + private function readWrPr(XMLReader $xmlReader, \DOMNode $domNode) + { + $ret = null; + if ($xmlReader->elementExists('w:rPr', $domNode)) { + if ($xmlReader->elementExists('w:rPr/w:rStyle', $domNode)) { + $ret = $xmlReader->getAttribute('w:rPr/w:rStyle', 'w:val', $domNode); + } else { + $ret = array(); + $mapping = array( + 'w:b' => 'bold', 'w:i' => 'italic', 'w:color' => 'color', + 'w:strike' => 'strikethrough', 'w:u' => 'underline', + 'w:highlight' => 'fgColor', 'w:sz' => 'size', + 'w:rFonts' => 'name', 'w:vertAlign' => 'superScript', + ); + $nodes = $xmlReader->getElements('w:rPr/*', $domNode); + foreach ($nodes as $node) { + $nodeName = $node->nodeName; + if (!array_key_exists($nodeName, $mapping)) { + continue; + } + $retKey = $mapping[$nodeName]; + if ($nodeName == 'w:rFonts') { + $ret['name'] = $xmlReader->getAttribute($node, 'w:ascii'); + $ret['hint'] = $xmlReader->getAttribute($node, 'w:hint'); + } elseif (in_array($nodeName, array('w:b', 'w:i', 'w:strike'))) { + $ret[$retKey] = true; + } elseif (in_array($nodeName, array('w:u', 'w:highlight', 'w:color'))) { + $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val'); + } elseif ($nodeName == 'w:sz') { + $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val') / 2; + } elseif ($nodeName == 'w:vertAlign') { + $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val'); + if ($ret[$retKey] == 'superscript') { + $ret['superScript'] = true; + } else { + $ret['superScript'] = false; + $ret['subScript'] = true; + } + } + } + } + } + + return $ret; + } + /** + * Read w:tblPr + * + * @return string|array|null + * @todo Capture w:tblStylePr w:type="firstRow" + */ + private function readWtblPr(XMLReader $xmlReader, \DOMNode $domNode) + { + $ret = null; + $margins = array('top', 'left', 'bottom', 'right'); + $borders = $margins + array('insideH', 'insideV'); + + if ($xmlReader->elementExists('w:tblPr', $domNode)) { + if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) { + $ret = $xmlReader->getAttribute('w:tblPr/w:tblStyle', 'w:val', $domNode); + } else { + $ret = array(); + $mapping = array( + 'w:tblCellMar' => 'cellMargin', 'w:tblBorders' => 'border', + ); + $nodes = $xmlReader->getElements('w:tblPr/*', $domNode); + foreach ($nodes as $node) { + $nodeName = $node->nodeName; + if (!array_key_exists($nodeName, $mapping)) { + continue; + } + $retKey = $mapping[$nodeName]; + if ($nodeName == 'w:tblCellMar') { + foreach ($margins as $side) { + $ucfirstSide = ucfirst($side); + $ret["cellMargin$ucfirstSide"] = $xmlReader->getAttribute("w:$side", 'w:w', $node); + } + } elseif ($nodeName == 'w:tblBorders') { + foreach ($borders as $side) { + $ucfirstSide = ucfirst($side); + $ret["border{$ucfirstSide}Size"] = $xmlReader->getAttribute("w:$side", 'w:sz', $node); + $ret["border{$ucfirstSide}Color"] = $xmlReader->getAttribute("w:$side", 'w:color', $node); + } + } + } + } + } + + return $ret; + } + + /** + * Read w:tcPr + * + * @return array|null + */ + private function readWtcPr(XMLReader $xmlReader, \DOMNode $domNode) + { + $ret = null; + $mapping = array( + 'w:shd' => 'bgColor', + 'w:vAlign' => 'valign', 'w:textDirection' => 'textDirection', + 'w:gridSpan' => 'gridSpan', 'w:vMerge' => 'vMerge', + ); + $nodes = $xmlReader->getElements('*', $domNode); + foreach ($nodes as $node) { + $nodeName = $node->nodeName; + if (!array_key_exists($nodeName, $mapping)) { + continue; + } + $retKey = $mapping[$nodeName]; + if ($nodeName == 'w:shd') { + $ret['bgColor'] = $xmlReader->getAttribute($node, 'w:fill'); + } else { + $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val'); + } + } + + return $ret; } /** diff --git a/src/PhpWord/Shared/XMLReader.php b/src/PhpWord/Shared/XMLReader.php new file mode 100644 index 00000000..15ed9da3 --- /dev/null +++ b/src/PhpWord/Shared/XMLReader.php @@ -0,0 +1,163 @@ +open($zipFile); + if ($canOpen === false) { + throw new Exception('Cannot open archive file.'); + } + $contents = $zip->getFromName($xmlFile); + $zip->close(); + if ($contents === false) { + return false; + } else { + $this->dom = new \DOMDocument(); + $this->dom->loadXML($contents); + return $this->dom; + } + } + + /** + * Get elements + * + * @param string $path + * @return \DOMNodeList + */ + public function getElements($path, \DOMNode $context = null) + { + if ($this->dom === null) { + return array(); + } + if ($this->xpath === null) { + $this->xpath = new \DOMXpath($this->dom); + } + + return $this->xpath->query($path, $context); + } + + /** + * Get elements + * + * @param string $path + * @return \DOMNodeList + */ + public function getElement($path, \DOMNode $context = null) + { + $elements = $this->getElements($path, $context); + if ($elements->length > 0) { + return $elements->item(0); + } else { + return false; + } + } + + /** + * Get element attribute + * + * @param string|\DOMNode $path + * @param string $attribute + * @return null|string + */ + public function getAttribute($path, $attribute, \DOMNode $context = null) + { + if ($path instanceof \DOMNode) { + $return = $path->getAttribute($attribute); + } else { + $elements = $this->getElements($path, $context); + if ($elements->length > 0) { + $return = $elements->item(0)->getAttribute($attribute); + } else { + $return = ''; + } + } + + return ($return == '') ? null : $return; + } + + /** + * Get element value + * + * @param string $path + * @return null|string + */ + public function getValue($path, \DOMNode $context = null) + { + $elements = $this->getElements($path, $context); + if ($elements->length > 0) { + $return = $elements->item(0)->nodeValue; + } else { + $return = ''; + } + + return ($return == '') ? null : $return; + } + + /** + * Count elements + * + * @param string $path + * @return \DOMNodeList + */ + public function countElements($path, \DOMNode $context = null) + { + $elements = $this->getElements($path, $context); + return $elements->length; + } + + /** + * Element exists + * + * @param string $path + * @return \DOMNodeList + */ + public function elementExists($path, \DOMNode $context = null) + { + return $this->getElements($path, $context)->length > 0; + } +} diff --git a/src/PhpWord/Style.php b/src/PhpWord/Style.php index e883b9ed..ab1159ce 100755 --- a/src/PhpWord/Style.php +++ b/src/PhpWord/Style.php @@ -140,11 +140,13 @@ class Style private static function setStyleValues($styleName, $styleValues, $styleObject) { if (!array_key_exists($styleName, self::$styles)) { - foreach ($styleValues as $key => $value) { - if (substr($key, 0, 1) == '_') { - $key = substr($key, 1); + if (is_array($styleValues)) { + foreach ($styleValues as $key => $value) { + if (substr($key, 0, 1) == '_') { + $key = substr($key, 1); + } + $styleObject->setStyleValue($key, $value); } - $styleObject->setStyleValue($key, $value); } self::$styles[$styleName] = $styleObject;