PHPWord/src/PhpWord/Reader/Word2007/Document.php

351 lines
14 KiB
PHP

<?php
/**
* PHPWord
*
* @link https://github.com/PHPOffice/PHPWord
* @copyright 2014 PHPWord
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/
namespace PhpOffice\PhpWord\Reader\Word2007;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;
/**
* Document reader
*/
class Document extends AbstractPart
{
/**
* Read document.xml
*
* @param \PhpOffice\PhpWord\PhpWord $phpWord
*/
public function read(PhpWord &$phpWord)
{
$xmlReader = new XMLReader();
$xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
$nodes = $xmlReader->getElements('w:body/*');
if ($nodes->length > 0) {
$section = $phpWord->addSection();
foreach ($nodes as $node) {
switch ($node->nodeName) {
case 'w:p': // Paragraph
// Page break
// @todo <w:lastRenderedPageBreak>
if ($xmlReader->getAttribute('w:type', $node, 'w:r/w:br') == 'page') {
$section->addPageBreak(); // PageBreak
}
// Paragraph
$this->readParagraph($xmlReader, $node, $section, 'document');
// Section properties
if ($xmlReader->elementExists('w:pPr/w:sectPr', $node)) {
$settingsNode = $xmlReader->getElement('w:pPr/w:sectPr', $node);
if (!is_null($settingsNode)) {
$settings = $this->readSectionStyle($xmlReader, $settingsNode);
$section->setSettings($settings);
if (!is_null($settings)) {
$this->readHeaderFooter($settings, $section);
}
}
$section = $phpWord->addSection();
}
break;
case 'w:tbl': // Table
$this->readTable($xmlReader, $node, $section, 'document');
break;
case 'w:sectPr': // Last section
$settings = $this->readSectionStyle($xmlReader, $node);
$section->setSettings($settings);
if (!is_null($settings)) {
$this->readHeaderFooter($settings, $section);
}
break;
}
}
}
}
/**
* Read header footer
*
* @param array $settings
* @param \PhpOffice\PhpWord\Element\Section $section
*/
private function readHeaderFooter($settings, &$section)
{
if (is_array($settings) && array_key_exists('hf', $settings)) {
foreach ($settings['hf'] as $rId => $hfSetting) {
if (array_key_exists($rId, $this->rels['document'])) {
list($hfType, $xmlFile, $docPart) = array_values($this->rels['document'][$rId]);
$method = "add{$hfType}";
$hfObject = $section->$method($hfSetting['type']);
// Read header/footer content
$xmlReader = new XMLReader();
$xmlReader->getDomFromZip($this->docFile, $xmlFile);
$nodes = $xmlReader->getElements('*');
if ($nodes->length > 0) {
foreach ($nodes as $node) {
switch ($node->nodeName) {
case 'w:p': // Paragraph
$this->readParagraph($xmlReader, $node, $hfObject, $docPart);
break;
case 'w:tbl': // Table
$this->readTable($xmlReader, $node, $hfObject, $docPart);
break;
}
}
}
}
}
}
}
/**
* Read w:p
*
* @param mixed $parent
* @param string $docPart
*
* @todo Get font style for preserve text
*/
private function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Paragraph style
$paragraphStyle = null;
$headingMatches = array();
if ($xmlReader->elementExists('w:pPr', $domNode)) {
$paragraphStyle = $this->readParagraphStyle($xmlReader, $domNode);
if (is_string($paragraphStyle)) {
preg_match('/Heading(\d)/', $paragraphStyle, $headingMatches);
}
}
// PreserveText
if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
$ignoreText = false;
$textContent = '';
$fontStyle = $this->readFontStyle($xmlReader, $domNode);
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$instrText = $xmlReader->getValue('w:instrText', $node);
if ($xmlReader->elementExists('w:fldChar', $node)) {
$fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
if ($fldCharType == 'begin') {
$ignoreText = true;
} elseif ($fldCharType == 'end') {
$ignoreText = false;
}
}
if (!is_null($instrText)) {
$textContent .= '{' . $instrText . '}';
} else {
if ($ignoreText === false) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
}
}
$parent->addPreserveText($textContent, $fontStyle, $paragraphStyle);
// List item
} elseif ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
$textContent = '';
$numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
$levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
$parent->addListItem($textContent, $levelId, null, "PHPWordList{$numId}", $paragraphStyle);
// Heading
} elseif (!empty($headingMatches)) {
$textContent = '';
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
$parent->addTitle($textContent, $headingMatches[1]);
// Text and TextRun
} else {
$runCount = $xmlReader->countElements('w:r', $domNode);
$linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
$runLinkCount = $runCount + $linkCount;
if ($runLinkCount == 0) {
$parent->addTextBreak(null, $paragraphStyle);
} else {
if ($runLinkCount > 1) {
$textrun = $parent->addTextRun($paragraphStyle);
$textParent = &$textrun;
} else {
$textParent = &$parent;
}
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
$this->readRun($xmlReader, $node, $textParent, $docPart, $paragraphStyle);
}
}
}
}
/**
* Read w:tbl
*
* @param mixed $parent
* @param string $docPart
*/
private function readTable(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Table style
$tblStyle = null;
if ($xmlReader->elementExists('w:tblPr', $domNode)) {
$tblStyle = $this->readTableStyle($xmlReader, $domNode);
}
$table = $parent->addTable($tblStyle);
$tblNodes = $xmlReader->getElements('*', $domNode);
foreach ($tblNodes as $tblNode) {
if ($tblNode->nodeName == 'w:tblGrid') { // Column
// @todo Do something with table columns
} elseif ($tblNode->nodeName == 'w:tr') { // Row
$rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
$rowHRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
$rowHRule = $rowHRule == 'exact' ? true : false;
$rowStyle = array(
'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
'exactHeight' => $rowHRule,
);
$row = $table->addRow($rowHeight, $rowStyle);
$rowNodes = $xmlReader->getElements('*', $tblNode);
foreach ($rowNodes as $rowNode) {
if ($rowNode->nodeName == 'w:trPr') { // Row style
// @todo Do something with row style
} elseif ($rowNode->nodeName == 'w:tc') { // Cell
$cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
$cellStyle = null;
$cellStyleNode = $xmlReader->getElement('w:tcPr', $rowNode);
if (!is_null($cellStyleNode)) {
$cellStyle = $this->readCellStyle($xmlReader, $cellStyleNode);
}
$cell = $row->addCell($cellWidth, $cellStyle);
$cellNodes = $xmlReader->getElements('*', $rowNode);
foreach ($cellNodes as $cellNode) {
if ($cellNode->nodeName == 'w:p') { // Paragraph
$this->readParagraph($xmlReader, $cellNode, $cell, $docPart);
}
}
}
}
}
}
}
/**
* Read w:sectPr
*
* @return array|null
*/
private function readSectionStyle(XMLReader $xmlReader, \DOMElement $domNode)
{
$ret = null;
$mapping = array(
'w:type' => 'breakType', 'w:pgSz' => 'pageSize',
'w:pgMar' => 'pageMargin', 'w:cols' => 'columns',
'w:headerReference' => 'header', 'w:footerReference' => 'footer',
);
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
if (!array_key_exists($node->nodeName, $mapping)) {
continue;
}
$property = $mapping[$node->nodeName];
switch ($node->nodeName) {
case 'w:type':
$ret['breakType'] = $xmlReader->getAttribute('w:val', $node);
break;
case 'w:pgSz':
$ret['pageSizeW'] = $xmlReader->getAttribute('w:w', $node);
$ret['pageSizeH'] = $xmlReader->getAttribute('w:h', $node);
$ret['orientation'] = $xmlReader->getAttribute('w:orient', $node);
break;
case 'w:pgMar':
$ret['topMargin'] = $xmlReader->getAttribute('w:top', $node);
$ret['leftMargin'] = $xmlReader->getAttribute('w:left', $node);
$ret['bottomMargin'] = $xmlReader->getAttribute('w:bottom', $node);
$ret['rightMargin'] = $xmlReader->getAttribute('w:right', $node);
$ret['headerHeight'] = $xmlReader->getAttribute('w:header', $node);
$ret['footerHeight'] = $xmlReader->getAttribute('w:footer', $node);
$ret['gutter'] = $xmlReader->getAttribute('w:gutter', $node);
break;
case 'w:cols':
$ret['colsNum'] = $xmlReader->getAttribute('w:num', $node);
$ret['colsSpace'] = $xmlReader->getAttribute('w:space', $node);
break;
case 'w:headerReference':
case 'w:footerReference':
$id = $xmlReader->getAttribute('r:id', $node);
$ret['hf'][$id] = array(
'method' => $property,
'type' => $xmlReader->getAttribute('w:type', $node),
);
break;
}
}
return $ret;
}
/**
* Read w:tcPr
*
* @return array|null
*/
private function readCellStyle(XMLReader $xmlReader, \DOMElement $domNode)
{
$style = null;
$mapping = array(
'w:shd' => 'bgColor',
'w:vAlign' => 'valign', 'w:textDirection' => 'textDirection',
'w:gridSpan' => 'gridSpan', 'w:vMerge' => 'vMerge',
);
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
if (!array_key_exists($node->nodeName, $mapping)) {
continue;
}
$property = $mapping[$node->nodeName];
switch ($node->nodeName) {
case 'w:shd':
$style['bgColor'] = $xmlReader->getAttribute('w:fill', $node);
break;
default:
$style[$property] = $xmlReader->getAttribute('w:val', $node);
break;
}
}
return $style;
}
}