Reader: Ability to read tables, links, preserve text, and text break

This commit is contained in:
Ivan Lanin 2014-04-07 18:06:42 +07:00
parent 177c523799
commit d1d1e6b4df
5 changed files with 694 additions and 243 deletions

View File

@ -66,6 +66,9 @@ class Section extends Container
{ {
if (!is_null($settings) && is_array($settings)) { if (!is_null($settings) && is_array($settings)) {
foreach ($settings as $key => $value) { foreach ($settings as $key => $value) {
if (is_null($value)) {
continue;
}
if (substr($key, 0, 1) == '_') { if (substr($key, 0, 1) == '_') {
$key = substr($key, 1); $key = substr($key, 1);
} }

View File

@ -229,7 +229,10 @@ class Settings
} elseif ($key == 'borderColor') { } elseif ($key == 'borderColor') {
$this->setBorderColor($value); $this->setBorderColor($value);
} else { } else {
$this->$key = $value; $method = 'set' . $key;
if (method_exists($this, $method)) {
$this->$method($value);
}
} }
} }

View File

@ -13,31 +13,54 @@ use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Settings; use PhpOffice\PhpWord\Settings;
use PhpOffice\PhpWord\DocumentProperties; use PhpOffice\PhpWord\DocumentProperties;
use PhpOffice\PhpWord\Exception\Exception; use PhpOffice\PhpWord\Exception\Exception;
use PhpOffice\PhpWord\Shared\XMLReader;
use PhpOffice\PhpWord\Container\Section;
/** /**
* Reader for Word2007 * Reader for Word2007
*/ */
class Word2007 extends Reader implements IReader class Word2007 extends Reader implements IReader
{ {
/**
* PhpWord object
*
* @var PhpWord
*/
private $phpWord;
/**
* Part relationships
*
* @var array
*/
private $partRels = array('document' => array(), 'footnotes' => array());
/**
* Current active part document|footnotes|headerx|footerx
*
* @var string
*/
private $activePart = 'document';
/** /**
* Can the current IReader read the file? * Can the current IReader read the file?
* *
* @param string $pFilename * @param string $fileName
* @return bool * @return bool
* @throws \PhpOffice\PhpWord\Exception\Exception * @throws Exception
*/ */
public function canRead($pFilename) public function canRead($fileName)
{ {
// Check if file exists // Check if file exists
if (!file_exists($pFilename)) { if (!file_exists($fileName)) {
throw new Exception("Could not open {$pFilename} for reading! File does not exist."); throw new Exception("Could not open {$fileName} for reading! File does not exist.");
} }
$return = false; $return = false;
// Load file // Load file
$zipClass = Settings::getZipClass(); $zipClass = Settings::getZipClass();
$zip = new $zipClass(); $zip = new $zipClass();
if ($zip->open($pFilename) === true) { if ($zip->open($fileName) === true) {
// check if it is an OOXML archive // check if it is an OOXML archive
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
if ($rels !== false) { if ($rels !== false) {
@ -91,21 +114,24 @@ class Word2007 extends Reader implements IReader
/** /**
* Loads PhpWord from file * Loads PhpWord from file
* *
* @param string $pFilename * @param string $fileName
* @return \PhpOffice\PhpWord\PhpWord|null * @return PhpWord|null
*/ */
public function load($pFilename) public function load($fileName)
{ {
// Check if file exists and can be read // Check if file exists and can be read
if (!$this->canRead($pFilename)) { if (!$this->canRead($fileName)) {
return null; return null;
} }
// Initialisations // Initialisations
$word = new PhpWord(); $this->phpWord = new PhpWord();
$zipClass = Settings::getZipClass(); $zipClass = Settings::getZipClass();
$zip = new $zipClass(); $zip = new $zipClass();
$zip->open($pFilename); $zip->open($fileName);
// Read document relationships
$this->readPartRels($fileName, 'document');
// Read properties and documents // Read properties and documents
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
@ -118,7 +144,7 @@ class Word2007 extends Reader implements IReader
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
$xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/"); $xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/");
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"); $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties");
$docProps = $word->getDocumentProperties(); $docProps = $this->phpWord->getDocumentProperties();
$docProps->setCreator((string)self::arrayItem($xmlCore->xpath("dc:creator"))); $docProps->setCreator((string)self::arrayItem($xmlCore->xpath("dc:creator")));
$docProps->setLastModifiedBy((string)self::arrayItem($xmlCore->xpath("cp:lastModifiedBy"))); $docProps->setLastModifiedBy((string)self::arrayItem($xmlCore->xpath("cp:lastModifiedBy")));
$docProps->setCreated(strtotime(self::arrayItem($xmlCore->xpath("dcterms:created")))); $docProps->setCreated(strtotime(self::arrayItem($xmlCore->xpath("dcterms:created"))));
@ -134,7 +160,7 @@ class Word2007 extends Reader implements IReader
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties": case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties":
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) { if (is_object($xmlCore)) {
$docProps = $word->getDocumentProperties(); $docProps = $this->phpWord->getDocumentProperties();
if (isset($xmlCore->Company)) { if (isset($xmlCore->Company)) {
$docProps->setCompany((string)$xmlCore->Company); $docProps->setCompany((string)$xmlCore->Company);
} }
@ -147,7 +173,7 @@ class Word2007 extends Reader implements IReader
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties": case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties":
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) { if (is_object($xmlCore)) {
$docProps = $word->getDocumentProperties(); $docProps = $this->phpWord->getDocumentProperties();
foreach ($xmlCore as $xmlProperty) { foreach ($xmlCore as $xmlProperty) {
$cellDataOfficeAttributes = $xmlProperty->attributes(); $cellDataOfficeAttributes = $xmlProperty->attributes();
if (isset($cellDataOfficeAttributes['name'])) { if (isset($cellDataOfficeAttributes['name'])) {
@ -162,266 +188,520 @@ class Word2007 extends Reader implements IReader
} }
} }
break; break;
// Document
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument":
$dir = dirname($rel["Target"]);
$archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels";
$relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive));
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
$xpath = self::arrayItem(
$relsDoc->xpath("rel:Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")
);
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true));
if (is_object($xmlDoc)) {
$section = $word->addSection();
foreach ($xmlDoc->body->children() as $elm) {
$elmName = $elm->getName();
if ($elmName == 'p') { // Paragraph/section
// Create new section if section setting found
if ($elm->pPr->sectPr) {
$section->setSettings($this->loadSectionSettings($elm->pPr));
$section = $word->addSection();
continue;
}
// Has w:r? It's either text or textrun
if ($elm->r) {
// w:r = 1? It's a plain paragraph
if (count($elm->r) == 1) {
$section->addText(
$elm->r->t,
$this->loadFontStyle($elm->r)
);
// w:r more than 1? It's a textrun
} else {
$textRun = $section->addTextRun();
foreach ($elm->r as $r) {
$textRun->addText(
$r->t,
$this->loadFontStyle($r)
);
}
}
// No, it's a textbreak
} else {
$section->addTextBreak();
}
} elseif ($elmName == 'sectPr') {
// Last section setting
$section->setSettings($this->loadSectionSettings($xmlDoc->body));
}
}
}
break;
} }
} }
// Read styles // Read document
$docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels")); $this->readDocument($fileName, 'word/document.xml');
foreach ($docRels->Relationship as $rel) {
switch ($rel["Type"]) { // Read document relationships
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles": foreach ($this->partRels['document'] as $rId => $rel) {
$xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true)); if ($rel['type'] == 'styles') {
if (is_object($xmlStyle)) { $this->readStyles($fileName, 'word/' . $rel['target']);
foreach ($xmlStyle->children() as $elm) {
if ($elm->getName() != 'style') {
continue;
}
$pStyle = null;
$fStyle = null;
$hasParagraphStyle = isset($elm->pPr);
$hasFontStyle = isset($elm->rPr);
$styleName = (string)$elm->name['val'];
if ($hasParagraphStyle) {
$pStyle = $this->loadParagraphStyle($elm);
if (is_array($pStyle) && !$hasFontStyle) {
$word->addParagraphStyle($styleName, $pStyle);
}
}
if ($hasFontStyle) {
$fStyle = $this->loadFontStyle($elm);
$word->addFontStyle($styleName, $fStyle, $pStyle);
}
}
}
break;
} }
} }
$zip->close(); $zip->close();
return $word; return $this->phpWord;
} }
/** /**
* Load section settings from SimpleXMLElement * Read _rels/$partName.xml.rels
* *
* @param \SimpleXMLElement $elm * @param string $fileName
* @return array|string|null * @param string $partName document|footnotes|headerx|footerx
*
* @todo Implement gutter
*/ */
private function loadSectionSettings($elm) private function readPartRels($fileName, $partName)
{ {
if ($xml = $elm->sectPr) { $relPrefix = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/';
$setting = array(); $xmlReader = new XMLReader();
if ($xml->type) { $response = $xmlReader->getDomFromZip($fileName, "word/_rels/{$partName}.xml.rels");
$setting['breakType'] = (string)$xml->type['val']; if ($response) {
$rels = $xmlReader->getElements('*');
foreach ($rels as $rel) {
$this->partRels[$partName][$rel->getAttribute('Id')] = array(
'type' => str_replace($relPrefix, '', $rel->getAttribute('Type')),
'target' => $rel->getAttribute('Target'),
);
} }
if ($xml->pgSz) {
if (isset($xml->pgSz['w'])) {
$setting['pageSizeW'] = (int)$xml->pgSz['w'];
}
if (isset($xml->pgSz['h'])) {
$setting['pageSizeH'] = (int)$xml->pgSz['h'];
}
if (isset($xml->pgSz['orient'])) {
$setting['orientation'] = (string)$xml->pgSz['orient'];
}
}
if ($xml->pgMar) {
if (isset($xml->pgMar['top'])) {
$setting['topMargin'] = (int)$xml->pgMar['top'];
}
if (isset($xml->pgMar['left'])) {
$setting['leftMargin'] = (int)$xml->pgMar['left'];
}
if (isset($xml->pgMar['bottom'])) {
$setting['bottomMargin'] = (int)$xml->pgMar['bottom'];
}
if (isset($xml->pgMar['right'])) {
$setting['rightMargin'] = (int)$xml->pgMar['right'];
}
if (isset($xml->pgMar['header'])) {
$setting['headerHeight'] = (int)$xml->pgMar['header'];
}
if (isset($xml->pgMar['footer'])) {
$setting['footerHeight'] = (int)$xml->pgMar['footer'];
}
if (isset($xml->pgMar['gutter'])) {
// $setting['gutter'] = (int)$xml->pgMar['gutter'];
}
}
if ($xml->cols) {
if (isset($xml->cols['num'])) {
$setting['colsNum'] = (int)$xml->cols['num'];
}
if (isset($xml->cols['space'])) {
$setting['colsSpace'] = (int)$xml->cols['space'];
}
}
return $setting;
} }
return null;
} }
/** /**
* Load paragraph style from SimpleXMLElement * Read styles.xml
* *
* @param \SimpleXMLElement $elm * @param string $fileName
* @return array|string|null * @param string $xmlFile
*/ */
private function loadParagraphStyle($elm) private function readStyles($fileName, $xmlFile)
{ {
if ($xml = $elm->pPr) { $xmlReader = new XMLReader();
if ($xml->pStyle) { $xmlReader->getDomFromZip($fileName, $xmlFile);
return (string)$xml->pStyle['val'];
} $nodes = $xmlReader->getElements('w:style');
$style = array(); if ($nodes->length > 0) {
if ($xml->jc) { foreach ($nodes as $node) {
$style['align'] = (string)$xml->jc['val']; $type = $xmlReader->getAttribute($node, 'w:type');
} $name = $xmlReader->getAttribute($node, 'w:styleId');
if ($xml->ind) { if (is_null($name)) {
if (isset($xml->ind->left)) { $name = $xmlReader->getAttribute('w:name', 'w:val', $node);
$style['indent'] = (int)$xml->ind->left;
} }
if (isset($xml->ind->hanging)) { $default = ($xmlReader->getAttribute($node, 'w:default') == 1);
$style['hanging'] = (int)$xml->ind->hanging; if ($type == 'paragraph') {
} $pStyle = $this->readWpPr($xmlReader, $node);
if (isset($xml->ind->line)) { $fStyle = $this->readWrPr($xmlReader, $node);
$style['spacing'] = (int)$xml->ind->line; if (empty($fStyle)) {
$this->phpWord->addParagraphStyle($name, $pStyle);
} else {
$this->phpWord->addFontStyle($name, $fStyle, $pStyle);
}
} elseif ($type == 'character') {
$fStyle = $this->readWrPr($xmlReader, $node);
if (!empty($fStyle)) {
$this->phpWord->addFontStyle($name, $fStyle);
}
} elseif ($type == 'table') {
$tStyle = $this->readWtblPr($xmlReader, $node);
if (!empty($tStyle)) {
$this->phpWord->addTableStyle($name, $tStyle);
}
} }
} }
if ($xml->spacing) {
if (isset($xml->spacing['after'])) {
$style['spaceAfter'] = (int)$xml->spacing['after'];
}
if (isset($xml->spacing['before'])) {
$style['spaceBefore'] = (int)$xml->spacing['before'];
}
if (isset($xml->spacing['line'])) {
$style['spacing'] = (int)$xml->spacing['line'];
}
}
if ($xml->basedOn) {
$style['basedOn'] = (string)$xml->basedOn['val'];
}
if ($xml->next) {
$style['next'] = (string)$xml->next['val'];
}
if ($xml->widowControl) {
$style['widowControl'] = false;
}
if ($xml->keepNext) {
$style['keepNext'] = true;
}
if ($xml->keepLines) {
$style['keepLines'] = true;
}
if ($xml->pageBreakBefore) {
$style['pageBreakBefore'] = true;
}
return $style;
} }
return null;
} }
/** /**
* Load font style from SimpleXMLElement * Read document.xml
* *
* @param \SimpleXMLElement $elm * @param string $fileName
* @return array|string|null * @param string $xmlFile
*/ */
private function loadFontStyle($elm) private function readDocument($fileName, $xmlFile)
{ {
if ($xml = $elm->rPr) { $xmlReader = new XMLReader();
if ($xml->rStyle) { $xmlReader->getDomFromZip($fileName, $xmlFile);
return (string)$xml->rStyle['val'];
$nodes = $xmlReader->getElements('w:body/*');
if ($nodes->length > 0) {
$section = $this->phpWord->addSection();
foreach ($nodes as $node) {
if ($node->nodeName == 'w:p') { // Paragraph
if ($xmlReader->getAttribute('w:r/w:br', 'w:type', $node) == 'page') {
$section->addPageBreak(); // PageBreak
} else {
$this->readWp($xmlReader, $node, $section);
}
// Section properties
if ($xmlReader->elementExists('w:pPr/w:sectPr', $node)) {
$settingsNode = $xmlReader->getElement('w:pPr/w:sectPr', $node);
$settings = $this->readWsectPr($xmlReader, $settingsNode);
$section->setSettings($settings);
$this->readHeaderFooter($fileName, $settings, $section);
$section = $this->phpWord->addSection();
}
} elseif ($node->nodeName == 'w:tbl') { // Table
$this->readWtbl($xmlReader, $node, $section);
} elseif ($node->nodeName == 'w:sectPr') { // Last section
$settings = $this->readWsectPr($xmlReader, $node);
$section->setSettings($settings);
$this->readHeaderFooter($fileName, $settings, $section);
}
} }
$style = array(); }
if ($xml->rFonts) { }
$style['name'] = (string)$xml->rFonts['ascii'];
/**
* Read header footer
*
* @param string $fileName
* @param array $settings
* @param Section $section
*/
private function readHeaderFooter($fileName, $settings, &$section)
{
if (is_array($settings) && array_key_exists('headerFooter', $settings)) {
foreach ($settings['headerFooter'] as $rId => $headerFooter) {
if (array_key_exists($rId, $this->partRels['document'])) {
$target = $this->partRels['document'][$rId]['target'];
$xmlFile = 'word/' . $target;
$method = 'add' . $headerFooter['method'];
$type = $headerFooter['type'];
$object = $section->$method($type);
$this->activePart = str_replace('.xml', '', $target);
$this->readPartRels($fileName, $this->activePart);
$xmlReader = new XMLReader();
$xmlReader->getDomFromZip($fileName, $xmlFile);
$nodes = $xmlReader->getElements('*');
if ($nodes->length > 0) {
foreach ($nodes as $node) {
if ($node->nodeName == 'w:p') { // Paragraph
$this->readWp($xmlReader, $node, $object);
} elseif ($node->nodeName == 'w:tbl') { // Table
$this->readWtbl($xmlReader, $node, $object);
}
}
}
}
} }
if ($xml->sz) { }
$style['size'] = (int)$xml->sz['val'] / 2; $this->activePart = 'document';
} }
if ($xml->color) {
$style['color'] = (string)$xml->color['val']; /**
} * Read w:p
if ($xml->b) { *
$style['bold'] = true; * @param mixed $container
} * @todo Get font style for preserve text
if ($xml->i) { */
$style['italic'] = true; private function readWp(XMLReader $xmlReader, \DOMNode $domNode, &$container)
} {
if ($xml->u) { // Paragraph style
$style['underline'] = (string)$xml->u['val']; $pStyle = null;
} if ($xmlReader->elementExists('w:pPr', $domNode)) {
if ($xml->strike) { $pStyle = $this->readWpPr($xmlReader, $domNode);
$style['strikethrough'] = true; }
}
if ($xml->highlight) { // Content
$style['fgColor'] = (string)$xml->highlight['val']; if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) { // Preserve text
} $textContent = '';
if ($xml->vertAlign) { $fStyle = $this->readWrPr($xmlReader, $domNode);
if ($xml->vertAlign['val'] == 'superscript') { $nodes = $xmlReader->getElements('w:r', $domNode);
$style['superScript'] = true; foreach ($nodes as $node) {
$instrText = $xmlReader->getValue('w:instrText', $node);
if (!is_null($instrText)) {
$textContent .= '{' . $instrText . '}';
} else { } else {
$style['subScript'] = true; $textContent .= $xmlReader->getValue('w:t', $node);
}
}
$container->addPreserveText($textContent, $fStyle, $pStyle);
} else { // Text and TextRun
$runCount = $xmlReader->countElements('w:r', $domNode);
$linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
$runLinkCount = $runCount + $linkCount;
if ($runLinkCount == 0) {
$container->addTextBreak(null, $pStyle);
} else {
if ($runLinkCount > 1) {
$textContainer = &$container->addTextRun($pStyle);
$pStyle = null;
} else {
$textContainer = &$container;
}
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
$this->readWr($xmlReader, $node, $textContainer, $pStyle);
} }
} }
return $style;
} }
return null; }
/**
 * Read w:r or w:hyperlink
 *
 * Adds a text element (for w:r) or a link element (for w:hyperlink) to the
 * container. Any other node is ignored, which lets callers pass every child
 * of a paragraph without filtering first.
 *
 * @param XMLReader $xmlReader Reader holding the part currently being parsed
 * @param \DOMNode $domNode Candidate node; only w:r and w:hyperlink are processed
 * @param mixed $container Element that receives the text or link
 * @param mixed $pStyle Paragraph style forwarded to addText()/addLink()
 */
private function readWr(XMLReader $xmlReader, \DOMNode $domNode, &$container, $pStyle = null)
{
    if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'), true)) {
        return;
    }

    $fStyle = $this->readWrPr($xmlReader, $domNode);

    if ($domNode->nodeName == 'w:hyperlink') {
        $rId = $xmlReader->getAttribute($domNode, 'r:id');
        $textContent = $xmlReader->getValue('w:r/w:t', $domNode);

        // Start from null so an unknown part or relationship id never leaves
        // $linkSource undefined when it is handed to addLink() below.
        $linkSource = null;
        if ($rId !== null
            && array_key_exists($this->activePart, $this->partRels)
            && array_key_exists($rId, $this->partRels[$this->activePart])
        ) {
            $linkSource = $this->partRels[$this->activePart][$rId]['target'];
        }
        $container->addLink($linkSource, $textContent, $fStyle, $pStyle);
    } else {
        $textContent = $xmlReader->getValue('w:t', $domNode);
        $container->addText($textContent, $fStyle, $pStyle);
    }
}
/**
 * Read w:tbl
 *
 * Adds a table to the container, then walks its w:tr rows and their w:tc
 * cells. Paragraphs found directly inside a cell are read into that cell;
 * other children (w:tblGrid, w:trPr, ...) are currently skipped.
 *
 * @param XMLReader $xmlReader
 * @param \DOMNode $domNode The w:tbl node
 * @param mixed $container Element that receives the table
 */
private function readWtbl(XMLReader $xmlReader, \DOMNode $domNode, &$container)
{
    // Table style is only looked up when the table carries a w:tblPr
    $tableStyle = null;
    if ($xmlReader->elementExists('w:tblPr', $domNode)) {
        $tableStyle = $this->readWtblPr($xmlReader, $domNode);
    }
    $table = $container->addTable($tableStyle);

    foreach ($xmlReader->getElements('*', $domNode) as $tableChild) {
        // @todo Do something with table columns (w:tblGrid)
        if ($tableChild->nodeName != 'w:tr') {
            continue;
        }

        // Row
        $height = $xmlReader->getAttribute('w:trPr/w:trHeight', 'w:val', $tableChild);
        $heightRule = $xmlReader->getAttribute('w:trPr/w:trHeight', 'w:hRule', $tableChild);
        $rowSettings = array(
            'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tableChild),
            'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tableChild),
            'exactHeight' => ($heightRule == 'exact'),
        );
        $row = $table->addRow($height, $rowSettings);

        foreach ($xmlReader->getElements('*', $tableChild) as $rowChild) {
            // @todo Do something with row style (w:trPr)
            if ($rowChild->nodeName != 'w:tc') {
                continue;
            }

            // Cell
            $width = $xmlReader->getAttribute('w:tcPr/w:tcW', 'w:w', $rowChild);
            $cellSettings = null;
            if ($xmlReader->elementExists('w:tcPr', $rowChild)) {
                $cellPrNode = $xmlReader->getElement('w:tcPr', $rowChild);
                $cellSettings = $this->readWtcPr($xmlReader, $cellPrNode);
            }
            $cell = $row->addCell($width, $cellSettings);

            foreach ($xmlReader->getElements('*', $rowChild) as $cellChild) {
                if ($cellChild->nodeName == 'w:p') { // Paragraph
                    $this->readWp($xmlReader, $cellChild, $cell);
                }
            }
        }
    }
}
/**
 * Read w:sectPr
 *
 * Translates the recognised children of a section properties node into a
 * settings array. Header and footer references are collected under the
 * 'headerFooter' key, indexed by their relationship id.
 *
 * @param XMLReader $xmlReader
 * @param \DOMNode $domNode The w:sectPr node
 * @return array|null Null when no recognised child is present
 */
private function readWsectPr(XMLReader $xmlReader, \DOMNode $domNode)
{
    $settings = null;

    foreach ($xmlReader->getElements('*', $domNode) as $child) {
        switch ($child->nodeName) {
            case 'w:type':
                $settings['breakType'] = $xmlReader->getAttribute($child, 'w:val');
                break;

            case 'w:pgSz':
                $settings['pageSizeW'] = $xmlReader->getAttribute($child, 'w:w');
                $settings['pageSizeH'] = $xmlReader->getAttribute($child, 'w:h');
                $settings['orientation'] = $xmlReader->getAttribute($child, 'w:orient');
                break;

            case 'w:pgMar':
                $settings['topMargin'] = $xmlReader->getAttribute($child, 'w:top');
                $settings['leftMargin'] = $xmlReader->getAttribute($child, 'w:left');
                $settings['bottomMargin'] = $xmlReader->getAttribute($child, 'w:bottom');
                $settings['rightMargin'] = $xmlReader->getAttribute($child, 'w:right');
                $settings['headerHeight'] = $xmlReader->getAttribute($child, 'w:header');
                $settings['footerHeight'] = $xmlReader->getAttribute($child, 'w:footer');
                $settings['gutter'] = $xmlReader->getAttribute($child, 'w:gutter');
                break;

            case 'w:cols':
                $settings['colsNum'] = $xmlReader->getAttribute($child, 'w:num');
                $settings['colsSpace'] = $xmlReader->getAttribute($child, 'w:space');
                break;

            case 'w:headerReference':
            case 'w:footerReference':
                // 'method' is later prefixed with 'add' by readHeaderFooter()
                $kind = ($child->nodeName == 'w:headerReference') ? 'header' : 'footer';
                $relationId = $xmlReader->getAttribute($child, 'r:id');
                $settings['headerFooter'][$relationId] = array(
                    'method' => $kind,
                    'type' => $xmlReader->getAttribute($child, 'w:type'),
                );
                break;

            default:
                // Unrecognised section property: skipped
                break;
        }
    }

    return $settings;
}
/**
 * Read w:pPr
 *
 * Returns the referenced style name when the paragraph properties contain a
 * w:pStyle, otherwise an array built from the recognised child elements.
 *
 * @param XMLReader $xmlReader
 * @param \DOMNode $domNode Node that may contain a w:pPr child
 * @return string|array|null Null when the node has no w:pPr
 */
private function readWpPr(XMLReader $xmlReader, \DOMNode $domNode)
{
    if (!$xmlReader->elementExists('w:pPr', $domNode)) {
        return null;
    }

    // A named style takes precedence over inline properties
    if ($xmlReader->elementExists('w:pPr/w:pStyle', $domNode)) {
        return $xmlReader->getAttribute('w:pPr/w:pStyle', 'w:val', $domNode);
    }

    $style = array();
    foreach ($xmlReader->getElements('w:pPr/*', $domNode) as $property) {
        switch ($property->nodeName) {
            case 'w:ind':
                $style['indent'] = $xmlReader->getAttribute($property, 'w:left');
                $style['hanging'] = $xmlReader->getAttribute($property, 'w:hanging');
                break;

            case 'w:spacing':
                $style['spaceAfter'] = $xmlReader->getAttribute($property, 'w:after');
                $style['spaceBefore'] = $xmlReader->getAttribute($property, 'w:before');
                $style['line'] = $xmlReader->getAttribute($property, 'w:line');
                break;

            // Presence of these elements switches the setting on
            case 'w:keepNext':
                $style['keepNext'] = true;
                break;
            case 'w:keepLines':
                $style['keepLines'] = true;
                break;
            case 'w:pageBreakBefore':
                $style['pageBreakBefore'] = true;
                break;

            // Presence of w:widowControl is recorded as false
            case 'w:widowControl':
                $style['widowControl'] = false;
                break;

            // Plain w:val carriers
            case 'w:jc':
                $style['align'] = $xmlReader->getAttribute($property, 'w:val');
                break;
            case 'w:basedOn':
                $style['basedOn'] = $xmlReader->getAttribute($property, 'w:val');
                break;
            case 'w:next':
                $style['next'] = $xmlReader->getAttribute($property, 'w:val');
                break;

            default:
                // Unrecognised paragraph property: skipped
                break;
        }
    }

    return $style;
}
/**
 * Read w:rPr
 *
 * Returns the referenced style name when the run properties contain a
 * w:rStyle, otherwise an array built from the recognised child elements.
 *
 * @param XMLReader $xmlReader
 * @param \DOMNode $domNode Node that may contain a w:rPr child
 * @return string|array|null Null when the node has no w:rPr
 */
private function readWrPr(XMLReader $xmlReader, \DOMNode $domNode)
{
    $ret = null;
    if ($xmlReader->elementExists('w:rPr', $domNode)) {
        if ($xmlReader->elementExists('w:rPr/w:rStyle', $domNode)) {
            $ret = $xmlReader->getAttribute('w:rPr/w:rStyle', 'w:val', $domNode);
        } else {
            $ret = array();
            $mapping = array(
                'w:b' => 'bold', 'w:i' => 'italic', 'w:color' => 'color',
                'w:strike' => 'strikethrough', 'w:u' => 'underline',
                'w:highlight' => 'fgColor', 'w:sz' => 'size',
                'w:rFonts' => 'name', 'w:vertAlign' => 'superScript',
            );
            $nodes = $xmlReader->getElements('w:rPr/*', $domNode);
            foreach ($nodes as $node) {
                $nodeName = $node->nodeName;
                if (!array_key_exists($nodeName, $mapping)) {
                    continue;
                }
                $retKey = $mapping[$nodeName];
                if ($nodeName == 'w:rFonts') {
                    $ret['name'] = $xmlReader->getAttribute($node, 'w:ascii');
                    $ret['hint'] = $xmlReader->getAttribute($node, 'w:hint');
                } elseif (in_array($nodeName, array('w:b', 'w:i', 'w:strike'), true)) {
                    // On/off properties: a bare element means "on", but an
                    // explicit w:val of 0/false/off switches the property off.
                    $onOff = $xmlReader->getAttribute($node, 'w:val');
                    $ret[$retKey] = !in_array($onOff, array('0', 'false', 'off'), true);
                } elseif (in_array($nodeName, array('w:u', 'w:highlight', 'w:color'), true)) {
                    $ret[$retKey] = $xmlReader->getAttribute($node, 'w:val');
                } elseif ($nodeName == 'w:sz') {
                    // w:sz is expressed in half-points; skip it when the value
                    // is missing instead of recording a size of 0.
                    $halfPoints = $xmlReader->getAttribute($node, 'w:val');
                    if (is_numeric($halfPoints)) {
                        $ret[$retKey] = $halfPoints / 2;
                    }
                } elseif ($nodeName == 'w:vertAlign') {
                    $vertAlign = $xmlReader->getAttribute($node, 'w:val');
                    if ($vertAlign == 'superscript') {
                        $ret['superScript'] = true;
                    } elseif ($vertAlign == 'subscript') {
                        $ret['superScript'] = false;
                        $ret['subScript'] = true;
                    }
                    // 'baseline' (or a missing value) is normal text: neither
                    // key is set, so it is no longer reported as subscript.
                }
            }
        }
    }

    return $ret;
}
/**
 * Read w:tblPr
 *
 * Returns the referenced style name when the table properties contain a
 * w:tblStyle, otherwise an array with the cell margins and border settings
 * found in w:tblCellMar and w:tblBorders.
 *
 * @param XMLReader $xmlReader
 * @param \DOMNode $domNode Node that may contain a w:tblPr child
 * @return string|array|null Null when the node has no w:tblPr
 * @todo Capture w:tblStylePr w:type="firstRow"
 */
private function readWtblPr(XMLReader $xmlReader, \DOMNode $domNode)
{
    $ret = null;
    $margins = array('top', 'left', 'bottom', 'right');
    // array_merge() appends the inner borders. The array union operator (+)
    // keeps the left operand on key collisions, and both lists use keys 0 and
    // 1, so it would silently drop 'insideH' and 'insideV'.
    $borders = array_merge($margins, array('insideH', 'insideV'));

    if ($xmlReader->elementExists('w:tblPr', $domNode)) {
        if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) {
            $ret = $xmlReader->getAttribute('w:tblPr/w:tblStyle', 'w:val', $domNode);
        } else {
            $ret = array();
            $nodes = $xmlReader->getElements('w:tblPr/*', $domNode);
            foreach ($nodes as $node) {
                $nodeName = $node->nodeName;
                if ($nodeName == 'w:tblCellMar') {
                    foreach ($margins as $side) {
                        $ucfirstSide = ucfirst($side);
                        $ret["cellMargin$ucfirstSide"] = $xmlReader->getAttribute("w:$side", 'w:w', $node);
                    }
                } elseif ($nodeName == 'w:tblBorders') {
                    foreach ($borders as $side) {
                        $ucfirstSide = ucfirst($side);
                        $ret["border{$ucfirstSide}Size"] = $xmlReader->getAttribute("w:$side", 'w:sz', $node);
                        $ret["border{$ucfirstSide}Color"] = $xmlReader->getAttribute("w:$side", 'w:color', $node);
                    }
                }
                // Any other table property is skipped
            }
        }
    }

    return $ret;
}
/**
* Read w:tcPr
*
* Builds a cell style array from the direct children of the given node.
* Only w:shd, w:vAlign, w:textDirection, w:gridSpan and w:vMerge are
* recognised; every other child is skipped.
*
* @param XMLReader $xmlReader Reader used to query child elements and attributes
* @param \DOMNode $domNode Node whose children are inspected (readWtbl passes the w:tcPr element)
* @return array|null Null when no recognised child is present
*/
private function readWtcPr(XMLReader $xmlReader, \DOMNode $domNode)
{
$ret = null;
// Recognised child element => style key it populates
$mapping = array(
'w:shd' => 'bgColor',
'w:vAlign' => 'valign', 'w:textDirection' => 'textDirection',
'w:gridSpan' => 'gridSpan', 'w:vMerge' => 'vMerge',
);
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
$nodeName = $node->nodeName;
if (!array_key_exists($nodeName, $mapping)) {
continue;
}
$retKey = $mapping[$nodeName];
if ($nodeName == 'w:shd') {
// Shading keeps its colour in w:fill rather than w:val
$ret['bgColor'] = $xmlReader->getAttribute($node, 'w:fill');
} else {
// All remaining recognised properties carry their value in w:val
$ret[$retKey] = $xmlReader->getAttribute($node, 'w:val');
}
}
return $ret;
} }
/** /**

View File

@ -0,0 +1,163 @@
<?php
/**
* PHPWord
*
* @link https://github.com/PHPOffice/PHPWord
* @copyright 2014 PHPWord
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
*/
namespace PhpOffice\PhpWord\Shared;
use PhpOffice\PhpWord\Exception\Exception;
use PhpOffice\PhpWord\Settings;
/**
 * XML Reader wrapper
 *
 * Loads one XML part out of a zip archive into a DOMDocument and offers
 * XPath-based helpers to query elements, attributes and values from it.
 *
 * @since 0.9.2
 */
class XMLReader
{
    /**
     * DOMDocument object; null until a part has been loaded
     *
     * @var \DOMDocument|null
     */
    private $dom = null;

    /**
     * DOMXPath object bound to $dom; created lazily on the first query
     *
     * @var \DOMXPath|null
     */
    private $xpath = null;

    /**
     * Get DOMDocument from ZipArchive
     *
     * @param string $zipFile Path of the archive
     * @param string $xmlFile Name of the XML part inside the archive
     * @return \DOMDocument|false False when the part is not in the archive
     * @throws Exception When the archive is missing or cannot be opened
     */
    public function getDomFromZip($zipFile, $xmlFile)
    {
        if (file_exists($zipFile) === false) {
            throw new Exception('Cannot find archive file.');
        }

        $zipClass = Settings::getZipClass();
        $zip = new $zipClass();
        // Only a strict true means success: ZipArchive::open() reports
        // failure with an integer error code, which "=== false" never catches.
        if ($zip->open($zipFile) !== true) {
            throw new Exception('Cannot open archive file.');
        }
        $contents = $zip->getFromName($xmlFile);
        $zip->close();

        // Drop any previously loaded document together with its XPath object,
        // so later queries never run against a stale part.
        $this->dom = null;
        $this->xpath = null;

        if ($contents === false) {
            return false;
        }

        $this->dom = new \DOMDocument();
        $this->dom->loadXML($contents);

        return $this->dom;
    }

    /**
     * Get elements
     *
     * @param string $path XPath expression
     * @param \DOMNode $context Optional node the expression is relative to
     * @return \DOMNodeList Empty list when no document is loaded or the query fails
     */
    public function getElements($path, \DOMNode $context = null)
    {
        if ($this->dom === null) {
            // Callers read ->length on the result, so hand back a real,
            // empty node list rather than an array.
            return new \DOMNodeList();
        }
        if ($this->xpath === null) {
            $this->xpath = new \DOMXPath($this->dom);
        }

        $result = $this->xpath->query($path, $context);

        return ($result === false) ? new \DOMNodeList() : $result;
    }

    /**
     * Get the first element matching the path
     *
     * @param string $path XPath expression
     * @param \DOMNode $context Optional node the expression is relative to
     * @return \DOMNode|false False when nothing matches
     */
    public function getElement($path, \DOMNode $context = null)
    {
        $elements = $this->getElements($path, $context);
        if ($elements->length > 0) {
            return $elements->item(0);
        }

        return false;
    }

    /**
     * Get element attribute
     *
     * @param string|\DOMNode $path Either the element itself or an XPath expression
     * @param string $attribute Attribute name
     * @param \DOMNode $context Optional node the expression is relative to
     * @return null|string Null when the element or attribute is missing or empty
     */
    public function getAttribute($path, $attribute, \DOMNode $context = null)
    {
        $element = $path;
        if (!($path instanceof \DOMNode)) {
            $element = $this->getElements($path, $context)->item(0);
        }

        // Only element nodes carry attributes; anything else counts as "no value"
        $value = ($element instanceof \DOMElement) ? $element->getAttribute($attribute) : '';

        return ($value === '') ? null : $value;
    }

    /**
     * Get element value
     *
     * @param string $path XPath expression
     * @param \DOMNode $context Optional node the expression is relative to
     * @return null|string Null when nothing matches or the value is empty
     */
    public function getValue($path, \DOMNode $context = null)
    {
        $elements = $this->getElements($path, $context);
        $value = ($elements->length > 0) ? $elements->item(0)->nodeValue : '';

        return ($value === null || $value === '') ? null : $value;
    }

    /**
     * Count elements
     *
     * @param string $path XPath expression
     * @param \DOMNode $context Optional node the expression is relative to
     * @return integer
     */
    public function countElements($path, \DOMNode $context = null)
    {
        return $this->getElements($path, $context)->length;
    }

    /**
     * Element exists
     *
     * @param string $path XPath expression
     * @param \DOMNode $context Optional node the expression is relative to
     * @return boolean
     */
    public function elementExists($path, \DOMNode $context = null)
    {
        return $this->getElements($path, $context)->length > 0;
    }
}

View File

@ -140,11 +140,13 @@ class Style
private static function setStyleValues($styleName, $styleValues, $styleObject) private static function setStyleValues($styleName, $styleValues, $styleObject)
{ {
if (!array_key_exists($styleName, self::$styles)) { if (!array_key_exists($styleName, self::$styles)) {
foreach ($styleValues as $key => $value) { if (is_array($styleValues)) {
if (substr($key, 0, 1) == '_') { foreach ($styleValues as $key => $value) {
$key = substr($key, 1); if (substr($key, 0, 1) == '_') {
$key = substr($key, 1);
}
$styleObject->setStyleValue($key, $value);
} }
$styleObject->setStyleValue($key, $value);
} }
self::$styles[$styleName] = $styleObject; self::$styles[$styleName] = $styleObject;