Refactor: Apply composite pattern for Word2007 reader
This commit is contained in:
parent
a861218141
commit
a4fa3dfb55
|
|
@ -78,6 +78,7 @@ This release marked heavy refactorings on internal code structure with the creat
|
|||
- Refactor: Remove Style\Tabs
|
||||
- Refactor: Apply composite pattern for writers
|
||||
- Refactor: Split `AbstractContainer` from `AbstractElement`
|
||||
- Refactor: Apply composite pattern for Word2007 reader
|
||||
|
||||
## 0.9.1 - 27 Mar 2014
|
||||
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@ class Link extends AbstractElement
|
|||
/**
|
||||
* Create a new Link Element
|
||||
*
|
||||
* @param string $linkSrc
|
||||
* @param string $linkName
|
||||
* @param string $target
|
||||
* @param string $text
|
||||
* @param mixed $fontStyle
|
||||
* @param mixed $paragraphStyle
|
||||
*/
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,336 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
|
||||
* Abstract part reader
|
||||
*/
|
||||
abstract class AbstractPart
|
||||
{
|
||||
/**
|
||||
* Document file
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $docFile;
|
||||
|
||||
/**
|
||||
* XML file
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $xmlFile;
|
||||
|
||||
/**
|
||||
* Part relationships
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $rels = array();
|
||||
|
||||
    /**
     * Read part
     *
     * Implemented by each concrete part reader to load its own XML part
     * and push what it finds into the given PhpWord object.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
|
||||
    abstract public function read(PhpWord &$phpWord);
|
||||
|
||||
/**
|
||||
* Create new instance
|
||||
*
|
||||
* @param string $docFile
|
||||
* @param string $xmlFile
|
||||
*/
|
||||
public function __construct($docFile, $xmlFile)
|
||||
{
|
||||
$this->docFile = $docFile;
|
||||
$this->xmlFile = $xmlFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set relationships
|
||||
*
|
||||
* @param array $value
|
||||
*/
|
||||
public function setRels($value)
|
||||
{
|
||||
$this->rels = $value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:r
|
||||
*
|
||||
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
|
||||
* @param \DOMElement $domNode
|
||||
* @param mixed $parent
|
||||
* @param string $docPart
|
||||
* @param mixed $pStyle
|
||||
*
|
||||
* @todo Footnote paragraph style
|
||||
*/
|
||||
protected function readRun(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart, $pStyle = null)
|
||||
{
|
||||
if (!in_array($domNode->nodeName, array('w:r', 'w:hyperlink'))) {
|
||||
return;
|
||||
}
|
||||
$fStyle = $this->readFontStyle($xmlReader, $domNode);
|
||||
|
||||
// Link
|
||||
if ($domNode->nodeName == 'w:hyperlink') {
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode);
|
||||
$textContent = $xmlReader->getValue('w:r/w:t', $domNode);
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$parent->addLink($target, $textContent, $fStyle, $pStyle);
|
||||
}
|
||||
} else {
|
||||
// Footnote
|
||||
if ($xmlReader->elementExists('w:footnoteReference', $domNode)) {
|
||||
$parent->addFootnote();
|
||||
|
||||
// Endnote
|
||||
} elseif ($xmlReader->elementExists('w:endnoteReference', $domNode)) {
|
||||
$parent->addEndnote();
|
||||
|
||||
// Image
|
||||
} elseif ($xmlReader->elementExists('w:pict', $domNode)) {
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode, 'w:pict/v:shape/v:imagedata');
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$imageSource = "zip://{$this->docFile}#{$target}";
|
||||
$parent->addImage($imageSource);
|
||||
}
|
||||
|
||||
// Object
|
||||
} elseif ($xmlReader->elementExists('w:object', $domNode)) {
|
||||
$rId = $xmlReader->getAttribute('r:id', $domNode, 'w:object/o:OLEObject');
|
||||
// $rIdIcon = $xmlReader->getAttribute('r:id', $domNode, 'w:object/v:shape/v:imagedata');
|
||||
$target = $this->getMediaTarget($docPart, $rId);
|
||||
if (!is_null($target)) {
|
||||
$textContent = "<Object: {$target}>";
|
||||
$parent->addText($textContent, $fStyle, $pStyle);
|
||||
}
|
||||
|
||||
// TextRun
|
||||
} else {
|
||||
$textContent = $xmlReader->getValue('w:t', $domNode);
|
||||
$parent->addText($textContent, $fStyle, $pStyle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:pPr
|
||||
*
|
||||
* @return string|array|null
|
||||
*/
|
||||
protected function readParagraphStyle(XMLReader $xmlReader, \DOMElement $domNode)
|
||||
{
|
||||
$style = null;
|
||||
if ($xmlReader->elementExists('w:pPr', $domNode)) {
|
||||
if ($xmlReader->elementExists('w:pPr/w:pStyle', $domNode)) {
|
||||
$style = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:pStyle');
|
||||
} else {
|
||||
$style = array();
|
||||
$mapping = array(
|
||||
'w:ind' => 'indent', 'w:spacing' => 'spacing',
|
||||
'w:jc' => 'align', 'w:basedOn' => 'basedOn', 'w:next' => 'next',
|
||||
'w:widowControl' => 'widowControl', 'w:keepNext' => 'keepNext',
|
||||
'w:keepLines' => 'keepLines', 'w:pageBreakBefore' => 'pageBreakBefore',
|
||||
);
|
||||
|
||||
$nodes = $xmlReader->getElements('w:pPr/*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if (!array_key_exists($node->nodeName, $mapping)) {
|
||||
continue;
|
||||
}
|
||||
$property = $mapping[$node->nodeName];
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:ind':
|
||||
$style['indent'] = $xmlReader->getAttribute('w:left', $node);
|
||||
$style['hanging'] = $xmlReader->getAttribute('w:hanging', $node);
|
||||
break;
|
||||
|
||||
case 'w:spacing':
|
||||
$style['spaceAfter'] = $xmlReader->getAttribute('w:after', $node);
|
||||
$style['spaceBefore'] = $xmlReader->getAttribute('w:before', $node);
|
||||
// Commented. Need to adjust the number when return value is null
|
||||
// $style['spacing'] = $xmlReader->getAttribute('w:line', $node);
|
||||
break;
|
||||
|
||||
case 'w:keepNext':
|
||||
case 'w:keepLines':
|
||||
case 'w:pageBreakBefore':
|
||||
$style[$property] = true;
|
||||
break;
|
||||
|
||||
case 'w:widowControl':
|
||||
$style[$property] = false;
|
||||
break;
|
||||
|
||||
case 'w:jc':
|
||||
case 'w:basedOn':
|
||||
case 'w:next':
|
||||
$style[$property] = $xmlReader->getAttribute('w:val', $node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $style;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:rPr
|
||||
*
|
||||
* @return string|array|null
|
||||
*/
|
||||
protected function readFontStyle(XMLReader $xmlReader, \DOMElement $domNode)
|
||||
{
|
||||
$style = null;
|
||||
// Hyperlink has an extra w:r child
|
||||
if ($domNode->nodeName == 'w:hyperlink') {
|
||||
$domNode = $xmlReader->getElement('w:r', $domNode);
|
||||
}
|
||||
if (is_null($domNode)) {
|
||||
return $style;
|
||||
}
|
||||
if ($xmlReader->elementExists('w:rPr', $domNode)) {
|
||||
if ($xmlReader->elementExists('w:rPr/w:rStyle', $domNode)) {
|
||||
$style = $xmlReader->getAttribute('w:val', $domNode, 'w:rPr/w:rStyle');
|
||||
} else {
|
||||
$style = array();
|
||||
$mapping = array(
|
||||
'w:b' => 'bold', 'w:i' => 'italic', 'w:color' => 'color',
|
||||
'w:strike' => 'strikethrough', 'w:u' => 'underline',
|
||||
'w:highlight' => 'fgColor', 'w:sz' => 'size',
|
||||
'w:rFonts' => 'name', 'w:vertAlign' => 'superScript',
|
||||
);
|
||||
|
||||
$nodes = $xmlReader->getElements('w:rPr/*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if (!array_key_exists($node->nodeName, $mapping)) {
|
||||
continue;
|
||||
}
|
||||
$property = $mapping[$node->nodeName];
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:rFonts':
|
||||
$style['name'] = $xmlReader->getAttribute('w:ascii', $node);
|
||||
$style['hint'] = $xmlReader->getAttribute('w:hint', $node);
|
||||
break;
|
||||
|
||||
case 'w:b':
|
||||
case 'w:i':
|
||||
case 'w:strike':
|
||||
$style[$property] = true;
|
||||
break;
|
||||
|
||||
case 'w:u':
|
||||
case 'w:highlight':
|
||||
case 'w:color':
|
||||
$style[$property] = $xmlReader->getAttribute('w:val', $node);
|
||||
break;
|
||||
|
||||
case 'w:sz':
|
||||
$style[$property] = $xmlReader->getAttribute('w:val', $node) / 2;
|
||||
break;
|
||||
|
||||
case 'w:vertAlign':
|
||||
$style[$property] = $xmlReader->getAttribute('w:val', $node);
|
||||
if ($style[$property] == 'superscript') {
|
||||
$style['superScript'] = true;
|
||||
} else {
|
||||
$style['superScript'] = false;
|
||||
$style['subScript'] = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $style;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:tblPr
|
||||
*
|
||||
* @return string|array|null
|
||||
* @todo Capture w:tblStylePr w:type="firstRow"
|
||||
*/
|
||||
protected function readTableStyle(XMLReader $xmlReader, \DOMElement $domNode)
|
||||
{
|
||||
$style = null;
|
||||
$margins = array('top', 'left', 'bottom', 'right');
|
||||
$borders = $margins + array('insideH', 'insideV');
|
||||
|
||||
if ($xmlReader->elementExists('w:tblPr', $domNode)) {
|
||||
if ($xmlReader->elementExists('w:tblPr/w:tblStyle', $domNode)) {
|
||||
$style = $xmlReader->getAttribute('w:val', $domNode, 'w:tblPr/w:tblStyle');
|
||||
} else {
|
||||
$style = array();
|
||||
$mapping = array(
|
||||
'w:tblCellMar' => 'cellMargin',
|
||||
'w:tblBorders' => 'border',
|
||||
);
|
||||
|
||||
$nodes = $xmlReader->getElements('w:tblPr/*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if (!array_key_exists($node->nodeName, $mapping)) {
|
||||
continue;
|
||||
}
|
||||
// $property = $mapping[$node->nodeName];
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:tblCellMar':
|
||||
foreach ($margins as $side) {
|
||||
$ucfSide = ucfirst($side);
|
||||
$style["cellMargin$ucfSide"] = $xmlReader->getAttribute('w:w', $node, "w:$side");
|
||||
}
|
||||
break;
|
||||
|
||||
case 'w:tblBorders':
|
||||
foreach ($borders as $side) {
|
||||
$ucfSide = ucfirst($side);
|
||||
$style["border{$ucfSide}Size"] = $xmlReader->getAttribute('w:sz', $node, "w:$side");
|
||||
$style["border{$ucfSide}Color"] = $xmlReader->getAttribute('w:color', $node, "w:$side");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $style;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the target of image, object, or link as stored in ::readMainRels
|
||||
*
|
||||
* @param string $docPart
|
||||
* @param string $rId
|
||||
* @return string|null
|
||||
*/
|
||||
private function getMediaTarget($docPart, $rId)
|
||||
{
|
||||
$target = null;
|
||||
if (array_key_exists($docPart, $this->rels)) {
|
||||
if (array_key_exists($rId, $this->rels[$docPart])) {
|
||||
$target = $this->rels[$docPart][$rId]['target'];
|
||||
}
|
||||
}
|
||||
|
||||
return $target;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
 * Core/extended document properties reader
 *
 * Generic reader driven by two tables that subclasses fill in: $mapping
 * (element name => setter name) and $callbacks (element name => converter).
 */
class DocProps extends AbstractPart
{
    /**
     * Property mapping: XML element name => setter method name
     *
     * @var array
     */
    protected $mapping = array();

    /**
     * Callback functions: XML element name => callable applied to the value
     *
     * @var array
     */
    protected $callbacks = array();

    /**
     * Read core/extended document properties
     *
     * For every child element listed in $mapping, the node value (null when
     * empty) is optionally passed through its callback and then handed to
     * the mapped setter, provided the document properties object has it.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
    public function read(PhpWord &$phpWord)
    {
        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

        $docProps = $phpWord->getDocumentProperties();

        $nodes = $xmlReader->getElements('*');
        if (!($nodes->length > 0)) {
            return;
        }

        foreach ($nodes as $node) {
            $name = $node->nodeName;
            if (array_key_exists($name, $this->mapping)) {
                $setter = $this->mapping[$name];
                $value = ($node->nodeValue == '') ? null : $node->nodeValue;
                if (array_key_exists($name, $this->callbacks)) {
                    $callback = $this->callbacks[$name];
                    $value = $callback($value);
                }
                if (method_exists($docProps, $setter)) {
                    $docProps->$setter($value);
                }
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
/**
 * Extended properties reader
 *
 * Supplies the element-to-setter table; the reading itself is inherited
 * from DocProps.
 */
class DocPropsApp extends DocProps
{
    /**
     * Property mapping: XML element name => setter method name
     *
     * @var array
     */
    protected $mapping = array(
        'Company' => 'setCompany',
        'Manager' => 'setManager',
    );
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
/**
 * Core properties reader
 *
 * Supplies the element-to-setter table and the value converters; the
 * reading itself is inherited from DocProps.
 */
class DocPropsCore extends DocProps
{
    /**
     * Property mapping: XML element name => setter method name
     *
     * @var array
     */
    protected $mapping = array(
        'dc:creator'        => 'setCreator',
        'dc:title'          => 'setTitle',
        'dc:description'    => 'setDescription',
        'dc:subject'        => 'setSubject',
        'cp:keywords'       => 'setKeywords',
        'cp:category'       => 'setCategory',
        'cp:lastModifiedBy' => 'setLastModifiedBy',
        'dcterms:created'   => 'setCreated',
        'dcterms:modified'  => 'setModified',
    );

    /**
     * Callback functions: both date elements are converted with strtotime
     *
     * @var array
     */
    protected $callbacks = array(
        'dcterms:created'  => 'strtotime',
        'dcterms:modified' => 'strtotime',
    );
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\DocumentProperties;
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
 * Custom properties reader
 */
class DocPropsCustom extends AbstractPart
{
    /**
     * Read custom document properties
     *
     * Each child element contributes one custom property: its "name"
     * attribute is the property name, and its first child element supplies
     * both the type (element name) and the value (element content).
     * Elements without such a child are skipped.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
    public function read(PhpWord &$phpWord)
    {
        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
        $docProps = $phpWord->getDocumentProperties();

        $nodes = $xmlReader->getElements('*');
        if ($nodes->length > 0) {
            foreach ($nodes as $node) {
                $propertyName = $xmlReader->getAttribute('name', $node);
                $attributeNode = $xmlReader->getElement('*', $node);
                // No value element: nothing to convert, and dereferencing
                // the missing node below would fail
                if (is_null($attributeNode)) {
                    continue;
                }
                $attributeType = $attributeNode->nodeName;
                $attributeValue = $attributeNode->nodeValue;
                $attributeValue = DocumentProperties::convertProperty($attributeValue, $attributeType);
                $attributeType = DocumentProperties::convertPropertyType($attributeType);
                $docProps->setCustomProperty($propertyName, $attributeValue, $attributeType);
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,350 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
|
||||
* Document reader
|
||||
*/
|
||||
class Document extends AbstractPart
|
||||
{
|
||||
/**
|
||||
* Read document.xml
|
||||
*
|
||||
* @param \PhpOffice\PhpWord\PhpWord $phpWord
|
||||
*/
|
||||
public function read(PhpWord &$phpWord)
|
||||
{
|
||||
$xmlReader = new XMLReader();
|
||||
$xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
|
||||
|
||||
$nodes = $xmlReader->getElements('w:body/*');
|
||||
if ($nodes->length > 0) {
|
||||
$section = $phpWord->addSection();
|
||||
foreach ($nodes as $node) {
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:p': // Paragraph
|
||||
// Page break
|
||||
// @todo <w:lastRenderedPageBreak>
|
||||
if ($xmlReader->getAttribute('w:type', $node, 'w:r/w:br') == 'page') {
|
||||
$section->addPageBreak(); // PageBreak
|
||||
}
|
||||
|
||||
// Paragraph
|
||||
$this->readParagraph($xmlReader, $node, $section, 'document');
|
||||
// Section properties
|
||||
if ($xmlReader->elementExists('w:pPr/w:sectPr', $node)) {
|
||||
$settingsNode = $xmlReader->getElement('w:pPr/w:sectPr', $node);
|
||||
if (!is_null($settingsNode)) {
|
||||
$settings = $this->readSectionStyle($xmlReader, $settingsNode);
|
||||
$section->setSettings($settings);
|
||||
if (!is_null($settings)) {
|
||||
$this->readHeaderFooter($settings, $section);
|
||||
}
|
||||
}
|
||||
$section = $phpWord->addSection();
|
||||
}
|
||||
break;
|
||||
|
||||
case 'w:tbl': // Table
|
||||
$this->readTable($xmlReader, $node, $section, 'document');
|
||||
break;
|
||||
|
||||
case 'w:sectPr': // Last section
|
||||
$settings = $this->readSectionStyle($xmlReader, $node);
|
||||
$section->setSettings($settings);
|
||||
if (!is_null($settings)) {
|
||||
$this->readHeaderFooter($settings, $section);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read header footer
|
||||
*
|
||||
* @param array $settings
|
||||
* @param \PhpOffice\PhpWord\Element\Section $section
|
||||
*/
|
||||
private function readHeaderFooter($settings, &$section)
|
||||
{
|
||||
if (is_array($settings) && array_key_exists('hf', $settings)) {
|
||||
foreach ($settings['hf'] as $rId => $hfSetting) {
|
||||
if (array_key_exists($rId, $this->rels['document'])) {
|
||||
list($hfType, $xmlFile, $docPart) = array_values($this->rels['document'][$rId]);
|
||||
$method = "add{$hfType}";
|
||||
$hfObject = $section->$method($hfSetting['type']);
|
||||
|
||||
// Read header/footer content
|
||||
$xmlReader = new XMLReader();
|
||||
$xmlReader->getDomFromZip($this->docFile, $xmlFile);
|
||||
$nodes = $xmlReader->getElements('*');
|
||||
if ($nodes->length > 0) {
|
||||
foreach ($nodes as $node) {
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:p': // Paragraph
|
||||
$this->readParagraph($xmlReader, $node, $hfObject, $docPart);
|
||||
break;
|
||||
|
||||
case 'w:tbl': // Table
|
||||
$this->readTable($xmlReader, $node, $hfObject, $docPart);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:p
|
||||
*
|
||||
* @param mixed $parent
|
||||
* @param string $docPart
|
||||
*
|
||||
* @todo Get font style for preserve text
|
||||
*/
|
||||
private function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
|
||||
{
|
||||
// Paragraph style
|
||||
$pStyle = null;
|
||||
$headingMatches = array();
|
||||
if ($xmlReader->elementExists('w:pPr', $domNode)) {
|
||||
$pStyle = $this->readParagraphStyle($xmlReader, $domNode);
|
||||
if (is_string($pStyle)) {
|
||||
preg_match('/Heading(\d)/', $pStyle, $headingMatches);
|
||||
}
|
||||
}
|
||||
|
||||
// PreserveText
|
||||
if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
|
||||
$ignoreText = false;
|
||||
$textContent = '';
|
||||
$fStyle = $this->readFontStyle($xmlReader, $domNode);
|
||||
$nodes = $xmlReader->getElements('w:r', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
$instrText = $xmlReader->getValue('w:instrText', $node);
|
||||
if ($xmlReader->elementExists('w:fldChar', $node)) {
|
||||
$fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
|
||||
if ($fldCharType == 'begin') {
|
||||
$ignoreText = true;
|
||||
} elseif ($fldCharType == 'end') {
|
||||
$ignoreText = false;
|
||||
}
|
||||
}
|
||||
if (!is_null($instrText)) {
|
||||
$textContent .= '{' . $instrText . '}';
|
||||
} else {
|
||||
if ($ignoreText === false) {
|
||||
$textContent .= $xmlReader->getValue('w:t', $node);
|
||||
}
|
||||
}
|
||||
}
|
||||
$parent->addPreserveText($textContent, $fStyle, $pStyle);
|
||||
|
||||
// List item
|
||||
} elseif ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
|
||||
$textContent = '';
|
||||
$numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
|
||||
$levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
|
||||
$nodes = $xmlReader->getElements('w:r', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
$textContent .= $xmlReader->getValue('w:t', $node);
|
||||
}
|
||||
$parent->addListItem($textContent, $levelId, null, "PHPWordList{$numId}", $pStyle);
|
||||
|
||||
// Heading
|
||||
} elseif (!empty($headingMatches)) {
|
||||
$textContent = '';
|
||||
$nodes = $xmlReader->getElements('w:r', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
$textContent .= $xmlReader->getValue('w:t', $node);
|
||||
}
|
||||
$parent->addTitle($textContent, $headingMatches[1]);
|
||||
|
||||
// Text and TextRun
|
||||
} else {
|
||||
$runCount = $xmlReader->countElements('w:r', $domNode);
|
||||
$linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
|
||||
$runLinkCount = $runCount + $linkCount;
|
||||
if ($runLinkCount == 0) {
|
||||
$parent->addTextBreak(null, $pStyle);
|
||||
} else {
|
||||
if ($runLinkCount > 1) {
|
||||
$textrun = $parent->addTextRun($pStyle);
|
||||
$textParent = &$textrun;
|
||||
} else {
|
||||
$textParent = &$parent;
|
||||
}
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
$this->readRun($xmlReader, $node, $textParent, $docPart, $pStyle);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:tbl
|
||||
*
|
||||
* @param mixed $parent
|
||||
* @param string $docPart
|
||||
*/
|
||||
private function readTable(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
|
||||
{
|
||||
// Table style
|
||||
$tblStyle = null;
|
||||
if ($xmlReader->elementExists('w:tblPr', $domNode)) {
|
||||
$tblStyle = $this->readTableStyle($xmlReader, $domNode);
|
||||
}
|
||||
|
||||
$table = $parent->addTable($tblStyle);
|
||||
$tblNodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($tblNodes as $tblNode) {
|
||||
if ($tblNode->nodeName == 'w:tblGrid') { // Column
|
||||
// @todo Do something with table columns
|
||||
|
||||
} elseif ($tblNode->nodeName == 'w:tr') { // Row
|
||||
$rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
|
||||
$rowHRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
|
||||
$rowHRule = $rowHRule == 'exact' ? true : false;
|
||||
$rowStyle = array(
|
||||
'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
|
||||
'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
|
||||
'exactHeight' => $rowHRule,
|
||||
);
|
||||
|
||||
$row = $table->addRow($rowHeight, $rowStyle);
|
||||
$rowNodes = $xmlReader->getElements('*', $tblNode);
|
||||
foreach ($rowNodes as $rowNode) {
|
||||
if ($rowNode->nodeName == 'w:trPr') { // Row style
|
||||
// @todo Do something with row style
|
||||
|
||||
} elseif ($rowNode->nodeName == 'w:tc') { // Cell
|
||||
$cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
|
||||
$cellStyle = null;
|
||||
$cellStyleNode = $xmlReader->getElement('w:tcPr', $rowNode);
|
||||
if (!is_null($cellStyleNode)) {
|
||||
$cellStyle = $this->readCellStyle($xmlReader, $cellStyleNode);
|
||||
}
|
||||
|
||||
$cell = $row->addCell($cellWidth, $cellStyle);
|
||||
$cellNodes = $xmlReader->getElements('*', $rowNode);
|
||||
foreach ($cellNodes as $cellNode) {
|
||||
if ($cellNode->nodeName == 'w:p') { // Paragraph
|
||||
$this->readParagraph($xmlReader, $cellNode, $cell, $docPart);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:sectPr
|
||||
*
|
||||
* @return array|null
|
||||
*/
|
||||
private function readSectionStyle(XMLReader $xmlReader, \DOMElement $domNode)
|
||||
{
|
||||
$ret = null;
|
||||
$mapping = array(
|
||||
'w:type' => 'breakType', 'w:pgSz' => 'pageSize',
|
||||
'w:pgMar' => 'pageMargin', 'w:cols' => 'columns',
|
||||
'w:headerReference' => 'header', 'w:footerReference' => 'footer',
|
||||
);
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if (!array_key_exists($node->nodeName, $mapping)) {
|
||||
continue;
|
||||
}
|
||||
$property = $mapping[$node->nodeName];
|
||||
switch ($node->nodeName) {
|
||||
|
||||
case 'w:type':
|
||||
$ret['breakType'] = $xmlReader->getAttribute('w:val', $node);
|
||||
break;
|
||||
|
||||
case 'w:pgSz':
|
||||
$ret['pageSizeW'] = $xmlReader->getAttribute('w:w', $node);
|
||||
$ret['pageSizeH'] = $xmlReader->getAttribute('w:h', $node);
|
||||
$ret['orientation'] = $xmlReader->getAttribute('w:orient', $node);
|
||||
break;
|
||||
|
||||
case 'w:pgMar':
|
||||
$ret['topMargin'] = $xmlReader->getAttribute('w:top', $node);
|
||||
$ret['leftMargin'] = $xmlReader->getAttribute('w:left', $node);
|
||||
$ret['bottomMargin'] = $xmlReader->getAttribute('w:bottom', $node);
|
||||
$ret['rightMargin'] = $xmlReader->getAttribute('w:right', $node);
|
||||
$ret['headerHeight'] = $xmlReader->getAttribute('w:header', $node);
|
||||
$ret['footerHeight'] = $xmlReader->getAttribute('w:footer', $node);
|
||||
$ret['gutter'] = $xmlReader->getAttribute('w:gutter', $node);
|
||||
break;
|
||||
|
||||
case 'w:cols':
|
||||
$ret['colsNum'] = $xmlReader->getAttribute('w:num', $node);
|
||||
$ret['colsSpace'] = $xmlReader->getAttribute('w:space', $node);
|
||||
break;
|
||||
|
||||
case 'w:headerReference':
|
||||
case 'w:footerReference':
|
||||
$id = $xmlReader->getAttribute('r:id', $node);
|
||||
$ret['hf'][$id] = array(
|
||||
'method' => $property,
|
||||
'type' => $xmlReader->getAttribute('w:type', $node),
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read w:tcPr
|
||||
*
|
||||
* @return array|null
|
||||
*/
|
||||
private function readCellStyle(XMLReader $xmlReader, \DOMElement $domNode)
|
||||
{
|
||||
$style = null;
|
||||
$mapping = array(
|
||||
'w:shd' => 'bgColor',
|
||||
'w:vAlign' => 'valign', 'w:textDirection' => 'textDirection',
|
||||
'w:gridSpan' => 'gridSpan', 'w:vMerge' => 'vMerge',
|
||||
);
|
||||
$nodes = $xmlReader->getElements('*', $domNode);
|
||||
foreach ($nodes as $node) {
|
||||
if (!array_key_exists($node->nodeName, $mapping)) {
|
||||
continue;
|
||||
}
|
||||
$property = $mapping[$node->nodeName];
|
||||
switch ($node->nodeName) {
|
||||
case 'w:shd':
|
||||
$style['bgColor'] = $xmlReader->getAttribute('w:fill', $node);
|
||||
break;
|
||||
|
||||
default:
|
||||
$style[$property] = $xmlReader->getAttribute('w:val', $node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $style;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
/**
 * Endnotes reader
 *
 * Same reading logic as Notes, with the note type fixed to endnotes.
 */
class Endnotes extends Notes
{
    /**
     * Note type, overriding the parent default with 'endnotes'
     *
     * @var string
     */
    protected $type = 'endnotes';
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
/**
 * Footnotes reader
 *
 * Same reading logic as Notes, with the note type fixed to footnotes.
 */
class Footnotes extends Notes
{
    /**
     * Note type, stated explicitly as 'footnotes'
     *
     * @var string
     */
    protected $type = 'footnotes';
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
 * Notes reader
 */
class Notes extends AbstractPart
{
    /**
     * Note type footnotes|endnotes
     *
     * @var string
     */
    protected $type = 'footnotes';

    /**
     * Read (footnotes|endnotes).xml
     *
     * Fills the note elements already registered in the matching collection
     * class with the runs found in the notes part. Nodes carrying a w:type
     * attribute and ids missing from the collection are skipped.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
    public function read(PhpWord &$phpWord)
    {
        // Anything other than 'endnotes' is treated as 'footnotes'
        $this->type = ($this->type == 'endnotes') ? 'endnotes' : 'footnotes';
        $collectionClass = 'PhpOffice\\PhpWord\\' . ucfirst($this->type);
        $collection = $collectionClass::getElements();

        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
        $nodes = $xmlReader->getElements('*');
        if ($nodes->length > 0) {
            foreach ($nodes as $node) {
                $id = $xmlReader->getAttribute('w:id', $node);
                $noteType = $xmlReader->getAttribute('w:type', $node);

                // Avoid w:type "separator" and "continuationSeparator"
                // Only look for <footnote> or <endnote> without w:type attribute
                if (is_null($noteType) && array_key_exists($id, $collection)) {
                    $element = $collection[$id];
                    $pNodes = $xmlReader->getElements('w:p/*', $node);
                    foreach ($pNodes as $pNode) {
                        // The document part is this reader's note type; the
                        // node's w:type is always null in this branch and
                        // could never match a relationship table
                        $this->readRun($xmlReader, $pNode, $element, $this->type);
                    }
                    $collectionClass::setElement($id, $element);
                }
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
|
||||
* Numbering reader
|
||||
*/
|
||||
class Numbering extends AbstractPart
{
    /**
     * Read numbering.xml
     *
     * Collects every w:abstractNum definition, resolves each w:num instance
     * against the abstract definition it references, applies the levels found
     * under w:lvlOverride on top of it, and registers the result on $phpWord
     * as a numbering style named "PHPWordList{numId}".
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
    public function read(PhpWord &$phpWord)
    {
        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

        $abstracts = $this->readAbstracts($xmlReader);
        $numberings = $this->readInstances($xmlReader, $abstracts);

        // Push to Style collection
        foreach ($numberings as $numId => $numbering) {
            $phpWord->addNumberingStyle("PHPWordList{$numId}", $numbering);
        }
    }

    /**
     * Read abstract numbering definitions (w:abstractNum)
     *
     * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
     * @return array Definitions keyed by w:abstractNumId; each entry holds a
     *               'levels' array and, when w:multiLevelType is present, 'type'
     */
    private function readAbstracts(XMLReader $xmlReader)
    {
        $abstracts = array();

        foreach ($xmlReader->getElements('w:abstractNum') as $node) {
            $abstractId = $xmlReader->getAttribute('w:abstractNumId', $node);

            // Build the definition in a plain local and store it once complete,
            // so no reference alias into $abstracts outlives the loop.
            $abstract = array('levels' => array());
            foreach ($xmlReader->getElements('*', $node) as $subnode) {
                switch ($subnode->nodeName) {
                    case 'w:multiLevelType':
                        $abstract['type'] = $xmlReader->getAttribute('w:val', $subnode);
                        break;
                    case 'w:lvl':
                        $levelId = $xmlReader->getAttribute('w:ilvl', $subnode);
                        $abstract['levels'][$levelId] = $this->readLevel($xmlReader, $subnode, $levelId);
                        break;
                }
            }
            $abstracts[$abstractId] = $abstract;
        }

        return $abstracts;
    }

    /**
     * Read numbering instance definitions (w:num)
     *
     * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
     * @param array $abstracts Abstract definitions keyed by w:abstractNumId
     * @return array Numbering definitions keyed by w:numId
     */
    private function readInstances(XMLReader $xmlReader, array $abstracts)
    {
        $numberings = array();

        foreach ($xmlReader->getElements('w:num') as $node) {
            $numId = $xmlReader->getAttribute('w:numId', $node);
            $abstractId = $xmlReader->getAttribute('w:val', $node, 'w:abstractNumId');

            // A w:num may reference an abstract definition that was never
            // declared; start from an empty definition in that case instead of
            // reading an undefined array index.
            $numbering = isset($abstracts[$abstractId]) ? $abstracts[$abstractId] : array();
            $numbering['numId'] = $numId;

            // Level overrides replace individual keys of the inherited level
            $subnodes = $xmlReader->getElements('w:lvlOverride/w:lvl', $node);
            foreach ($subnodes as $subnode) {
                $levelId = $xmlReader->getAttribute('w:ilvl', $subnode);
                $overrides = $this->readLevel($xmlReader, $subnode, $levelId);
                foreach ($overrides as $key => $value) {
                    $numbering['levels'][$levelId][$key] = $value;
                }
            }

            $numberings[$numId] = $numbering;
        }

        return $numberings;
    }

    /**
     * Read numbering level definition from w:abstractNum and w:num
     *
     * Only properties that are actually present are returned; anything the
     * reader reports as null is left out of the result.
     *
     * @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
     * @param \DOMElement $subnode The w:lvl element
     * @param string|null $levelId Value of the w:ilvl attribute of the level
     * @return array
     */
    private function readLevel(XMLReader $xmlReader, \DOMElement $subnode, $levelId)
    {
        // Result key => array(attribute name, path passed to getAttribute)
        $properties = array(
            'start'   => array('w:val', 'w:start'),
            'format'  => array('w:val', 'w:numFmt'),
            'restart' => array('w:val', 'w:lvlRestart'),
            'suffix'  => array('w:val', 'w:suff'),
            'text'    => array('w:val', 'w:lvlText'),
            'align'   => array('w:val', 'w:lvlJc'),
            'tab'     => array('w:pos', 'w:pPr/w:tabs/w:tab'),
            'left'    => array('w:left', 'w:pPr/w:ind'),
            'hanging' => array('w:hanging', 'w:pPr/w:ind'),
            'font'    => array('w:ascii', 'w:rPr/w:rFonts'),
            'hint'    => array('w:hint', 'w:rPr/w:rFonts'),
        );

        $level = array();
        if (!is_null($levelId)) {
            $level['level'] = $levelId;
        }
        foreach ($properties as $key => $property) {
            $value = $xmlReader->getAttribute($property[0], $subnode, $property[1]);
            if (!is_null($value)) {
                $level[$key] = $value;
            }
        }

        return $level;
    }
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
/**
|
||||
* PHPWord
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2014 PHPWord
|
||||
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader\Word2007;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\XMLReader;
|
||||
|
||||
/**
|
||||
* Styles reader
|
||||
*/
|
||||
class Styles extends AbstractPart
{
    /**
     * Read styles.xml
     *
     * Walks every w:style element and registers it on $phpWord according to
     * its w:type: paragraph styles become title, font or paragraph styles,
     * character styles become font styles, table styles become table styles.
     * Styles of any other type are ignored.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     */
    public function read(PhpWord &$phpWord)
    {
        $reader = new XMLReader();
        $reader->getDomFromZip($this->docFile, $this->xmlFile);

        foreach ($reader->getElements('w:style') as $styleNode) {
            $styleType = $reader->getAttribute('w:type', $styleNode);

            // Prefer the style id; fall back to the value of w:name
            $styleName = $reader->getAttribute('w:styleId', $styleNode);
            if (is_null($styleName)) {
                $styleName = $reader->getAttribute('w:val', $styleNode, 'w:name');
            }
            preg_match('/Heading(\d)/', $styleName, $heading);

            if ($styleType == 'paragraph') {
                $paragraphStyle = $this->readParagraphStyle($reader, $styleNode);
                $fontStyle = $this->readFontStyle($reader, $styleNode);
                if (!empty($heading)) {
                    // "HeadingN" styles are registered as title styles of depth N
                    $phpWord->addTitleStyle($heading[1], $fontStyle, $paragraphStyle);
                } elseif (!empty($fontStyle)) {
                    $phpWord->addFontStyle($styleName, $fontStyle, $paragraphStyle);
                } elseif (is_array($paragraphStyle)) {
                    $phpWord->addParagraphStyle($styleName, $paragraphStyle);
                }
            } elseif ($styleType == 'character') {
                $fontStyle = $this->readFontStyle($reader, $styleNode);
                if (!empty($fontStyle)) {
                    $phpWord->addFontStyle($styleName, $fontStyle);
                }
            } elseif ($styleType == 'table') {
                $tableStyle = $this->readTableStyle($reader, $styleNode);
                if (!empty($tableStyle)) {
                    $phpWord->addTableStyle($styleName, $tableStyle);
                }
            }
        }
    }
}
|
||||
Loading…
Reference in New Issue