Refactor Word2007 reader

This commit is contained in:
Ivan Lanin 2014-05-21 00:00:19 +07:00
parent f536bd6fbd
commit 61c6d51e5e
4 changed files with 196 additions and 171 deletions

View File

@ -25,6 +25,20 @@ use PhpOffice\PhpWord\Shared\XMLReader;
*/
abstract class AbstractPart extends Word2007AbstractPart
{
/**
* Read w:p (override)
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*
* @todo Get font style for preserve text
*/
protected function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Deliberately empty: this override turns w:p handling into a no-op for this
// reader, so none of the arguments are used and nothing is added to $parent.
}
/**
* Read w:r (override)
*
@ -38,6 +52,18 @@ abstract class AbstractPart extends Word2007AbstractPart
{
}
/**
* Read w:tbl (override)
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*/
protected function readTable(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Deliberately empty: this override turns w:tbl handling into a no-op for this
// reader, so none of the arguments are used and nothing is added to $parent.
}
/**
* Read w:pPr (override)
*/

View File

@ -86,6 +86,96 @@ abstract class AbstractPart
$this->rels = $value;
}
/**
* Read w:p
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*
* @todo Get font style for preserve text
*/
protected function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
    // Turns one w:p node into exactly one kind of child of $parent: preserve
    // text, list item, title, text break, or plain text / text run. The checks
    // below run in that priority order and the first match wins.

    // Resolve the paragraph style up front; it is also where a heading level
    // is detected from the style name.
    $style = null;
    $heading = array();
    if ($xmlReader->elementExists('w:pPr', $domNode)) {
        $style = $this->readParagraphStyle($xmlReader, $domNode);
        $hasStyleName = is_array($style) && array_key_exists('styleName', $style);
        if ($hasStyleName) {
            // On a match, $heading[1] holds the digit that follows "Heading".
            preg_match('/Heading(\d)/', $style['styleName'], $heading);
        }
    }

    // PreserveText: at least one run carries a field instruction.
    if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
        $skipText = false;
        $text = '';
        // The font style is looked up on the paragraph node, not on each run.
        $fontStyle = $this->readFontStyle($xmlReader, $domNode);
        foreach ($xmlReader->getElements('w:r', $domNode) as $run) {
            $instruction = $xmlReader->getValue('w:instrText', $run);
            if ($xmlReader->elementExists('w:fldChar', $run)) {
                // A "begin" marker starts suppressing w:t text, "end" stops it;
                // any other marker type leaves the flag untouched.
                switch ($xmlReader->getAttribute('w:fldCharType', $run, 'w:fldChar')) {
                    case 'begin':
                        $skipText = true;
                        break;
                    case 'end':
                        $skipText = false;
                        break;
                }
            }
            if ($instruction !== null) {
                // Field instructions are kept, wrapped in braces.
                $text .= '{' . $instruction . '}';
            } elseif (!$skipText) {
                $text .= $xmlReader->getValue('w:t', $run);
            }
        }
        $parent->addPreserveText($text, $fontStyle, $style);

        return;
    }

    // List item: the paragraph has numbering properties.
    if ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
        $text = '';
        $numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
        $levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
        foreach ($xmlReader->getElements('w:r', $domNode) as $run) {
            $text .= $xmlReader->getValue('w:t', $run);
        }
        $parent->addListItem($text, $levelId, null, "PHPWordList{$numId}", $style);

        return;
    }

    // Heading: the style name matched above; only the run text and the
    // captured depth are passed on.
    if (!empty($heading)) {
        $text = '';
        foreach ($xmlReader->getElements('w:r', $domNode) as $run) {
            $text .= $xmlReader->getValue('w:t', $run);
        }
        $parent->addTitle($text, $heading[1]);

        return;
    }

    // Text and TextRun: decide by how many runs and hyperlinks there are.
    $runLinkCount = $xmlReader->countElements('w:r', $domNode)
        + $xmlReader->countElements('w:hyperlink', $domNode);
    if ($runLinkCount == 0) {
        // Nothing to print: keep the paragraph as a styled break.
        $parent->addTextBreak(null, $style);

        return;
    }

    if ($runLinkCount > 1) {
        // Several pieces are grouped under a text run container.
        $textrun = $parent->addTextRun($style);
        $textParent = &$textrun;
    } else {
        // A single piece is added straight onto the parent.
        $textParent = &$parent;
    }
    // Every child element of the paragraph is handed to readRun.
    foreach ($xmlReader->getElements('*', $domNode) as $child) {
        $this->readRun($xmlReader, $child, $textParent, $docPart, $style);
    }
}
/**
* Read w:r
*
@ -148,6 +238,66 @@ abstract class AbstractPart
}
}
/**
* Read w:tbl
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*/
protected function readTable(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
    // Builds a table on $parent from a w:tbl node: one row per w:tr, one cell
    // per w:tc, and one readParagraph() call per w:p found directly in a cell.

    // Table style is only read when the table has a w:tblPr.
    $tableStyle = null;
    if ($xmlReader->elementExists('w:tblPr', $domNode)) {
        $tableStyle = $this->readTableStyle($xmlReader, $domNode);
    }

    /** @var \PhpOffice\PhpWord\Element\Table $table Type hint */
    $table = $parent->addTable($tableStyle);

    foreach ($xmlReader->getElements('*', $domNode) as $tableChild) {
        // Only rows are processed.
        // @todo Do something with table columns (w:tblGrid)
        if ($tableChild->nodeName !== 'w:tr') {
            continue;
        }

        // Row height and its rule both come from w:trPr/w:trHeight.
        $height = $xmlReader->getAttribute('w:val', $tableChild, 'w:trPr/w:trHeight');
        $heightRule = $xmlReader->getAttribute('w:hRule', $tableChild, 'w:trPr/w:trHeight');
        $isExactHeight = ($heightRule == 'exact');
        $rowStyle = array(
            'tblHeader'   => $xmlReader->elementExists('w:trPr/w:tblHeader', $tableChild),
            'cantSplit'   => $xmlReader->elementExists('w:trPr/w:cantSplit', $tableChild),
            'exactHeight' => $isExactHeight,
        );
        $row = $table->addRow($height, $rowStyle);

        foreach ($xmlReader->getElements('*', $tableChild) as $rowChild) {
            // Only cells are processed.
            // @todo Do something with row style (w:trPr)
            if ($rowChild->nodeName !== 'w:tc') {
                continue;
            }

            $width = $xmlReader->getAttribute('w:w', $rowChild, 'w:tcPr/w:tcW');

            // Cell style is only read when the cell has a w:tcPr.
            $cellStyle = null;
            $cellStyleNode = $xmlReader->getElement('w:tcPr', $rowChild);
            if ($cellStyleNode !== null) {
                $cellStyle = $this->readCellStyle($xmlReader, $cellStyleNode);
            }
            $cell = $row->addCell($width, $cellStyle);

            // Paragraphs are the only cell content that is read.
            foreach ($xmlReader->getElements('*', $rowChild) as $cellChild) {
                if ($cellChild->nodeName === 'w:p') {
                    $this->readParagraph($xmlReader, $cellChild, $cell, $docPart);
                }
            }
        }
    }
}
/**
* Read w:pPr
*
@ -258,6 +408,26 @@ abstract class AbstractPart
return $style;
}
/**
* Read w:tcPr
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @return array
*/
private function readCellStyle(XMLReader $xmlReader, \DOMElement $domNode)
{
    // Style key => path below the given w:tcPr node. Every entry is read with
    // the READ_VALUE method, so the definition table is built in one loop.
    $paths = array(
        'valign'        => 'w:vAlign',
        'textDirection' => 'w:textDirection',
        'gridSpan'      => 'w:gridSpan',
        'vMerge'        => 'w:vMerge',
        'bgColor'       => 'w:shd/w:fill',
    );

    $definitions = array();
    foreach ($paths as $styleKey => $path) {
        $definitions[$styleKey] = array(self::READ_VALUE, $path);
    }

    return $this->readStyleDefs($xmlReader, $domNode, $definitions);
}
/**
* Read style definition
*

View File

@ -114,156 +114,6 @@ class Document extends AbstractPart
}
}
/**
* Read w:p
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*
* @todo Get font style for preserve text
*/
private function readParagraph(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Turns one w:p node into exactly one kind of child of $parent: preserve text,
// list item, title, text break, or plain text / text run. The branches are
// tested in that order and the first match wins.

// Paragraph style
$paragraphStyle = null;
// Stays empty unless the style name contains "Heading" followed by a digit;
// the captured digit is used as the title depth further down.
$headingMatches = array();
if ($xmlReader->elementExists('w:pPr', $domNode)) {
$paragraphStyle = $this->readParagraphStyle($xmlReader, $domNode);
if (is_array($paragraphStyle) && array_key_exists('styleName', $paragraphStyle)) {
preg_match('/Heading(\d)/', $paragraphStyle['styleName'], $headingMatches);
}
}
// PreserveText
// Taken when at least one run of the paragraph carries a w:instrText.
if ($xmlReader->elementExists('w:r/w:instrText', $domNode)) {
// While true, w:t text of the runs is dropped instead of appended.
$ignoreText = false;
$textContent = '';
// The font style is looked up on the paragraph node, not on each run.
$fontStyle = $this->readFontStyle($xmlReader, $domNode);
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$instrText = $xmlReader->getValue('w:instrText', $node);
if ($xmlReader->elementExists('w:fldChar', $node)) {
// "begin" starts suppressing text, "end" stops it; any other marker
// type leaves the flag as it was.
$fldCharType = $xmlReader->getAttribute('w:fldCharType', $node, 'w:fldChar');
if ($fldCharType == 'begin') {
$ignoreText = true;
} elseif ($fldCharType == 'end') {
$ignoreText = false;
}
}
if (!is_null($instrText)) {
// Field instructions are kept, wrapped in braces.
$textContent .= '{' . $instrText . '}';
} else {
if ($ignoreText === false) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
}
}
$parent->addPreserveText($textContent, $fontStyle, $paragraphStyle);
// List item
// Taken when the paragraph properties contain numbering (w:numPr).
} elseif ($xmlReader->elementExists('w:pPr/w:numPr', $domNode)) {
$textContent = '';
$numId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:numId');
$levelId = $xmlReader->getAttribute('w:val', $domNode, 'w:pPr/w:numPr/w:ilvl');
// The w:t text of all runs is concatenated into one string.
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
// The fourth argument is built as "PHPWordList" followed by the numbering id.
$parent->addListItem($textContent, $levelId, null, "PHPWordList{$numId}", $paragraphStyle);
// Heading
// Taken when the style name matched the heading pattern above.
} elseif (!empty($headingMatches)) {
$textContent = '';
$nodes = $xmlReader->getElements('w:r', $domNode);
foreach ($nodes as $node) {
$textContent .= $xmlReader->getValue('w:t', $node);
}
// Only the text and the captured depth are passed; the paragraph style is not.
$parent->addTitle($textContent, $headingMatches[1]);
// Text and TextRun
} else {
$runCount = $xmlReader->countElements('w:r', $domNode);
$linkCount = $xmlReader->countElements('w:hyperlink', $domNode);
$runLinkCount = $runCount + $linkCount;
if ($runLinkCount == 0) {
// No runs and no hyperlinks: keep the paragraph as a styled break.
$parent->addTextBreak(null, $paragraphStyle);
} else {
if ($runLinkCount > 1) {
// Several pieces are grouped under a text run container.
$textrun = $parent->addTextRun($paragraphStyle);
$textParent = &$textrun;
} else {
// A single piece is added straight onto the parent.
$textParent = &$parent;
}
// Every child element of the paragraph, not only w:r, is handed to readRun.
$nodes = $xmlReader->getElements('*', $domNode);
foreach ($nodes as $node) {
$this->readRun($xmlReader, $node, $textParent, $docPart, $paragraphStyle);
}
}
}
}
/**
* Read w:tbl
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @param mixed $parent
* @param string $docPart
*/
private function readTable(XMLReader $xmlReader, \DOMElement $domNode, &$parent, $docPart)
{
// Builds a table on $parent from a w:tbl node: one row per w:tr, one cell per
// w:tc, and one readParagraph() call per w:p found directly in a cell.

// Table style
// Only read when the table has a w:tblPr; otherwise null is passed to addTable().
$tblStyle = null;
if ($xmlReader->elementExists('w:tblPr', $domNode)) {
$tblStyle = $this->readTableStyle($xmlReader, $domNode);
}
/** @var \PhpOffice\PhpWord\Element\Table $table Type hint */
$table = $parent->addTable($tblStyle);
$tblNodes = $xmlReader->getElements('*', $domNode);
foreach ($tblNodes as $tblNode) {
if ($tblNode->nodeName == 'w:tblGrid') { // Column
// @todo Do something with table columns
} elseif ($tblNode->nodeName == 'w:tr') { // Row
// Row height and its rule both come from w:trPr/w:trHeight.
$rowHeight = $xmlReader->getAttribute('w:val', $tblNode, 'w:trPr/w:trHeight');
$rowHRule = $xmlReader->getAttribute('w:hRule', $tblNode, 'w:trPr/w:trHeight');
// Collapse the rule to a boolean: true only when it equals "exact".
$rowHRule = $rowHRule == 'exact' ? true : false;
// tblHeader and cantSplit are presence flags: true when the element exists.
$rowStyle = array(
'tblHeader' => $xmlReader->elementExists('w:trPr/w:tblHeader', $tblNode),
'cantSplit' => $xmlReader->elementExists('w:trPr/w:cantSplit', $tblNode),
'exactHeight' => $rowHRule,
);
$row = $table->addRow($rowHeight, $rowStyle);
$rowNodes = $xmlReader->getElements('*', $tblNode);
foreach ($rowNodes as $rowNode) {
if ($rowNode->nodeName == 'w:trPr') { // Row style
// @todo Do something with row style
} elseif ($rowNode->nodeName == 'w:tc') { // Cell
$cellWidth = $xmlReader->getAttribute('w:w', $rowNode, 'w:tcPr/w:tcW');
// Cell style is only read when the cell has a w:tcPr; otherwise it stays null.
$cellStyle = null;
$cellStyleNode = $xmlReader->getElement('w:tcPr', $rowNode);
if (!is_null($cellStyleNode)) {
$cellStyle = $this->readCellStyle($xmlReader, $cellStyleNode);
}
$cell = $row->addCell($cellWidth, $cellStyle);
// Paragraphs are the only cell children that are read; others are skipped.
$cellNodes = $xmlReader->getElements('*', $rowNode);
foreach ($cellNodes as $cellNode) {
if ($cellNode->nodeName == 'w:p') { // Paragraph
$this->readParagraph($xmlReader, $cellNode, $cell, $docPart);
}
}
}
}
}
}
}
/**
* Read w:sectPr
*
@ -305,24 +155,4 @@ class Document extends AbstractPart
return $styles;
}
/**
* Read w:tcPr
*
* @param \PhpOffice\PhpWord\Shared\XMLReader $xmlReader
* @param \DOMElement $domNode
* @return array
*/
private function readCellStyle(XMLReader $xmlReader, \DOMElement $domNode)
{
// Maps each cell style key to a read method and a path below the given node.
// Every entry uses READ_VALUE; the actual reading is delegated to readStyleDefs().
$styleDefs = array(
'valign' => array(self::READ_VALUE, 'w:vAlign'),
'textDirection' => array(self::READ_VALUE, 'w:textDirection'),
'gridSpan' => array(self::READ_VALUE, 'w:gridSpan'),
'vMerge' => array(self::READ_VALUE, 'w:vMerge'),
'bgColor' => array(self::READ_VALUE, 'w:shd/w:fill'),
);
return $this->readStyleDefs($xmlReader, $domNode, $styleDefs);
}
}

View File

@ -50,7 +50,6 @@ class MPDF extends AbstractRenderer implements WriterInterface
// Create PDF
$pdf = new \mpdf();
$pdf->_setPageSize($paperSize, $orientation);
$pdf->defOrientation = $orientation;
$pdf->addPage($orientation);
// Write document properties