Reader: Read section settings and font/paragraph styles

This commit is contained in:
Ivan Lanin 2014-03-11 16:27:42 +07:00
parent fa2878e530
commit ec514f310f
9 changed files with 295 additions and 105 deletions

View File

@ -35,7 +35,8 @@ if (!defined('PHPWORD_BASE_PATH')) {
/** /**
* PHPWord_Reader_Word2007 * PHPWord_Reader_Word2007
*/ */
class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements
PHPWord_Reader_IReader
{ {
/** /**
@ -54,7 +55,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
{ {
// Check if file exists // Check if file exists
if (!file_exists($pFilename)) { if (!file_exists($pFilename)) {
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist."); throw new PHPWord_Exception("Could not open " . $pFilename .
" for reading! File does not exist.");
} }
$return = false; $return = false;
@ -86,9 +88,13 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
* *
* @param ZipArchive $archive * @param ZipArchive $archive
* @param string $fileName * @param string $fileName
* @param bool $removeNamespace
*/ */
public function getFromZipArchive($archive, $fileName = '') public function getFromZipArchive(
{ $archive,
$fileName = '',
$removeNamespace = false
) {
// Root-relative paths // Root-relative paths
if (strpos($fileName, '//') !== false) if (strpos($fileName, '//') !== false)
{ {
@ -103,9 +109,9 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$contents = $archive->getFromName(substr($fileName, 1)); $contents = $archive->getFromName(substr($fileName, 1));
} }
// Stupid hack for namespace // Remove namespaces from elements and attributes name
if ($contents != '' && $fileName = 'word/document.xml') { if ($removeNamespace) {
$contents = preg_replace('~(</?)w:~is', '$1', $contents); $contents = preg_replace('~(</?|\s)w:~is', '$1', $contents);
} }
return $contents; return $contents;
@ -122,7 +128,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
{ {
// Check if file exists // Check if file exists
if (!file_exists($pFilename)) { if (!file_exists($pFilename)) {
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist."); throw new PHPWord_Exception("Could not open " . $pFilename .
" for reading! File does not exist.");
} }
// Initialisations // Initialisations
@ -130,17 +137,17 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$zip = new ZipArchive; $zip = new ZipArchive;
$zip->open($pFilename); $zip->open($pFilename);
// Read relationships // Read properties and documents
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
foreach ($rels->Relationship as $rel) { foreach ($rels->Relationship as $rel) {
switch ($rel["Type"]) { switch ($rel["Type"]) {
// Core properties // Core properties
case "http://schemas.openxmlformats.org/package/2006//relationships/metadata/core-properties": case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties":
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) { if (is_object($xmlCore)) {
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
$xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/"); $xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/");
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006//metadata/core-properties"); $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties");
$docProps = $word->getProperties(); $docProps = $word->getProperties();
$docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator"))); $docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator")));
$docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy"))); $docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy")));
@ -188,32 +195,75 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$dir = dirname($rel["Target"]); $dir = dirname($rel["Target"]);
$archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels"; $archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels";
$relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive)); $relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive));
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006//relationships"); $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
$xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" . $xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" .
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")); "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true));
if ($xmlDoc->body) { if (is_object($xmlDoc)) {
$section = $word->createSection(); $section = $word->createSection();
foreach ($xmlDoc->body->children() as $element) {
switch ($element->getName()) { foreach ($xmlDoc->body->children() as $elm) {
case 'p': $elmName = $elm->getName();
if ($element->pPr->sectPr) { if ($elmName == 'p') { // Paragraph/section
$section = $word->createSection(); // Create new section if section section found
continue; if ($elm->pPr->sectPr) {
} $section->setSettings($this->loadSectionSettings($elm->pPr));
if ($element->r) { $section = $word->createSection();
if (count($element->r) == 1) { continue;
$section->addText($element->r->t); }
} else { // Has w:r? It's either text or textrun
$textRun = $section->createTextRun(); if ($elm->r) {
foreach ($element->r as $r) { // w:r = 1? It's a plain paragraph
$textRun->addText($r->t); if (count($elm->r) == 1) {
} $section->addText($elm->r->t,
} $this->loadFontStyle($elm->r));
// w:r more than 1? It's a textrun
} else { } else {
$section->addTextBreak(); $textRun = $section->createTextRun();
foreach ($elm->r as $r) {
$textRun->addText($r->t,
$this->loadFontStyle($r));
}
} }
break; // No, it's a textbreak
} else {
$section->addTextBreak();
}
} elseif ($elmName == 'sectPr') {
// Last section setting
$section->setSettings($this->loadSectionSettings($xmlDoc->body));
}
}
}
break;
}
}
// Read styles
$docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels"));
foreach ($docRels->Relationship as $rel) {
switch ($rel["Type"]) {
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles":
$xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true));
if (is_object($xmlStyle)) {
foreach ($xmlStyle->children() as $elm) {
if ($elm->getName() != 'style') {
continue;
}
unset($pStyle);
unset($fStyle);
$hasParagraphStyle = $elm->pPr && ($elm->pPr != '');
$hasFontStyle = $elm->rPr && ($elm->rPr != '');
$styleName = (string)$elm->name['val'];
if ($hasParagraphStyle) {
$pStyle = $this->loadParagraphStyle($elm);
if (!$hasFontStyle) {
$word->addParagraphStyle($styleName, $pStyle);
}
}
if ($hasFontStyle) {
$fStyle = $this->loadFontStyle($elm);
$word->addFontStyle($styleName, $fStyle, $pStyle);
} }
} }
} }
@ -225,6 +275,181 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
return $word; return $word;
} }
/**
 * Collect section settings from the `sectPr` child of the given element
 *
 * The element and attribute names are looked up without a namespace
 * prefix, so the XML is expected to have been loaded with the `w:`
 * prefix already stripped.
 *
 * @param SimpleXMLElement $elm Element that may contain a `sectPr` child
 * @return array|null Settings keyed by setting name, or null when the
 *                    element has no `sectPr` child
 *
 * @todo Implement gutter (the `gutter` attribute of `pgMar` is ignored)
 */
private function loadSectionSettings($elm)
{
    $sectPr = $elm->sectPr;
    if (!$sectPr) {
        return null;
    }

    // element => attribute => array(setting key, cast to int?)
    // The order of this table defines the order of the keys in the result.
    $lookup = array(
        'pgSz' => array(
            'w'      => array('pageSizeW', true),
            'h'      => array('pageSizeH', true),
            'orient' => array('orientation', false),
        ),
        'pgMar' => array(
            'top'    => array('topMargin', true),
            'left'   => array('leftMargin', true),
            'bottom' => array('bottomMargin', true),
            'right'  => array('rightMargin', true),
            'header' => array('headerHeight', true),
            'footer' => array('footerHeight', true),
        ),
        'cols' => array(
            'num'   => array('colsNum', true),
            'space' => array('colsSpace', true),
        ),
    );

    $result = array();
    if ($sectPr->type) {
        $result['breakType'] = (string)$sectPr->type['val'];
    }
    foreach ($lookup as $tag => $attributes) {
        $node = $sectPr->{$tag};
        if (!$node) {
            continue;
        }
        foreach ($attributes as $attribute => $target) {
            if (!isset($node[$attribute])) {
                continue;
            }
            $result[$target[0]] = $target[1]
                ? (int)$node[$attribute]
                : (string)$node[$attribute];
        }
    }

    return $result;
}
/**
 * Load paragraph style from SimpleXMLElement
 *
 * Reads the `pPr` child of the given element. Element and attribute names
 * are looked up without a namespace prefix, so the XML is expected to have
 * been loaded with the `w:` prefix already stripped.
 *
 * NOTE(review): numeric values (indent, hanging, spacing) are passed
 * through exactly as stored in the XML. Confirm that the paragraph style
 * setters consuming this array expect the same unit.
 *
 * @param SimpleXMLElement $elm Element that may contain a `pPr` child
 * @return array|string|null Style name when `pPr` references a named style
 *                           through `pStyle`, an array of style values
 *                           otherwise, or null when there is no `pPr`
 */
private function loadParagraphStyle($elm)
{
    $xml = $elm->pPr;
    if (!$xml) {
        return null;
    }
    if ($xml->pStyle) {
        return (string)$xml->pStyle['val'];
    }

    $style = array();
    if ($xml->jc) {
        $style['align'] = (string)$xml->jc['val'];
    }
    if ($xml->ind) {
        // Indentation values are attributes of `ind`, not child elements.
        if (isset($xml->ind['left'])) {
            $style['indent'] = (int)$xml->ind['left'];
        }
        if (isset($xml->ind['hanging'])) {
            $style['hanging'] = (int)$xml->ind['hanging'];
        }
    }
    if ($xml->spacing) {
        if (isset($xml->spacing['after'])) {
            $style['spaceAfter'] = (int)$xml->spacing['after'];
        }
        if (isset($xml->spacing['before'])) {
            $style['spaceBefore'] = (int)$xml->spacing['before'];
        }
        if (isset($xml->spacing['line'])) {
            $style['spacing'] = (int)$xml->spacing['line'];
        }
    }

    // In a style definition `basedOn` and `next` are siblings of `pPr`,
    // so look at the parent element first. The lookup inside `pPr` is
    // kept as a fallback so that previously accepted input still works.
    foreach (array('basedOn', 'next') as $tag) {
        $node = $elm->{$tag} ? $elm->{$tag} : $xml->{$tag};
        if ($node) {
            $style[$tag] = (string)$node['val'];
        }
    }

    // On/off properties: the element alone means "on"; an explicit
    // val of 0/false/off switches the property off.
    $offValues = array('0', 'false', 'off');
    foreach (array('widowControl', 'keepNext', 'keepLines', 'pageBreakBefore') as $tag) {
        if ($xml->{$tag}) {
            $value = strtolower((string)$xml->{$tag}['val']);
            $style[$tag] = !in_array($value, $offValues, true);
        }
    }

    return $style;
}
/**
 * Load font style from SimpleXMLElement
 *
 * Reads the `rPr` child of the given element. Element and attribute names
 * are looked up without a namespace prefix, so the XML is expected to have
 * been loaded with the `w:` prefix already stripped.
 *
 * @param SimpleXMLElement $elm Element that may contain an `rPr` child
 * @return array|string|null Style name when `rPr` references a named style
 *                           through `rStyle`, an array of style values
 *                           otherwise, or null when there is no `rPr`
 */
private function loadFontStyle($elm)
{
    $xml = $elm->rPr;
    if (!$xml) {
        return null;
    }
    if ($xml->rStyle) {
        return (string)$xml->rStyle['val'];
    }

    // An on/off property without val means "on"; these values mean "off".
    $offValues = array('0', 'false', 'off');

    $style = array();
    // `rFonts` may carry only other attributes; skip it then instead of
    // reporting an empty font name.
    if (isset($xml->rFonts['ascii'])) {
        $style['name'] = (string)$xml->rFonts['ascii'];
    }
    if ($xml->sz) {
        // The stored value is halved to obtain the size used by the style.
        $style['size'] = (int)$xml->sz['val'] / 2;
    }
    if ($xml->color) {
        $style['color'] = (string)$xml->color['val'];
    }
    if ($xml->b) {
        $style['bold'] = !in_array(strtolower((string)$xml->b['val']), $offValues, true);
    }
    if ($xml->i) {
        $style['italic'] = !in_array(strtolower((string)$xml->i['val']), $offValues, true);
    }
    if ($xml->u) {
        $style['underline'] = (string)$xml->u['val'];
    }
    if ($xml->strike) {
        $style['strikethrough'] = !in_array(strtolower((string)$xml->strike['val']), $offValues, true);
    }
    if ($xml->highlight) {
        $style['fgColor'] = (string)$xml->highlight['val'];
    }
    if ($xml->vertAlign) {
        // Only the two explicit values change the style; anything else
        // (e.g. "baseline") leaves the text at its normal position.
        $position = (string)$xml->vertAlign['val'];
        if ($position === 'superscript') {
            $style['superScript'] = true;
        } elseif ($position === 'subscript') {
            $style['subScript'] = true;
        }
    }

    return $style;
}
/** /**
* Get array item * Get array item
* *

View File

@ -77,7 +77,16 @@ class PHPWord_Section
{ {
$this->_sectionCount = $sectionCount; $this->_sectionCount = $sectionCount;
$this->_settings = new PHPWord_Section_Settings(); $this->_settings = new PHPWord_Section_Settings();
$this->setSettings($settings);
}
/**
* Set Section Settings
*
* @param array $settings
*/
public function setSettings($settings = null)
{
if (!is_null($settings) && is_array($settings)) { if (!is_null($settings) && is_array($settings)) {
foreach ($settings as $key => $value) { foreach ($settings as $key => $value) {
if (substr($key, 0, 1) != '_') { if (substr($key, 0, 1) != '_') {

View File

@ -76,7 +76,7 @@ class PHPWord_Shared_File
// Found something? // Found something?
if ($returnValue == '' || is_null($returnValue)) { if ($returnValue == '' || is_null($returnValue)) {
$pathArray = split('/', $pFilename); $pathArray = explode('/', $pFilename);
while (in_array('..', $pathArray) && $pathArray[0] != '..') { while (in_array('..', $pathArray) && $pathArray[0] != '..') {
for ($i = 0; $i < count($pathArray); ++$i) { for ($i = 0; $i < count($pathArray); ++$i) {
if ($pathArray[$i] == '..' && $i > 0) { if ($pathArray[$i] == '..' && $i > 0) {

View File

@ -506,4 +506,4 @@ class PHPWord_Style_Paragraph
{ {
return $this->lineHeight; return $this->lineHeight;
} }
} }

View File

@ -128,8 +128,8 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart
protected function _writeParagraphStyle( protected function _writeParagraphStyle(
PHPWord_Shared_XMLWriter $objWriter = null, PHPWord_Shared_XMLWriter $objWriter = null,
PHPWord_Style_Paragraph $style, PHPWord_Style_Paragraph $style,
$withoutPPR = false) $withoutPPR = false
{ ) {
$align = $style->getAlign(); $align = $style->getAlign();
$spacing = $style->getSpacing(); $spacing = $style->getSpacing();
$spaceBefore = $style->getSpaceBefore(); $spaceBefore = $style->getSpaceBefore();
@ -926,4 +926,4 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart
$objWriter->endElement(); // w:p $objWriter->endElement(); // w:p
} }
} }
} }

View File

@ -50,6 +50,7 @@ Changes in branch for release 0.7.1 :
- Bugfix: (ivanlanin) GH-94 - General: PHPWord_Shared_Drawing::centimetersToPixels() conversion - Bugfix: (ivanlanin) GH-94 - General: PHPWord_Shared_Drawing::centimetersToPixels() conversion
- Feature: (ivanlanin) - Paragraph: setTabs() function - Feature: (ivanlanin) - Paragraph: setTabs() function
- Feature: (ivanlanin) GH-99 - General: Basic support for TextRun on ODT and RTF - Feature: (ivanlanin) GH-99 - General: Basic support for TextRun on ODT and RTF
- Feature: (ivanlanin) - Reader: Initial effort for Word2007
- QA: (Progi1984) - UnitTests - QA: (Progi1984) - UnitTests
Changes in branch for release 0.7.0 : Changes in branch for release 0.7.0 :

View File

@ -5,74 +5,29 @@ define('EOL', (PHP_SAPI == 'cli') ? PHP_EOL : '<br />');
require_once '../Classes/PHPWord.php'; require_once '../Classes/PHPWord.php';
$files = array( // Read contents
"Sample_01_SimpleText.docx", $sample = 'Sample_10_ReadWord2007';
"Sample_02_TabStops.docx", $source = "resources/{$sample}.docx";
"Sample_03_Sections.docx", $target = "results/{$sample}";
"Sample_04_Textrun.docx", echo '<p><strong>', date('H:i:s'), " Reading contents from `{$source}`</strong></p>";
"Sample_05_Multicolumn.docx", $PHPWord = PHPWord_IOFactory::load($source);
"Sample_06_Footnote.docx",
"Sample_07_TemplateCloneRow.docx",
"Sample_08_ParagraphPagination.docx",
"Sample_09_Tables.docx",
);
foreach ($files as $file) { // Rewrite contents
echo '<hr />'; echo date('H:i:s') , " Write to Word2007 format" , EOL;
echo '<p><strong>', date('H:i:s'), " Load from {$file} with contents:</strong></p>"; $objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007');
unset($PHPWord); $objWriter->save("{$sample}.docx");
try { rename("{$sample}.docx", "{$target}.docx");
$PHPWord = PHPWord_IOFactory::load($file);
} catch (Exception $e) {
echo '<p style="color: red;">Caught exception: ', $e->getMessage(), '</p>';
continue;
}
$sections = $PHPWord->getSections();
$countSections = count($sections);
$pSection = 0;
if ($countSections > 0) { echo date('H:i:s') , ' Write to OpenDocumentText format' , EOL;
foreach ($sections as $section) { $objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'ODText');
$pSection++; $objWriter->save("{$sample}.odt");
echo "<p><strong>Section {$pSection}:</strong></p>"; rename("{$sample}.odt", "{$target}.odt");
$elements = $section->getElements();
foreach ($elements as $element) {
if ($element instanceof PHPWord_Section_Text) {
echo '<p>' . htmlspecialchars($element->getText()) . '</p>';
} elseif ($element instanceof PHPWord_Section_TextRun) {
$subelements = $element->getElements();
echo '<p>';
if (count($subelements) > 0) {
foreach ($subelements as $subelement) {
if ($subelement instanceof PHPWord_Section_Text) {
echo htmlspecialchars($subelement->getText());
}
}
}
echo '</p>';
} elseif ($element instanceof PHPWord_Section_Link) {
echo '<p style="color: red;">Link not yet supported.</p>';
} elseif ($element instanceof PHPWord_Section_Title) {
echo '<p style="color: red;">Title not yet supported.</p>';
} elseif ($element instanceof PHPWord_Section_TextBreak) {
echo '<br />';
} elseif ($element instanceof PHPWord_Section_PageBreak) {
echo '<p style="color: red;">Page break not yet supported.</p>';
} elseif ($element instanceof PHPWord_Section_Table) {
echo '<p style="color: red;">Table not yet supported.</p>';
} elseif ($element instanceof PHPWord_Section_ListItem) {
echo '<p style="color: red;">List item not yet supported.</p>';
} elseif ($element instanceof PHPWord_Section_Image ||
$element instanceof PHPWord_Section_MemoryImage
) {
echo '<p style="color: red;">Image not yet supported.</p>';
} elseif ($element instanceof PHPWord_TOC) {
echo '<p style="color: red;">TOC not yet supported.</p>';
} elseif($element instanceof PHPWord_Section_Footnote) {
echo '<p style="color: red;">Footnote not yet supported.</p>';
}
}
}
}
}
echo date('H:i:s') , ' Write to RTF format' , EOL;
$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'RTF');
$objWriter->save("{$sample}.rtf");
rename("{$sample}.rtf", "{$target}.rtf");
// Echo memory peak usage
echo date('H:i:s') , " Peak memory usage: " , (memory_get_peak_usage(true) / 1024 / 1024) , " MB" , EOL;
echo date('H:i:s') , " Done writing file" , EOL;

Binary file not shown.

0
samples/results/.gitkeep Normal file
View File