Reader: Read section settings and font/paragraph styles
This commit is contained in:
parent
fa2878e530
commit
ec514f310f
|
|
@ -35,7 +35,8 @@ if (!defined('PHPWORD_BASE_PATH')) {
|
|||
/**
|
||||
* PHPWord_Reader_Word2007
|
||||
*/
|
||||
class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader
|
||||
class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements
|
||||
PHPWord_Reader_IReader
|
||||
{
|
||||
|
||||
/**
|
||||
|
|
@ -54,7 +55,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
{
|
||||
// Check if file exists
|
||||
if (!file_exists($pFilename)) {
|
||||
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
|
||||
throw new PHPWord_Exception("Could not open " . $pFilename .
|
||||
" for reading! File does not exist.");
|
||||
}
|
||||
|
||||
$return = false;
|
||||
|
|
@ -86,9 +88,13 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
*
|
||||
* @param ZipArchive $archive
|
||||
* @param string $fileName
|
||||
* @param bool $removeNamespace
|
||||
*/
|
||||
public function getFromZipArchive($archive, $fileName = '')
|
||||
{
|
||||
public function getFromZipArchive(
|
||||
$archive,
|
||||
$fileName = '',
|
||||
$removeNamespace = false
|
||||
) {
|
||||
// Root-relative paths
|
||||
if (strpos($fileName, '//') !== false)
|
||||
{
|
||||
|
|
@ -103,9 +109,9 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
$contents = $archive->getFromName(substr($fileName, 1));
|
||||
}
|
||||
|
||||
// Stupid hack for namespace
|
||||
if ($contents != '' && $fileName = 'word/document.xml') {
|
||||
$contents = preg_replace('~(</?)w:~is', '$1', $contents);
|
||||
// Remove namespaces from elements and attributes name
|
||||
if ($removeNamespace) {
|
||||
$contents = preg_replace('~(</?|\s)w:~is', '$1', $contents);
|
||||
}
|
||||
|
||||
return $contents;
|
||||
|
|
@ -122,7 +128,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
{
|
||||
// Check if file exists
|
||||
if (!file_exists($pFilename)) {
|
||||
throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
|
||||
throw new PHPWord_Exception("Could not open " . $pFilename .
|
||||
" for reading! File does not exist.");
|
||||
}
|
||||
|
||||
// Initialisations
|
||||
|
|
@ -130,17 +137,17 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
$zip = new ZipArchive;
|
||||
$zip->open($pFilename);
|
||||
|
||||
// Read relationships
|
||||
// Read properties and documents
|
||||
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
|
||||
foreach ($rels->Relationship as $rel) {
|
||||
switch ($rel["Type"]) {
|
||||
// Core properties
|
||||
case "http://schemas.openxmlformats.org/package/2006//relationships/metadata/core-properties":
|
||||
case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties":
|
||||
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
|
||||
if (is_object($xmlCore)) {
|
||||
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
|
||||
$xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/");
|
||||
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006//metadata/core-properties");
|
||||
$xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties");
|
||||
$docProps = $word->getProperties();
|
||||
$docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator")));
|
||||
$docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy")));
|
||||
|
|
@ -188,32 +195,75 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
$dir = dirname($rel["Target"]);
|
||||
$archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels";
|
||||
$relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive));
|
||||
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006//relationships");
|
||||
$relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
|
||||
$xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" .
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
|
||||
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
|
||||
if ($xmlDoc->body) {
|
||||
$xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true));
|
||||
if (is_object($xmlDoc)) {
|
||||
$section = $word->createSection();
|
||||
foreach ($xmlDoc->body->children() as $element) {
|
||||
switch ($element->getName()) {
|
||||
case 'p':
|
||||
if ($element->pPr->sectPr) {
|
||||
$section = $word->createSection();
|
||||
continue;
|
||||
}
|
||||
if ($element->r) {
|
||||
if (count($element->r) == 1) {
|
||||
$section->addText($element->r->t);
|
||||
} else {
|
||||
$textRun = $section->createTextRun();
|
||||
foreach ($element->r as $r) {
|
||||
$textRun->addText($r->t);
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($xmlDoc->body->children() as $elm) {
|
||||
$elmName = $elm->getName();
|
||||
if ($elmName == 'p') { // Paragraph/section
|
||||
// Create new section if section setting found
|
||||
if ($elm->pPr->sectPr) {
|
||||
$section->setSettings($this->loadSectionSettings($elm->pPr));
|
||||
$section = $word->createSection();
|
||||
continue;
|
||||
}
|
||||
// Has w:r? It's either text or textrun
|
||||
if ($elm->r) {
|
||||
// w:r = 1? It's a plain paragraph
|
||||
if (count($elm->r) == 1) {
|
||||
$section->addText($elm->r->t,
|
||||
$this->loadFontStyle($elm->r));
|
||||
// w:r more than 1? It's a textrun
|
||||
} else {
|
||||
$section->addTextBreak();
|
||||
$textRun = $section->createTextRun();
|
||||
foreach ($elm->r as $r) {
|
||||
$textRun->addText($r->t,
|
||||
$this->loadFontStyle($r));
|
||||
}
|
||||
}
|
||||
break;
|
||||
// No, it's a textbreak
|
||||
} else {
|
||||
$section->addTextBreak();
|
||||
}
|
||||
} elseif ($elmName == 'sectPr') {
|
||||
// Last section setting
|
||||
$section->setSettings($this->loadSectionSettings($xmlDoc->body));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Read styles
|
||||
$docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels"));
|
||||
foreach ($docRels->Relationship as $rel) {
|
||||
switch ($rel["Type"]) {
|
||||
case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles":
|
||||
$xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true));
|
||||
if (is_object($xmlStyle)) {
|
||||
foreach ($xmlStyle->children() as $elm) {
|
||||
if ($elm->getName() != 'style') {
|
||||
continue;
|
||||
}
|
||||
unset($pStyle);
|
||||
unset($fStyle);
|
||||
$hasParagraphStyle = $elm->pPr && ($elm->pPr != '');
|
||||
$hasFontStyle = $elm->rPr && ($elm->rPr != '');
|
||||
$styleName = (string)$elm->name['val'];
|
||||
if ($hasParagraphStyle) {
|
||||
$pStyle = $this->loadParagraphStyle($elm);
|
||||
if (!$hasFontStyle) {
|
||||
$word->addParagraphStyle($styleName, $pStyle);
|
||||
}
|
||||
}
|
||||
if ($hasFontStyle) {
|
||||
$fStyle = $this->loadFontStyle($elm);
|
||||
$word->addFontStyle($styleName, $fStyle, $pStyle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -225,6 +275,181 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
|
|||
return $word;
|
||||
}
|
||||
|
||||
/**
 * Collect section settings from the `sectPr` child of an element
 *
 * Reads the break type plus the page size, page margin and column
 * attributes. Attribute and element names are looked up without a
 * namespace prefix.
 *
 * @param SimpleXMLElement $elm Element that may contain a `sectPr` child
 * @return array|null Settings keyed by setting name, null without `sectPr`
 *
 * @todo Implement gutter (the `gutter` margin attribute is not mapped yet)
 */
private function loadSectionSettings($elm)
{
    $sectPr = $elm->sectPr;
    if (!$sectPr) {
        return null;
    }

    $settings = array();
    if ($sectPr->type) {
        $settings['breakType'] = (string)$sectPr->type['val'];
    }

    // child element => attribute => array(setting key, cast to integer?)
    $attributeMap = array(
        'pgSz' => array(
            'w'      => array('pageSizeW', true),
            'h'      => array('pageSizeH', true),
            'orient' => array('orientation', false),
        ),
        'pgMar' => array(
            'top'    => array('topMargin', true),
            'left'   => array('leftMargin', true),
            'bottom' => array('bottomMargin', true),
            'right'  => array('rightMargin', true),
            'header' => array('headerHeight', true),
            'footer' => array('footerHeight', true),
        ),
        'cols' => array(
            'num'   => array('colsNum', true),
            'space' => array('colsSpace', true),
        ),
    );

    foreach ($attributeMap as $childName => $attributes) {
        $child = $sectPr->{$childName};
        if (!$child) {
            continue;
        }
        foreach ($attributes as $attributeName => $target) {
            if (!isset($child[$attributeName])) {
                continue;
            }
            $raw = $child[$attributeName];
            $settings[$target[0]] = $target[1] ? (int)$raw : (string)$raw;
        }
    }

    return $settings;
}
|
||||
|
||||
/**
 * Load paragraph style from SimpleXMLElement
 *
 * Returns the referenced style name when `pPr` contains a `pStyle`,
 * otherwise an array of style values read from `pPr` (and, for style
 * definitions, from the `basedOn`/`next` siblings of `pPr`). Element and
 * attribute names are looked up without a namespace prefix.
 *
 * @param SimpleXMLElement $elm Paragraph or style element holding `pPr`
 * @return array|string|null Style name, style array, or null without `pPr`
 */
private function loadParagraphStyle($elm)
{
    $pPr = $elm->pPr;
    if (!$pPr) {
        return null;
    }
    if ($pPr->pStyle) {
        return (string)$pPr->pStyle['val'];
    }

    $style = array();
    if ($pPr->jc) {
        $style['align'] = (string)$pPr->jc['val'];
    }
    if ($pPr->ind) {
        // Indentation values are attributes of `ind`, not child elements
        if (isset($pPr->ind['left'])) {
            $style['indent'] = (int)$pPr->ind['left'];
        }
        if (isset($pPr->ind['hanging'])) {
            $style['hanging'] = (int)$pPr->ind['hanging'];
        }
    }
    if ($pPr->spacing) {
        if (isset($pPr->spacing['after'])) {
            $style['spaceAfter'] = (int)$pPr->spacing['after'];
        }
        if (isset($pPr->spacing['before'])) {
            $style['spaceBefore'] = (int)$pPr->spacing['before'];
        }
        if (isset($pPr->spacing['line'])) {
            $style['spacing'] = (int)$pPr->spacing['line'];
        }
    }

    // In a style definition `basedOn` and `next` sit next to `pPr`,
    // not inside it, so they are read from the passed element
    if ($elm->basedOn) {
        $style['basedOn'] = (string)$elm->basedOn['val'];
    }
    if ($elm->next) {
        $style['next'] = (string)$elm->next['val'];
    }

    // On/off properties: present without `val` means on; an explicit
    // off value ("0", "false", "off") turns the property off
    $offValues = array('0', 'false', 'off');
    $flags = array('widowControl', 'keepNext', 'keepLines', 'pageBreakBefore');
    foreach ($flags as $flag) {
        if ($pPr->{$flag}) {
            $value = (string)$pPr->{$flag}['val'];
            $style[$flag] = !in_array($value, $offValues, true);
        }
    }

    return $style;
}
|
||||
|
||||
/**
 * Load font style from SimpleXMLElement
 *
 * Returns the referenced style name when `rPr` contains an `rStyle`,
 * otherwise an array of font style values read from `rPr`. Element and
 * attribute names are looked up without a namespace prefix.
 *
 * @param SimpleXMLElement $elm Run or style element holding `rPr`
 * @return array|string|null Style name, style array, or null without `rPr`
 */
private function loadFontStyle($elm)
{
    $rPr = $elm->rPr;
    if (!$rPr) {
        return null;
    }
    if ($rPr->rStyle) {
        return (string)$rPr->rStyle['val'];
    }

    // Values that switch an on/off property off; a missing `val` means on
    $offValues = array('0', 'false', 'off');

    $style = array();
    // `rFonts` may only carry other font slots; skip it without `ascii`
    if ($rPr->rFonts && isset($rPr->rFonts['ascii'])) {
        $style['name'] = (string)$rPr->rFonts['ascii'];
    }
    if ($rPr->sz) {
        // `sz` is halved to obtain the font size
        $style['size'] = (int)$rPr->sz['val'] / 2;
    }
    if ($rPr->color) {
        $style['color'] = (string)$rPr->color['val'];
    }
    if ($rPr->b) {
        $style['bold'] = !in_array((string)$rPr->b['val'], $offValues, true);
    }
    if ($rPr->i) {
        $style['italic'] = !in_array((string)$rPr->i['val'], $offValues, true);
    }
    if ($rPr->u) {
        $style['underline'] = (string)$rPr->u['val'];
    }
    if ($rPr->strike) {
        $style['strikethrough'] = !in_array((string)$rPr->strike['val'], $offValues, true);
    }
    if ($rPr->highlight) {
        $style['fgColor'] = (string)$rPr->highlight['val'];
    }
    if ($rPr->vertAlign) {
        // Any other value (e.g. "baseline") is neither super- nor subscript
        $vertAlign = (string)$rPr->vertAlign['val'];
        if ($vertAlign == 'superscript') {
            $style['superScript'] = true;
        } elseif ($vertAlign == 'subscript') {
            $style['subScript'] = true;
        }
    }

    return $style;
}
|
||||
|
||||
/**
|
||||
* Get array item
|
||||
*
|
||||
|
|
|
|||
|
|
@ -77,7 +77,16 @@ class PHPWord_Section
|
|||
{
|
||||
$this->_sectionCount = $sectionCount;
|
||||
$this->_settings = new PHPWord_Section_Settings();
|
||||
$this->setSettings($settings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Section Settings
|
||||
*
|
||||
* @param array $settings
|
||||
*/
|
||||
public function setSettings($settings = null)
|
||||
{
|
||||
if (!is_null($settings) && is_array($settings)) {
|
||||
foreach ($settings as $key => $value) {
|
||||
if (substr($key, 0, 1) != '_') {
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class PHPWord_Shared_File
|
|||
|
||||
// Found something?
|
||||
if ($returnValue == '' || is_null($returnValue)) {
|
||||
$pathArray = split('/', $pFilename);
|
||||
$pathArray = explode('/', $pFilename);
|
||||
while (in_array('..', $pathArray) && $pathArray[0] != '..') {
|
||||
for ($i = 0; $i < count($pathArray); ++$i) {
|
||||
if ($pathArray[$i] == '..' && $i > 0) {
|
||||
|
|
|
|||
|
|
@ -128,8 +128,8 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart
|
|||
protected function _writeParagraphStyle(
|
||||
PHPWord_Shared_XMLWriter $objWriter = null,
|
||||
PHPWord_Style_Paragraph $style,
|
||||
$withoutPPR = false)
|
||||
{
|
||||
$withoutPPR = false
|
||||
) {
|
||||
$align = $style->getAlign();
|
||||
$spacing = $style->getSpacing();
|
||||
$spaceBefore = $style->getSpaceBefore();
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ Changes in branch for release 0.7.1 :
|
|||
- Bugfix: (ivanlanin) GH-94 - General: PHPWord_Shared_Drawing::centimetersToPixels() conversion
|
||||
- Feature: (ivanlanin) - Paragraph: setTabs() function
|
||||
- Feature: (ivanlanin) GH-99 - General: Basic support for TextRun on ODT and RTF
|
||||
- Feature: (ivanlanin) - Reader: Initial effort for Word2007
|
||||
- QA: (Progi1984) - UnitTests
|
||||
|
||||
Changes in branch for release 0.7.0 :
|
||||
|
|
|
|||
|
|
@ -5,74 +5,29 @@ define('EOL', (PHP_SAPI == 'cli') ? PHP_EOL : '<br />');
|
|||
|
||||
require_once '../Classes/PHPWord.php';
|
||||
|
||||
$files = array(
|
||||
"Sample_01_SimpleText.docx",
|
||||
"Sample_02_TabStops.docx",
|
||||
"Sample_03_Sections.docx",
|
||||
"Sample_04_Textrun.docx",
|
||||
"Sample_05_Multicolumn.docx",
|
||||
"Sample_06_Footnote.docx",
|
||||
"Sample_07_TemplateCloneRow.docx",
|
||||
"Sample_08_ParagraphPagination.docx",
|
||||
"Sample_09_Tables.docx",
|
||||
);
|
||||
// Read contents
|
||||
$sample = 'Sample_10_ReadWord2007';
|
||||
$source = "resources/{$sample}.docx";
|
||||
$target = "results/{$sample}";
|
||||
echo '<p><strong>', date('H:i:s'), " Reading contents from `{$source}`</strong></p>";
|
||||
$PHPWord = PHPWord_IOFactory::load($source);
|
||||
|
||||
foreach ($files as $file) {
|
||||
echo '<hr />';
|
||||
echo '<p><strong>', date('H:i:s'), " Load from {$file} with contents:</strong></p>";
|
||||
unset($PHPWord);
|
||||
try {
|
||||
$PHPWord = PHPWord_IOFactory::load($file);
|
||||
} catch (Exception $e) {
|
||||
echo '<p style="color: red;">Caught exception: ', $e->getMessage(), '</p>';
|
||||
continue;
|
||||
}
|
||||
$sections = $PHPWord->getSections();
|
||||
$countSections = count($sections);
|
||||
$pSection = 0;
|
||||
// Rewrite contents
|
||||
echo date('H:i:s') , " Write to Word2007 format" , EOL;
|
||||
$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007');
|
||||
$objWriter->save("{$sample}.docx");
|
||||
rename("{$sample}.docx", "{$target}.docx");
|
||||
|
||||
if ($countSections > 0) {
|
||||
foreach ($sections as $section) {
|
||||
$pSection++;
|
||||
echo "<p><strong>Section {$pSection}:</strong></p>";
|
||||
$elements = $section->getElements();
|
||||
foreach ($elements as $element) {
|
||||
if ($element instanceof PHPWord_Section_Text) {
|
||||
echo '<p>' . htmlspecialchars($element->getText()) . '</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_TextRun) {
|
||||
$subelements = $element->getElements();
|
||||
echo '<p>';
|
||||
if (count($subelements) > 0) {
|
||||
foreach ($subelements as $subelement) {
|
||||
if ($subelement instanceof PHPWord_Section_Text) {
|
||||
echo htmlspecialchars($subelement->getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
echo '</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_Link) {
|
||||
echo '<p style="color: red;">Link not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_Title) {
|
||||
echo '<p style="color: red;">Title not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_TextBreak) {
|
||||
echo '<br />';
|
||||
} elseif ($element instanceof PHPWord_Section_PageBreak) {
|
||||
echo '<p style="color: red;">Page break not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_Table) {
|
||||
echo '<p style="color: red;">Table not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_ListItem) {
|
||||
echo '<p style="color: red;">List item not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_Section_Image ||
|
||||
$element instanceof PHPWord_Section_MemoryImage
|
||||
) {
|
||||
echo '<p style="color: red;">Image not yet supported.</p>';
|
||||
} elseif ($element instanceof PHPWord_TOC) {
|
||||
echo '<p style="color: red;">TOC not yet supported.</p>';
|
||||
} elseif($element instanceof PHPWord_Section_Footnote) {
|
||||
echo '<p style="color: red;">Footnote not yet supported.</p>';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
echo date('H:i:s') , ' Write to OpenDocumentText format' , EOL;
|
||||
$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'ODText');
|
||||
$objWriter->save("{$sample}.odt");
|
||||
rename("{$sample}.odt", "{$target}.odt");
|
||||
|
||||
echo date('H:i:s') , ' Write to RTF format' , EOL;
|
||||
$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'RTF');
|
||||
$objWriter->save("{$sample}.rtf");
|
||||
rename("{$sample}.rtf", "{$target}.rtf");
|
||||
|
||||
// Echo memory peak usage
|
||||
echo date('H:i:s') , " Peak memory usage: " , (memory_get_peak_usage(true) / 1024 / 1024) , " MB" , EOL;
|
||||
echo date('H:i:s') , " Done writing file" , EOL;
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue