diff --git a/Classes/PHPWord/Reader/Word2007.php b/Classes/PHPWord/Reader/Word2007.php
index c17d4074..78a9875c 100644
--- a/Classes/PHPWord/Reader/Word2007.php
+++ b/Classes/PHPWord/Reader/Word2007.php
@@ -35,7 +35,8 @@ if (!defined('PHPWORD_BASE_PATH')) {
/**
* PHPWord_Reader_Word2007
*/
-class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader
+class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements
+ PHPWord_Reader_IReader
{
/**
@@ -54,7 +55,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
{
// Check if file exists
if (!file_exists($pFilename)) {
- throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
+ throw new PHPWord_Exception("Could not open " . $pFilename .
+ " for reading! File does not exist.");
}
$return = false;
@@ -86,9 +88,13 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
*
* @param ZipArchive $archive
* @param string $fileName
+ * @param bool $removeNamespace
*/
- public function getFromZipArchive($archive, $fileName = '')
- {
+ public function getFromZipArchive(
+ $archive,
+ $fileName = '',
+ $removeNamespace = false
+ ) {
// Root-relative paths
if (strpos($fileName, '//') !== false)
{
@@ -103,9 +109,9 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$contents = $archive->getFromName(substr($fileName, 1));
}
- // Stupid hack for namespace
- if ($contents != '' && $fileName = 'word/document.xml') {
- $contents = preg_replace('~(?)w:~is', '$1', $contents);
+ // Remove the w: namespace prefix from element and attribute names
+ if ($removeNamespace) {
+ $contents = preg_replace('~(?|\s)w:~is', '$1', $contents);
}
return $contents;
@@ -122,7 +128,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
{
// Check if file exists
if (!file_exists($pFilename)) {
- throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist.");
+ throw new PHPWord_Exception("Could not open " . $pFilename .
+ " for reading! File does not exist.");
}
// Initialisations
@@ -130,17 +137,17 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$zip = new ZipArchive;
$zip->open($pFilename);
- // Read relationships
+ // Read properties and documents
$rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels"));
foreach ($rels->Relationship as $rel) {
switch ($rel["Type"]) {
// Core properties
- case "http://schemas.openxmlformats.org/package/2006//relationships/metadata/core-properties":
+ case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties":
$xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
if (is_object($xmlCore)) {
$xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
$xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/");
- $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006//metadata/core-properties");
+ $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties");
$docProps = $word->getProperties();
$docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator")));
$docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy")));
@@ -188,32 +195,75 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
$dir = dirname($rel["Target"]);
$archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels";
$relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive));
- $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006//relationships");
+ $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships");
$xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" .
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']"));
- $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}"));
- if ($xmlDoc->body) {
+ $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true));
+ if (is_object($xmlDoc)) {
$section = $word->createSection();
- foreach ($xmlDoc->body->children() as $element) {
- switch ($element->getName()) {
- case 'p':
- if ($element->pPr->sectPr) {
- $section = $word->createSection();
- continue;
- }
- if ($element->r) {
- if (count($element->r) == 1) {
- $section->addText($element->r->t);
- } else {
- $textRun = $section->createTextRun();
- foreach ($element->r as $r) {
- $textRun->addText($r->t);
- }
- }
+
+ foreach ($xmlDoc->body->children() as $elm) {
+ $elmName = $elm->getName();
+ if ($elmName == 'p') { // Paragraph/section
+ // Create new section if section settings found
+ if ($elm->pPr->sectPr) {
+ $section->setSettings($this->loadSectionSettings($elm->pPr));
+ $section = $word->createSection();
+ continue;
+ }
+ // Has w:r? It's either text or textrun
+ if ($elm->r) {
+ // w:r = 1? It's a plain paragraph
+ if (count($elm->r) == 1) {
+ $section->addText($elm->r->t,
+ $this->loadFontStyle($elm->r));
+ // w:r more than 1? It's a textrun
} else {
- $section->addTextBreak();
+ $textRun = $section->createTextRun();
+ foreach ($elm->r as $r) {
+ $textRun->addText($r->t,
+ $this->loadFontStyle($r));
+ }
}
- break;
+ // No, it's a textbreak
+ } else {
+ $section->addTextBreak();
+ }
+ } elseif ($elmName == 'sectPr') {
+ // Last section setting
+ $section->setSettings($this->loadSectionSettings($xmlDoc->body));
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ // Read styles
+ $docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels"));
+ foreach ($docRels->Relationship as $rel) {
+ switch ($rel["Type"]) {
+ case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles":
+ $xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true));
+ if (is_object($xmlStyle)) {
+ foreach ($xmlStyle->children() as $elm) {
+ if ($elm->getName() != 'style') {
+ continue;
+ }
+ unset($pStyle);
+ unset($fStyle);
+ $hasParagraphStyle = $elm->pPr && ($elm->pPr != '');
+ $hasFontStyle = $elm->rPr && ($elm->rPr != '');
+ $styleName = (string)$elm->name['val'];
+ if ($hasParagraphStyle) {
+ $pStyle = $this->loadParagraphStyle($elm);
+ if (!$hasFontStyle) {
+ $word->addParagraphStyle($styleName, $pStyle);
+ }
+ }
+ if ($hasFontStyle) {
+ $fStyle = $this->loadFontStyle($elm);
+ $word->addFontStyle($styleName, $fStyle, $pStyle);
}
}
}
@@ -225,6 +275,181 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord
return $word;
}
+    /**
+     * Read section settings from the `sectPr` child of an element
+     *
+     * Collects the break type plus the page size, orientation, page margin
+     * and column attributes. Only attributes that are actually present in
+     * the XML produce an entry in the result.
+     *
+     * @param SimpleXMLElement $elm Element that may contain a `sectPr` child
+     * @return array|null Settings keyed by setting name, or null when `$elm`
+     *                    has no `sectPr` child
+     *
+     * @todo Implement gutter
+     */
+    private function loadSectionSettings($elm)
+    {
+        $sectPr = $elm->sectPr;
+        if (!$sectPr) {
+            return null;
+        }
+
+        // Child element => attribute => array(setting key, cast to int?)
+        // The order below is the order in which keys appear in the result.
+        $layout = array(
+            'pgSz' => array(
+                'w'      => array('pageSizeW', true),
+                'h'      => array('pageSizeH', true),
+                'orient' => array('orientation', false),
+            ),
+            'pgMar' => array(
+                'top'    => array('topMargin', true),
+                'left'   => array('leftMargin', true),
+                'bottom' => array('bottomMargin', true),
+                'right'  => array('rightMargin', true),
+                'header' => array('headerHeight', true),
+                'footer' => array('footerHeight', true),
+            ),
+            'cols' => array(
+                'num'    => array('colsNum', true),
+                'space'  => array('colsSpace', true),
+            ),
+        );
+
+        $setting = array();
+        if ($sectPr->type) {
+            $setting['breakType'] = (string)$sectPr->type['val'];
+        }
+        foreach ($layout as $tag => $attributes) {
+            $node = $sectPr->$tag;
+            if (!$node) {
+                continue;
+            }
+            foreach ($attributes as $attribute => $target) {
+                if (!isset($node[$attribute])) {
+                    continue;
+                }
+                list($key, $asInteger) = $target;
+                if ($asInteger) {
+                    $setting[$key] = (int)$node[$attribute];
+                } else {
+                    $setting[$key] = (string)$node[$attribute];
+                }
+            }
+        }
+
+        return $setting;
+    }
+
+    /**
+     * Load paragraph style from the `pPr` child of an element
+     *
+     * When `pPr` references a named style through `pStyle`, only that name
+     * is returned. Otherwise the inline properties are collected into an
+     * array; a property that is absent from the XML produces no entry.
+     *
+     * @param SimpleXMLElement $elm Element that may contain a `pPr` child
+     * @return array|string|null Style name (string) when `pStyle` is set,
+     *                           array of style values otherwise, or null
+     *                           when `$elm` has no `pPr` child
+     */
+    private function loadParagraphStyle($elm)
+    {
+        $pPr = $elm->pPr;
+        if (!$pPr) {
+            return null;
+        }
+        if ($pPr->pStyle) {
+            return (string)$pPr->pStyle['val'];
+        }
+
+        $style = array();
+        if ($pPr->jc) {
+            $style['align'] = (string)$pPr->jc['val'];
+        }
+        // Indentation is stored in attributes of `ind`, not in child
+        // elements, so it has to be read with array access.
+        if ($pPr->ind) {
+            if (isset($pPr->ind['left'])) {
+                $style['indent'] = (int)$pPr->ind['left'];
+            }
+            if (isset($pPr->ind['hanging'])) {
+                $style['hanging'] = (int)$pPr->ind['hanging'];
+            }
+        }
+        if ($pPr->spacing) {
+            if (isset($pPr->spacing['after'])) {
+                $style['spaceAfter'] = (int)$pPr->spacing['after'];
+            }
+            if (isset($pPr->spacing['before'])) {
+                $style['spaceBefore'] = (int)$pPr->spacing['before'];
+            }
+            if (isset($pPr->spacing['line'])) {
+                $style['spacing'] = (int)$pPr->spacing['line'];
+            }
+        }
+        // In a style definition `basedOn` and `next` are children of the
+        // style element itself rather than of `pPr`; the `pPr` lookup is
+        // kept first so earlier behaviour is preserved.
+        foreach (array('basedOn', 'next') as $link) {
+            $node = $pPr->$link ? $pPr->$link : $elm->$link;
+            if ($node) {
+                $style[$link] = (string)$node['val'];
+            }
+        }
+        // On/off properties: a bare element means "on"; an explicit `val`
+        // of 0/false/off switches the property off.
+        $offValues = array('0', 'false', 'off');
+        $toggles = array('widowControl', 'keepNext', 'keepLines', 'pageBreakBefore');
+        foreach ($toggles as $toggle) {
+            $node = $pPr->$toggle;
+            if ($node) {
+                $style[$toggle] = !in_array((string)$node['val'], $offValues, true);
+            }
+        }
+
+        return $style;
+    }
+
+    /**
+     * Load font style from the `rPr` child of an element
+     *
+     * When `rPr` references a named style through `rStyle`, only that name
+     * is returned. Otherwise the inline properties are collected into an
+     * array; a property that is absent from the XML produces no entry.
+     *
+     * @param SimpleXMLElement $elm Element that may contain an `rPr` child
+     * @return array|string|null Style name (string) when `rStyle` is set,
+     *                           array of style values otherwise, or null
+     *                           when `$elm` has no `rPr` child
+     */
+    private function loadFontStyle($elm)
+    {
+        $rPr = $elm->rPr;
+        if (!$rPr) {
+            return null;
+        }
+        if ($rPr->rStyle) {
+            return (string)$rPr->rStyle['val'];
+        }
+
+        $style = array();
+        // rFonts may carry only theme or non-ASCII font attributes; do not
+        // produce an empty font name in that case.
+        if (isset($rPr->rFonts['ascii'])) {
+            $style['name'] = (string)$rPr->rFonts['ascii'];
+        }
+        if ($rPr->sz) {
+            // The stored value is halved (WordprocessingML sizes are in
+            // half-points).
+            $style['size'] = (int)$rPr->sz['val'] / 2;
+        }
+        if ($rPr->color) {
+            $style['color'] = (string)$rPr->color['val'];
+        }
+        // On/off properties: a bare element means "on"; an explicit `val`
+        // of 0/false/off switches the property off.
+        $offValues = array('0', 'false', 'off');
+        $toggles = array('b' => 'bold', 'i' => 'italic', 'strike' => 'strikethrough');
+        foreach ($toggles as $tag => $key) {
+            $node = $rPr->$tag;
+            if ($node) {
+                $style[$key] = !in_array((string)$node['val'], $offValues, true);
+            }
+        }
+        if ($rPr->u) {
+            $style['underline'] = (string)$rPr->u['val'];
+        }
+        if ($rPr->highlight) {
+            $style['fgColor'] = (string)$rPr->highlight['val'];
+        }
+        if ($rPr->vertAlign) {
+            // Any other value (e.g. "baseline") is neither super- nor
+            // subscript and must not set either flag.
+            $vertAlign = (string)$rPr->vertAlign['val'];
+            if ($vertAlign == 'superscript') {
+                $style['superScript'] = true;
+            } elseif ($vertAlign == 'subscript') {
+                $style['subScript'] = true;
+            }
+        }
+
+        return $style;
+    }
+
/**
* Get array item
*
diff --git a/Classes/PHPWord/Section.php b/Classes/PHPWord/Section.php
index c50e2d0c..3ed55ee5 100755
--- a/Classes/PHPWord/Section.php
+++ b/Classes/PHPWord/Section.php
@@ -77,7 +77,16 @@ class PHPWord_Section
{
$this->_sectionCount = $sectionCount;
$this->_settings = new PHPWord_Section_Settings();
+ $this->setSettings($settings);
+ }
+ /**
+ * Set Section Settings
+ *
+ * @param array $settings
+ */
+ public function setSettings($settings = null)
+ {
if (!is_null($settings) && is_array($settings)) {
foreach ($settings as $key => $value) {
if (substr($key, 0, 1) != '_') {
diff --git a/Classes/PHPWord/Shared/File.php b/Classes/PHPWord/Shared/File.php
index 7c1470fe..84cd2e84 100755
--- a/Classes/PHPWord/Shared/File.php
+++ b/Classes/PHPWord/Shared/File.php
@@ -76,7 +76,7 @@ class PHPWord_Shared_File
// Found something?
if ($returnValue == '' || is_null($returnValue)) {
- $pathArray = split('/', $pFilename);
+ $pathArray = explode('/', $pFilename);
while (in_array('..', $pathArray) && $pathArray[0] != '..') {
for ($i = 0; $i < count($pathArray); ++$i) {
if ($pathArray[$i] == '..' && $i > 0) {
diff --git a/Classes/PHPWord/Style/Paragraph.php b/Classes/PHPWord/Style/Paragraph.php
index bf0ae182..754589cb 100755
--- a/Classes/PHPWord/Style/Paragraph.php
+++ b/Classes/PHPWord/Style/Paragraph.php
@@ -506,4 +506,4 @@ class PHPWord_Style_Paragraph
{
return $this->lineHeight;
}
-}
\ No newline at end of file
+}
diff --git a/Classes/PHPWord/Writer/Word2007/Base.php b/Classes/PHPWord/Writer/Word2007/Base.php
index 77f4d0d7..3ee0823e 100755
--- a/Classes/PHPWord/Writer/Word2007/Base.php
+++ b/Classes/PHPWord/Writer/Word2007/Base.php
@@ -128,8 +128,8 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart
protected function _writeParagraphStyle(
PHPWord_Shared_XMLWriter $objWriter = null,
PHPWord_Style_Paragraph $style,
- $withoutPPR = false)
- {
+ $withoutPPR = false
+ ) {
$align = $style->getAlign();
$spacing = $style->getSpacing();
$spaceBefore = $style->getSpaceBefore();
@@ -926,4 +926,4 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart
$objWriter->endElement(); // w:p
}
}
-}
\ No newline at end of file
+}
diff --git a/changelog.txt b/changelog.txt
index 2230f687..a76efb00 100755
--- a/changelog.txt
+++ b/changelog.txt
@@ -50,6 +50,7 @@ Changes in branch for release 0.7.1 :
- Bugfix: (ivanlanin) GH-94 - General: PHPWord_Shared_Drawing::centimetersToPixels() conversion
- Feature: (ivanlanin) - Paragraph: setTabs() function
- Feature: (ivanlanin) GH-99 - General: Basic support for TextRun on ODT and RTF
+- Feature: (ivanlanin) - Reader: Initial effort for Word2007
- QA: (Progi1984) - UnitTests
Changes in branch for release 0.7.0 :
diff --git a/samples/Sample_10_ReadWord2007.php b/samples/Sample_10_ReadWord2007.php
index d7f81925..a837a574 100644
--- a/samples/Sample_10_ReadWord2007.php
+++ b/samples/Sample_10_ReadWord2007.php
@@ -5,74 +5,29 @@ define('EOL', (PHP_SAPI == 'cli') ? PHP_EOL : '
');
require_once '../Classes/PHPWord.php';
-$files = array(
- "Sample_01_SimpleText.docx",
- "Sample_02_TabStops.docx",
- "Sample_03_Sections.docx",
- "Sample_04_Textrun.docx",
- "Sample_05_Multicolumn.docx",
- "Sample_06_Footnote.docx",
- "Sample_07_TemplateCloneRow.docx",
- "Sample_08_ParagraphPagination.docx",
- "Sample_09_Tables.docx",
-);
+// Read contents
+$sample = 'Sample_10_ReadWord2007';
+$source = "resources/{$sample}.docx";
+$target = "results/{$sample}";
+echo '
', date('H:i:s'), " Reading contents from `{$source}`
"; +$PHPWord = PHPWord_IOFactory::load($source); -foreach ($files as $file) { - echo '', date('H:i:s'), " Load from {$file} with contents:
"; - unset($PHPWord); - try { - $PHPWord = PHPWord_IOFactory::load($file); - } catch (Exception $e) { - echo 'Caught exception: ', $e->getMessage(), '
'; - continue; - } - $sections = $PHPWord->getSections(); - $countSections = count($sections); - $pSection = 0; +// Rewrite contents +echo date('H:i:s') , " Write to Word2007 format" , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007'); +$objWriter->save("{$sample}.docx"); +rename("{$sample}.docx", "{$target}.docx"); - if ($countSections > 0) { - foreach ($sections as $section) { - $pSection++; - echo "Section {$pSection}:
"; - $elements = $section->getElements(); - foreach ($elements as $element) { - if ($element instanceof PHPWord_Section_Text) { - echo '' . htmlspecialchars($element->getText()) . '
'; - } elseif ($element instanceof PHPWord_Section_TextRun) { - $subelements = $element->getElements(); - echo ''; - if (count($subelements) > 0) { - foreach ($subelements as $subelement) { - if ($subelement instanceof PHPWord_Section_Text) { - echo htmlspecialchars($subelement->getText()); - } - } - } - echo '
'; - } elseif ($element instanceof PHPWord_Section_Link) { - echo 'Link not yet supported.
'; - } elseif ($element instanceof PHPWord_Section_Title) { - echo 'Title not yet supported.
'; - } elseif ($element instanceof PHPWord_Section_TextBreak) { - echo 'Page break not yet supported.
'; - } elseif ($element instanceof PHPWord_Section_Table) { - echo 'Table not yet supported.
'; - } elseif ($element instanceof PHPWord_Section_ListItem) { - echo 'List item not yet supported.
'; - } elseif ($element instanceof PHPWord_Section_Image || - $element instanceof PHPWord_Section_MemoryImage - ) { - echo 'Image not yet supported.
'; - } elseif ($element instanceof PHPWord_TOC) { - echo 'TOC not yet supported.
'; - } elseif($element instanceof PHPWord_Section_Footnote) { - echo 'Footnote not yet supported.
'; - } - } - } - } -} +echo date('H:i:s') , ' Write to OpenDocumentText format' , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'ODText'); +$objWriter->save("{$sample}.odt"); +rename("{$sample}.odt", "{$target}.odt"); +echo date('H:i:s') , ' Write to RTF format' , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'RTF'); +$objWriter->save("{$sample}.rtf"); +rename("{$sample}.rtf", "{$target}.rtf"); + +// Echo memory peak usage +echo date('H:i:s') , " Peak memory usage: " , (memory_get_peak_usage(true) / 1024 / 1024) , " MB" , EOL; +echo date('H:i:s') , " Done writing file" , EOL; diff --git a/samples/resources/Sample_10_ReadWord2007.docx b/samples/resources/Sample_10_ReadWord2007.docx new file mode 100644 index 00000000..fe8ec7ac Binary files /dev/null and b/samples/resources/Sample_10_ReadWord2007.docx differ diff --git a/samples/results/.gitkeep b/samples/results/.gitkeep new file mode 100644 index 00000000..e69de29b