diff --git a/Classes/PHPWord/Reader/Word2007.php b/Classes/PHPWord/Reader/Word2007.php index c17d4074..78a9875c 100644 --- a/Classes/PHPWord/Reader/Word2007.php +++ b/Classes/PHPWord/Reader/Word2007.php @@ -35,7 +35,8 @@ if (!defined('PHPWORD_BASE_PATH')) { /** * PHPWord_Reader_Word2007 */ -class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord_Reader_IReader +class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements + PHPWord_Reader_IReader { /** @@ -54,7 +55,8 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord { // Check if file exists if (!file_exists($pFilename)) { - throw new PHPWord_Exception("Could not open " . $pFilename . " for reading! File does not exist."); + throw new PHPWord_Exception("Could not open " . $pFilename . + " for reading! File does not exist."); } $return = false; @@ -86,9 +88,13 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord * * @param ZipArchive $archive * @param string $fileName + * @param bool $removeNamespace */ - public function getFromZipArchive($archive, $fileName = '') - { + public function getFromZipArchive( + $archive, + $fileName = '', + $removeNamespace = false + ) { // Root-relative paths if (strpos($fileName, '//') !== false) { @@ -103,9 +109,9 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord $contents = $archive->getFromName(substr($fileName, 1)); } - // Stupid hack for namespace - if ($contents != '' && $fileName = 'word/document.xml') { - $contents = preg_replace('~(open($pFilename); - // Read relationships + // Read properties and documents $rels = simplexml_load_string($this->getFromZipArchive($zip, "_rels/.rels")); foreach ($rels->Relationship as $rel) { switch ($rel["Type"]) { // Core properties - case "http://schemas.openxmlformats.org/package/2006//relationships/metadata/core-properties": + case "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties": $xmlCore = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); if (is_object($xmlCore)) { $xmlCore->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $xmlCore->registerXPathNamespace("dcterms", "http://purl.org/dc/terms/"); - $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006//metadata/core-properties"); + $xmlCore->registerXPathNamespace("cp", "http://schemas.openxmlformats.org/package/2006/metadata/core-properties"); $docProps = $word->getProperties(); $docProps->setCreator((string) self::array_item($xmlCore->xpath("dc:creator"))); $docProps->setLastModifiedBy((string) self::array_item($xmlCore->xpath("cp:lastModifiedBy"))); @@ -188,32 +195,75 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord $dir = dirname($rel["Target"]); $archive = "$dir/_rels/" . basename($rel["Target"]) . ".rels"; $relsDoc = simplexml_load_string($this->getFromZipArchive($zip, $archive)); - $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006//relationships"); + $relsDoc->registerXPathNamespace("rel", "http://schemas.openxmlformats.org/package/2006/relationships"); $xpath = self::array_item($relsDoc->xpath("rel:Relationship[@Type='" . "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles']")); - $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}")); - if ($xmlDoc->body) { + $xmlDoc = simplexml_load_string($this->getFromZipArchive($zip, "{$rel['Target']}", true)); + if (is_object($xmlDoc)) { $section = $word->createSection(); - foreach ($xmlDoc->body->children() as $element) { - switch ($element->getName()) { - case 'p': - if ($element->pPr->sectPr) { - $section = $word->createSection(); - continue; - } - if ($element->r) { - if (count($element->r) == 1) { - $section->addText($element->r->t); - } else { - $textRun = $section->createTextRun(); - foreach ($element->r as $r) { - $textRun->addText($r->t); - } - } + + foreach ($xmlDoc->body->children() as $elm) { + $elmName = $elm->getName(); + if ($elmName == 'p') { // Paragraph/section + // Create new section if section section found + if ($elm->pPr->sectPr) { + $section->setSettings($this->loadSectionSettings($elm->pPr)); + $section = $word->createSection(); + continue; + } + // Has w:r? It's either text or textrun + if ($elm->r) { + // w:r = 1? It's a plain paragraph + if (count($elm->r) == 1) { + $section->addText($elm->r->t, + $this->loadFontStyle($elm->r)); + // w:r more than 1? It's a textrun } else { - $section->addTextBreak(); + $textRun = $section->createTextRun(); + foreach ($elm->r as $r) { + $textRun->addText($r->t, + $this->loadFontStyle($r)); + } } - break; + // No, it's a textbreak + } else { + $section->addTextBreak(); + } + } elseif ($elmName == 'sectPr') { + // Last section setting + $section->setSettings($this->loadSectionSettings($xmlDoc->body)); + } + } + } + break; + } + } + + // Read styles + $docRels = simplexml_load_string($this->getFromZipArchive($zip, "word/_rels/document.xml.rels")); + foreach ($docRels->Relationship as $rel) { + switch ($rel["Type"]) { + case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles": + $xmlStyle = simplexml_load_string($this->getFromZipArchive($zip, "word/{$rel['Target']}", true)); + if (is_object($xmlStyle)) { + foreach ($xmlStyle->children() as $elm) { + if ($elm->getName() != 'style') { + continue; + } + unset($pStyle); + unset($fStyle); + $hasParagraphStyle = $elm->pPr && ($elm->pPr != ''); + $hasFontStyle = $elm->rPr && ($elm->rPr != ''); + $styleName = (string)$elm->name['val']; + if ($hasParagraphStyle) { + $pStyle = $this->loadParagraphStyle($elm); + if (!$hasFontStyle) { + $word->addParagraphStyle($styleName, $pStyle); + } + } + if ($hasFontStyle) { + $fStyle = $this->loadFontStyle($elm); + $word->addFontStyle($styleName, $fStyle, $pStyle); } } } @@ -225,6 +275,181 @@ class PHPWord_Reader_Word2007 extends PHPWord_Reader_Abstract implements PHPWord return $word; } + /** + * Load section settings from SimpleXMLElement + * + * @param SimpleXMLElement $elm + * @return array|string|null + * + * @todo Implement gutter + */ + private function loadSectionSettings($elm) + { + if ($xml = $elm->sectPr) { + $setting = array(); + if ($xml->type) { + $setting['breakType'] = (string)$xml->type['val']; + } + if ($xml->pgSz) { + if (isset($xml->pgSz['w'])) { + $setting['pageSizeW'] = (int)$xml->pgSz['w']; + } + if (isset($xml->pgSz['h'])) { + $setting['pageSizeH'] = (int)$xml->pgSz['h']; + } + if (isset($xml->pgSz['orient'])) { + $setting['orientation'] = (string)$xml->pgSz['orient']; + } + } + if ($xml->pgMar) { + if (isset($xml->pgMar['top'])) { + $setting['topMargin'] = (int)$xml->pgMar['top']; + } + if (isset($xml->pgMar['left'])) { + $setting['leftMargin'] = (int)$xml->pgMar['left']; + } + if (isset($xml->pgMar['bottom'])) { + $setting['bottomMargin'] = (int)$xml->pgMar['bottom']; + } + if (isset($xml->pgMar['right'])) { + $setting['rightMargin'] = (int)$xml->pgMar['right']; + } + if (isset($xml->pgMar['header'])) { + $setting['headerHeight'] = (int)$xml->pgMar['header']; + } + if (isset($xml->pgMar['footer'])) { + $setting['footerHeight'] = (int)$xml->pgMar['footer']; + } + if (isset($xml->pgMar['gutter'])) { + // $setting['gutter'] = (int)$xml->pgMar['gutter']; + } + } + if ($xml->cols) { + if (isset($xml->cols['num'])) { + $setting['colsNum'] = (int)$xml->cols['num']; + } + if (isset($xml->cols['space'])) { + $setting['colsSpace'] = (int)$xml->cols['space']; + } + } + return $setting; + } else { + return null; + } + } + + /** + * Load paragraph style from SimpleXMLElement + * + * @param SimpleXMLElement $elm + * @return array|string|null + */ + private function loadParagraphStyle($elm) + { + if ($xml = $elm->pPr) { + if ($xml->pStyle) { + return (string)$xml->pStyle['val']; + } + $style = array(); + if ($xml->jc) { + $style['align'] = (string)$xml->jc['val']; + } + if ($xml->ind) { + if (isset($xml->ind->left)) { + $style['indent'] = (int)$xml->ind->left; + } + if (isset($xml->ind->hanging)) { + $style['hanging'] = (int)$xml->ind->hanging; + } + if (isset($xml->ind->line)) { + $style['spacing'] = (int)$xml->ind->line; + } + } + if ($xml->spacing) { + if (isset($xml->spacing['after'])) { + $style['spaceAfter'] = (int)$xml->spacing['after']; + } + if (isset($xml->spacing['before'])) { + $style['spaceBefore'] = (int)$xml->spacing['before']; + } + if (isset($xml->spacing['line'])) { + $style['spacing'] = (int)$xml->spacing['line']; + } + } + if ($xml->basedOn) { + $style['basedOn'] = (string)$xml->basedOn['val']; + } + if ($xml->next) { + $style['next'] = (string)$xml->next['val']; + } + if ($xml->widowControl) { + $style['widowControl'] = false; + } + if ($xml->keepNext) { + $style['keepNext'] = true; + } + if ($xml->keepLines) { + $style['keepLines'] = true; + } + if ($xml->pageBreakBefore) { + $style['pageBreakBefore'] = true; + } + return $style; + } else { + return null; + } + } + + /** + * Load font style from SimpleXMLElement + * + * @param SimpleXMLElement $elm + * @return array|string|null + */ + private function loadFontStyle($elm) + { + if ($xml = $elm->rPr) { + if ($xml->rStyle) { + return (string)$xml->rStyle['val']; + } + $style = array(); + if ($xml->rFonts) { + $style['name'] = (string)$xml->rFonts['ascii']; + } + if ($xml->sz) { + $style['size'] = (int)$xml->sz['val'] / 2; + } + if ($xml->color) { + $style['color'] = (string)$xml->color['val']; + } + if ($xml->b) { + $style['bold'] = true; + } + if ($xml->i) { + $style['italic'] = true; + } + if ($xml->u) { + $style['underline'] = (string)$xml->u['val']; + } + if ($xml->strike) { + $style['strikethrough'] = true; + } + if ($xml->highlight) { + $style['fgColor'] = (string)$xml->highlight['val']; + } + if ($xml->vertAlign) { + if ($xml->vertAlign['val'] == 'superscript') { + $style['superScript'] = true; + } else { + $style['subScript'] = true; + } + } + return $style; + } else { + return null; + } + } + /** * Get array item * diff --git a/Classes/PHPWord/Section.php b/Classes/PHPWord/Section.php index c50e2d0c..3ed55ee5 100755 --- a/Classes/PHPWord/Section.php +++ b/Classes/PHPWord/Section.php @@ -77,7 +77,16 @@ class PHPWord_Section { $this->_sectionCount = $sectionCount; $this->_settings = new PHPWord_Section_Settings(); + $this->setSettings($settings); + } + /** + * Set Section Settings + * + * @param array $settings + */ + public function setSettings($settings = null) + { if (!is_null($settings) && is_array($settings)) { foreach ($settings as $key => $value) { if (substr($key, 0, 1) != '_') { diff --git a/Classes/PHPWord/Shared/File.php b/Classes/PHPWord/Shared/File.php index 7c1470fe..84cd2e84 100755 --- a/Classes/PHPWord/Shared/File.php +++ b/Classes/PHPWord/Shared/File.php @@ -76,7 +76,7 @@ class PHPWord_Shared_File // Found something? if ($returnValue == '' || is_null($returnValue)) { - $pathArray = split('/', $pFilename); + $pathArray = explode('/', $pFilename); while (in_array('..', $pathArray) && $pathArray[0] != '..') { for ($i = 0; $i < count($pathArray); ++$i) { if ($pathArray[$i] == '..' && $i > 0) { diff --git a/Classes/PHPWord/Style/Paragraph.php b/Classes/PHPWord/Style/Paragraph.php index bf0ae182..754589cb 100755 --- a/Classes/PHPWord/Style/Paragraph.php +++ b/Classes/PHPWord/Style/Paragraph.php @@ -506,4 +506,4 @@ class PHPWord_Style_Paragraph { return $this->lineHeight; } -} \ No newline at end of file +} diff --git a/Classes/PHPWord/Writer/Word2007/Base.php b/Classes/PHPWord/Writer/Word2007/Base.php index 77f4d0d7..3ee0823e 100755 --- a/Classes/PHPWord/Writer/Word2007/Base.php +++ b/Classes/PHPWord/Writer/Word2007/Base.php @@ -128,8 +128,8 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart protected function _writeParagraphStyle( PHPWord_Shared_XMLWriter $objWriter = null, PHPWord_Style_Paragraph $style, - $withoutPPR = false) - { + $withoutPPR = false + ) { $align = $style->getAlign(); $spacing = $style->getSpacing(); $spaceBefore = $style->getSpaceBefore(); @@ -926,4 +926,4 @@ class PHPWord_Writer_Word2007_Base extends PHPWord_Writer_Word2007_WriterPart $objWriter->endElement(); // w:p } } -} \ No newline at end of file +} diff --git a/changelog.txt b/changelog.txt index 2230f687..a76efb00 100755 --- a/changelog.txt +++ b/changelog.txt @@ -50,6 +50,7 @@ Changes in branch for release 0.7.1 : - Bugfix: (ivanlanin) GH-94 - General: PHPWord_Shared_Drawing::centimetersToPixels() conversion - Feature: (ivanlanin) - Paragraph: setTabs() function - Feature: (ivanlanin) GH-99 - General: Basic support for TextRun on ODT and RTF +- Feature: (ivanlanin) - Reader: Initial effort for Word2007 - QA: (Progi1984) - UnitTests Changes in branch for release 0.7.0 : diff --git a/samples/Sample_10_ReadWord2007.php b/samples/Sample_10_ReadWord2007.php index d7f81925..a837a574 100644 --- a/samples/Sample_10_ReadWord2007.php +++ b/samples/Sample_10_ReadWord2007.php @@ -5,74 +5,29 @@ define('EOL', (PHP_SAPI == 'cli') ? PHP_EOL : '
'); require_once '../Classes/PHPWord.php'; -$files = array( - "Sample_01_SimpleText.docx", - "Sample_02_TabStops.docx", - "Sample_03_Sections.docx", - "Sample_04_Textrun.docx", - "Sample_05_Multicolumn.docx", - "Sample_06_Footnote.docx", - "Sample_07_TemplateCloneRow.docx", - "Sample_08_ParagraphPagination.docx", - "Sample_09_Tables.docx", -); +// Read contents +$sample = 'Sample_10_ReadWord2007'; +$source = "resources/{$sample}.docx"; +$target = "results/{$sample}"; +echo '

', date('H:i:s'), " Reading contents from `{$source}`

"; +$PHPWord = PHPWord_IOFactory::load($source); -foreach ($files as $file) { - echo '
'; - echo '

', date('H:i:s'), " Load from {$file} with contents:

"; - unset($PHPWord); - try { - $PHPWord = PHPWord_IOFactory::load($file); - } catch (Exception $e) { - echo '

Caught exception: ', $e->getMessage(), '

'; - continue; - } - $sections = $PHPWord->getSections(); - $countSections = count($sections); - $pSection = 0; +// Rewrite contents +echo date('H:i:s') , " Write to Word2007 format" , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'Word2007'); +$objWriter->save("{$sample}.docx"); +rename("{$sample}.docx", "{$target}.docx"); - if ($countSections > 0) { - foreach ($sections as $section) { - $pSection++; - echo "

Section {$pSection}:

"; - $elements = $section->getElements(); - foreach ($elements as $element) { - if ($element instanceof PHPWord_Section_Text) { - echo '

' . htmlspecialchars($element->getText()) . '

'; - } elseif ($element instanceof PHPWord_Section_TextRun) { - $subelements = $element->getElements(); - echo '

'; - if (count($subelements) > 0) { - foreach ($subelements as $subelement) { - if ($subelement instanceof PHPWord_Section_Text) { - echo htmlspecialchars($subelement->getText()); - } - } - } - echo '

'; - } elseif ($element instanceof PHPWord_Section_Link) { - echo '

Link not yet supported.

'; - } elseif ($element instanceof PHPWord_Section_Title) { - echo '

Title not yet supported.

'; - } elseif ($element instanceof PHPWord_Section_TextBreak) { - echo '
'; - } elseif ($element instanceof PHPWord_Section_PageBreak) { - echo '

Page break not yet supported.

'; - } elseif ($element instanceof PHPWord_Section_Table) { - echo '

Table not yet supported.

'; - } elseif ($element instanceof PHPWord_Section_ListItem) { - echo '

List item not yet supported.

'; - } elseif ($element instanceof PHPWord_Section_Image || - $element instanceof PHPWord_Section_MemoryImage - ) { - echo '

Image not yet supported.

'; - } elseif ($element instanceof PHPWord_TOC) { - echo '

TOC not yet supported.

'; - } elseif($element instanceof PHPWord_Section_Footnote) { - echo '

Footnote not yet supported.

'; - } - } - } - } -} +echo date('H:i:s') , ' Write to OpenDocumentText format' , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'ODText'); +$objWriter->save("{$sample}.odt"); +rename("{$sample}.odt", "{$target}.odt"); +echo date('H:i:s') , ' Write to RTF format' , EOL; +$objWriter = PHPWord_IOFactory::createWriter($PHPWord, 'RTF'); +$objWriter->save("{$sample}.rtf"); +rename("{$sample}.rtf", "{$target}.rtf"); + +// Echo memory peak usage +echo date('H:i:s') , " Peak memory usage: " , (memory_get_peak_usage(true) / 1024 / 1024) , " MB" , EOL; +echo date('H:i:s') , " Done writing file" , EOL; diff --git a/samples/resources/Sample_10_ReadWord2007.docx b/samples/resources/Sample_10_ReadWord2007.docx new file mode 100644 index 00000000..fe8ec7ac Binary files /dev/null and b/samples/resources/Sample_10_ReadWord2007.docx differ diff --git a/samples/results/.gitkeep b/samples/results/.gitkeep new file mode 100644 index 00000000..e69de29b