From 87f071d1b63183ffaf8801840386c65b4b9978e5 Mon Sep 17 00:00:00 2001 From: Ivan Lanin Date: Tue, 27 May 2014 23:41:15 +0700 Subject: [PATCH] #72: Basic RTF Reader --- samples/Sample_27_ReadRTF.php | 17 ++ samples/resources/rtf.rtf | 21 ++ src/PhpWord/IOFactory.php | 2 +- src/PhpWord/Reader/AbstractReader.php | 2 +- src/PhpWord/Reader/RTF.php | 50 ++++ src/PhpWord/Reader/RTF/Document.php | 351 +++++++++++++++++++++++++ src/PhpWord/Shared/XMLReader.php | 22 +- src/PhpWord/Writer/RTF/Part/Header.php | 2 +- 8 files changed, 459 insertions(+), 8 deletions(-) create mode 100644 samples/Sample_27_ReadRTF.php create mode 100644 samples/resources/rtf.rtf create mode 100644 src/PhpWord/Reader/RTF.php create mode 100644 src/PhpWord/Reader/RTF/Document.php diff --git a/samples/Sample_27_ReadRTF.php b/samples/Sample_27_ReadRTF.php new file mode 100644 index 00000000..a28ba40b --- /dev/null +++ b/samples/Sample_27_ReadRTF.php @@ -0,0 +1,17 @@ +canRead($docFile)) { + $doc = new Document(); + $doc->rtf = file_get_contents($docFile); + $doc->read($phpWord); + } else { + throw new \Exception("Cannot read {$docFile}."); + } + + return $phpWord; + } +} diff --git a/src/PhpWord/Reader/RTF/Document.php b/src/PhpWord/Reader/RTF/Document.php new file mode 100644 index 00000000..ccb9b595 --- /dev/null +++ b/src/PhpWord/Reader/RTF/Document.php @@ -0,0 +1,351 @@ + 'markOpening', // { + 125 => 'markClosing', // } + 92 => 'markBackslash', // \ + 10 => 'markNewline', // LF + 13 => 'markNewline' // CR + ); + + $this->phpWord = $phpWord; + $this->section = $phpWord->addSection(); + $this->textrun = $this->section->addTextRun(); + $this->length = strlen($this->rtf); + + $this->flags['paragraph'] = true; // Set paragraph flag from the beginning + + // Walk each characters + while ($this->offset < $this->length) { + $char = $this->rtf[$this->offset]; + $ascii = ord($char); + + if (array_key_exists($ascii, $markers)) { // Marker found: {, }, \, LF, or CR + $markerFunction = $markers[$ascii]; + $this->$markerFunction(); + } else { + if ($this->isControl === false) { // Non control word: Push character + $this->pushText($char); + } else { + if (preg_match("/^[a-zA-Z0-9-]?$/", $char)) { // No delimiter: Buffer control + $this->control .= $char; + $this->isFirst = false; + } else { // Delimiter found: Parse buffered control + if ($this->isFirst) { + $this->isFirst = false; + } else { + if ($char == ' ') { // Discard space as a control word delimiter + $this->flushControl(true); + } + } + } + } + } + $this->offset++; + } + $this->flushText(); + } + + /** + * Mark opening braket `{` character + */ + private function markOpening() + { + $this->flush(true); + array_push($this->groups, $this->flags); + } + + /** + * Mark closing braket `}` character + */ + private function markClosing() + { + $this->flush(true); + $this->flags = array_pop($this->groups); + } + + /** + * Mark backslash `\` character + */ + private function markBackslash() + { + if ($this->isFirst) { + $this->setControl(false); + $this->text .= '\\'; + } else { + $this->flush(); + $this->setControl(true); + $this->control = ''; + } + } + + /** + * Mark newline character: Flush control word because it's not possible to span multiline + */ + private function markNewline() + { + if ($this->isControl) { + $this->flushControl(true); + } + } + + /** + * Flush control word or text + * + * @param bool $isControl + */ + private function flush($isControl = false) + { + if ($this->isControl) { + $this->flushControl($isControl); + } else { + $this->flushText(); + } + } + + /** + * Flush control word + * + * @param bool $isControl + */ + private function flushControl($isControl = false) + { + if (preg_match("/^([A-Za-z]+)(-?[0-9]*) ?$/", $this->control, $match) === 1) { + list(, $control, $parameter) = $match; + $this->parseControl($control, $parameter); + } + + if ($isControl === true) { + $this->setControl(false); + } + } + + /* + * Flush text in queue + */ + private function flushText() + { + if ($this->text != '') { + if (isset($this->flags['property'])) { + $this->flags['value'] = $this->text; + var_dump($this->flags); + } else { + if ($this->flags['paragraph'] === true) { + $this->flags['paragraph'] = false; + $this->flags['text'] = $this->text; + } + } + if (!isset($this->flags['skipped'])) { + $this->textrun->addText($this->text); + } + + $this->text = ''; + } + } + + /** + * Reset control word and first char state + * + * @param bool $state + */ + private function setControl($value) + { + $this->isControl = $value; + $this->isFirst = $value; + } + + /** + * Push text into queue + * + * @param string $char + */ + private function pushText($char) + { + if ($char == '<') { + $this->text .= "<"; + } elseif ($char == '>') { + $this->text .= ">"; + } else { + $this->text .= $char; + } + } + + /** + * Parse control + * + * @param string $control + * @param string $parameter + */ + private function parseControl($control, $parameter) + { + $controls = array( + 'par' => array(self::PARA, 'paragraph', true), + 'b' => array(self::STYL, 'bold', true), + 'i' => array(self::STYL, 'italic', true), + 'u' => array(self::STYL, 'underline', true), + 'fonttbl' => array(self::SKIP, 'fonttbl', null), + 'colortbl' => array(self::SKIP, 'colortbl', null), + 'info' => array(self::SKIP, 'info', null), + 'generator' => array(self::SKIP, 'generator', null), + 'title' => array(self::SKIP, 'title', null), + 'subject' => array(self::SKIP, 'subject', null), + 'category' => array(self::SKIP, 'category', null), + 'keywords' => array(self::SKIP, 'keywords', null), + 'comment' => array(self::SKIP, 'comment', null), + 'shppict' => array(self::SKIP, 'pic', null), + 'fldinst' => array(self::SKIP, 'link', null), + ); + + if (array_key_exists($control, $controls)) { + list($mode, $property, $value) = $controls[$control]; + switch ($mode) { + case self::PARA: // Paragraph + $this->textrun = $this->section->addTextRun(); + $this->flags[$property] = $value; + break; + case self::STYL: // Style + $this->flags[$property] = $value; + break; + case self::SKIP: // Destination + $this->flags['property'] = $property; + $this->flags['skipped'] = true; + } + } + } +} diff --git a/src/PhpWord/Shared/XMLReader.php b/src/PhpWord/Shared/XMLReader.php index 60474ce9..153152ee 100644 --- a/src/PhpWord/Shared/XMLReader.php +++ b/src/PhpWord/Shared/XMLReader.php @@ -56,18 +56,30 @@ class XMLReader $zip = new ZipArchive(); $zip->open($zipFile); - $contents = $zip->getFromName($xmlFile); + $content = $zip->getFromName($xmlFile); $zip->close(); - if ($contents === false) { + if ($content === false) { return false; } else { - $this->dom = new \DOMDocument(); - $this->dom->loadXML($contents); - return $this->dom; + return $this->getDomFromString($content); } } + /** + * Get DOMDocument from content string + * + * @param string $content + * @return \DOMDocument + */ + public function getDomFromString($content) + { + $this->dom = new \DOMDocument(); + $this->dom->loadXML($content); + + return $this->dom; + } + /** * Get elements * diff --git a/src/PhpWord/Writer/RTF/Part/Header.php b/src/PhpWord/Writer/RTF/Part/Header.php index c401b500..15a0c303 100644 --- a/src/PhpWord/Writer/RTF/Part/Header.php +++ b/src/PhpWord/Writer/RTF/Part/Header.php @@ -123,7 +123,7 @@ class Header extends AbstractPart $content .= '{'; $content .= '\fonttbl'; foreach ($this->fontTable as $index => $font) { - $content .= "{\\f{$index}\\fnil\\fcharset0{$font};}"; + $content .= "{\\f{$index}\\fnil\\fcharset0 {$font};}"; } $content .= '}'; $content .= PHP_EOL;