Merge branch 'rtfreader' into rtf
This commit is contained in:
commit
f9a1f34dcb
|
|
@ -0,0 +1,17 @@
|
||||||
|
<?php
// Sample: read an RTF document through the PhpWord IOFactory and write it
// back out with the configured writers.
//
// NOTE(review): EOL, CLI, write() and $writers are not defined in this file;
// presumably they come from Sample_Header.php - confirm.
include_once 'Sample_Header.php';

// Read contents
$name = basename(__FILE__, '.php');

// Only one source is read per run. The earlier, immediately overwritten
// assignments to $source were dead stores and have been dropped.
$source = "results/Sample_11_ReadWord2007.rtf";

echo date('H:i:s'), " Reading contents from `{$source}`", EOL;
$phpWord = \PhpOffice\PhpWord\IOFactory::load($source, 'RTF');

// Save file (output is named after this script)
echo write($phpWord, $name, $writers);
if (!CLI) {
    include_once 'Sample_Footer.php';
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{\rtf1
|
||||||
|
\ansi\ansicpg1252
|
||||||
|
\deff0
|
||||||
|
{\fonttbl{\f0\fnil\fcharset0 Arial;}{\f1\fnil\fcharset0 Times New Roman;}}
|
||||||
|
{\colortbl;\red255\green0\blue0;\red14\green0\blue0}
|
||||||
|
{\*\generator PhpWord;}
|
||||||
|
|
||||||
|
{\info{\title }{\subject }{\category }{\keywords }{\comment }{\author }{\operator }{\creatim \yr2014\mo05\dy27\hr23\min36\sec45}{\revtim \yr2014\mo05\dy27\hr23\min36\sec45}{\company }{\manager }}
|
||||||
|
\deftab720\viewkind1\uc1\pard\nowidctlpar\lang1036\kerning1\fs20
|
||||||
|
{Welcome to PhpWord}\par
|
||||||
|
\pard\nowidctlpar{\cf0\f0 Hello World!}\par
|
||||||
|
\par
|
||||||
|
\par
|
||||||
|
\pard\nowidctlpar{\cf0\f0\fs32\b\i I am styled by a font style definition.}\par
|
||||||
|
\pard\nowidctlpar{\cf0\f0 I am styled by a paragraph style definition.}\par
|
||||||
|
\pard\nowidctlpar\qc\sa100{\cf0\f0\fs32\b\i I am styled by both font and paragraph style.}\par
|
||||||
|
\pard\nowidctlpar{\cf1\f1\fs40\b\i\ul\strike\super I am inline styled.}\par
|
||||||
|
\par
|
||||||
|
{\field {\*\fldinst {HYPERLINK "http://www.google.com"}}{\fldrslt {Google}}}\par
|
||||||
|
\par
|
||||||
|
}
|
||||||
|
|
@ -51,7 +51,7 @@ abstract class IOFactory
|
||||||
*/
|
*/
|
||||||
public static function createReader($name = 'Word2007')
|
public static function createReader($name = 'Word2007')
|
||||||
{
|
{
|
||||||
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText'))) {
|
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF'))) {
|
||||||
throw new Exception("\"{$name}\" is not a valid reader.");
|
throw new Exception("\"{$name}\" is not a valid reader.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ abstract class AbstractReader implements ReaderInterface
|
||||||
*
|
*
|
||||||
* @var bool|resource
|
* @var bool|resource
|
||||||
*/
|
*/
|
||||||
protected $fileHandle = true;
|
protected $fileHandle;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read data only?
|
* Read data only?
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* This file is part of PHPWord - A pure PHP library for reading and writing
|
||||||
|
* word processing documents.
|
||||||
|
*
|
||||||
|
* PHPWord is free software distributed under the terms of the GNU Lesser
|
||||||
|
* General Public License version 3 as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please read the LICENSE
|
||||||
|
* file that was distributed with this source code. For the full list of
|
||||||
|
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
|
||||||
|
*
|
||||||
|
* @link https://github.com/PHPOffice/PHPWord
|
||||||
|
* @copyright 2010-2014 PHPWord contributors
|
||||||
|
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace PhpOffice\PhpWord\Reader;
|
||||||
|
|
||||||
|
use PhpOffice\PhpWord\PhpWord;
|
||||||
|
use PhpOffice\PhpWord\Reader\RTF\Document;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RTF Reader class
|
||||||
|
*
|
||||||
|
* @since 0.11.0
|
||||||
|
*/
|
||||||
|
class RTF extends AbstractReader implements ReaderInterface
{
    /**
     * Loads PhpWord from file
     *
     * Reads the whole file into memory and hands the content to the RTF
     * document parser, which fills a fresh PhpWord object.
     *
     * @param string $docFile Path of the RTF file to load
     * @return \PhpOffice\PhpWord\PhpWord
     * @throws \Exception If the file cannot be read
     */
    public function load($docFile)
    {
        // canRead() is inherited from AbstractReader.
        // NOTE(review): presumably it checks that the file exists and can be opened - confirm.
        if (!$this->canRead($docFile)) {
            throw new \Exception("Cannot read {$docFile}.");
        }

        // file_get_contents() returns false on failure; without this check a
        // failed read would silently produce an empty document.
        $content = file_get_contents($docFile);
        if ($content === false) {
            throw new \Exception("Cannot read {$docFile}.");
        }

        $phpWord = new PhpWord();

        $doc = new Document();
        $doc->rtf = $content;
        $doc->read($phpWord);

        return $phpWord;
    }
}
|
||||||
|
|
@ -0,0 +1,351 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* This file is part of PHPWord - A pure PHP library for reading and writing
|
||||||
|
* word processing documents.
|
||||||
|
*
|
||||||
|
* PHPWord is free software distributed under the terms of the GNU Lesser
|
||||||
|
* General Public License version 3 as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please read the LICENSE
|
||||||
|
* file that was distributed with this source code. For the full list of
|
||||||
|
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
|
||||||
|
*
|
||||||
|
* @link https://github.com/PHPOffice/PHPWord
|
||||||
|
* @copyright 2010-2014 PHPWord contributors
|
||||||
|
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace PhpOffice\PhpWord\Reader\RTF;
|
||||||
|
|
||||||
|
use PhpOffice\PhpWord\PhpWord;
|
||||||
|
use PhpOffice\PhpWord\Element\Section;
|
||||||
|
use PhpOffice\PhpWord\Element\TextRun;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RTF document reader
|
||||||
|
*
|
||||||
|
* References:
|
||||||
|
* - How to Write an RTF Reader http://latex2rtf.sourceforge.net/rtfspec_45.html
|
||||||
|
* - PHP rtfclass by Markus Fischer https://github.com/mfn/rtfclass
|
||||||
|
* - JavaScript RTF-parser by LazyGyu https://github.com/lazygyu/RTF-parser
|
||||||
|
*
|
||||||
|
* @since 0.11.0
|
||||||
|
* @SuppressWarnings(PHPMD.UnusedPrivateMethod)
|
||||||
|
*/
|
||||||
|
class Document
{
    /**
     * Control word modes used by parseControl()
     *
     * @const int
     */
    const PARA = 0; // Starts a new paragraph (text run)
    const STYL = 1; // Sets a style flag
    const SKIP = 2; // Destination whose text must not reach the document

    /**
     * PhpWord object
     *
     * @var \PhpOffice\PhpWord\PhpWord
     */
    private $phpWord;

    /**
     * Section object
     *
     * @var \PhpOffice\PhpWord\Element\Section
     */
    private $section;

    /**
     * Textrun object that currently receives parsed text
     *
     * @var \PhpOffice\PhpWord\Element\TextRun
     */
    private $textrun;

    /**
     * RTF content; must be set by the caller before read() is invoked
     *
     * @var string
     */
    public $rtf;

    /**
     * Content length
     *
     * @var int
     */
    private $length = 0;

    /**
     * Character index
     *
     * @var int
     */
    private $offset = 0;

    /**
     * Current control word
     *
     * @var string
     */
    private $control = '';

    /**
     * Text content
     *
     * @var string
     */
    private $text = '';

    /**
     * Parsing a control word flag
     *
     * @var bool
     */
    private $isControl = false;

    /**
     * First character flag: watch out for control symbols
     *
     * True only for the character that directly follows a backslash.
     *
     * @var bool
     */
    private $isFirst = false;

    /**
     * Group stack: one snapshot of the parser flags per open `{` group
     *
     * @var array
     */
    private $groups = array();

    /**
     * Parser flags of the current group
     *
     * Keys written by this class: `paragraph`, `text`, `property`, `value`,
     * `skipped`, and the style names listed in parseControl().
     *
     * @var array
     */
    private $flags = array();

    /**
     * Parse RTF content
     *
     * - Marks controlling characters `{`, `}`, and `\`
     * - Removes line endings
     * - Builds control words and control symbols
     * - Pushes every other character into the text queue
     *
     * The parser state is reset on every call, so the same instance can be
     * used for more than one read.
     *
     * @param \PhpOffice\PhpWord\PhpWord $phpWord
     * @todo Use `fread` stream for scalability
     */
    public function read(PhpWord &$phpWord)
    {
        $markers = array(
            123 => 'markOpening',   // {
            125 => 'markClosing',   // }
            92  => 'markBackslash', // \
            10  => 'markNewline',   // LF
            13  => 'markNewline',   // CR
        );

        // Start from a clean state; otherwise a second read() would begin at
        // the previous end offset and parse nothing.
        $this->offset = 0;
        $this->control = '';
        $this->text = '';
        $this->isControl = false;
        $this->isFirst = false;
        $this->groups = array();
        $this->flags = array();

        $this->rtf = (string) $this->rtf; // Tolerate null/false content
        $this->phpWord = $phpWord;
        $this->section = $phpWord->addSection();
        $this->textrun = $this->section->addTextRun();
        $this->length = strlen($this->rtf);

        $this->flags['paragraph'] = true; // Set paragraph flag from the beginning

        // Walk each character
        while ($this->offset < $this->length) {
            $char = $this->rtf[$this->offset];
            $ascii = ord($char);

            if (isset($markers[$ascii])) {
                // Marker found: {, }, \, LF, or CR
                $markerFunction = $markers[$ascii];
                $this->$markerFunction();
            } elseif ($this->isControl === false) {
                // Non control word: Push character
                $this->pushText($char);
            } elseif (preg_match("/^[a-zA-Z0-9-]?$/", $char)) {
                // No delimiter: Buffer control
                $this->control .= $char;
                $this->isFirst = false;
            } elseif ($this->isFirst) {
                // Control symbol (e.g. `\*`): the symbol itself is dropped
                $this->isFirst = false;
            } elseif ($char == ' ') {
                // Discard space as a control word delimiter
                $this->flushControl(true);
            }
            // NOTE(review): any other delimiter after a control word (e.g. `;`)
            // is dropped and control mode stays active until the next marker -
            // confirm whether such characters should be kept as text.

            $this->offset++;
        }
        $this->flushText();
    }

    /**
     * Mark opening bracket `{` character
     *
     * Directly after a backslash the bracket is the control symbol `\{`, i.e.
     * a literal character (same handling as `\\` in markBackslash()).
     * Otherwise it opens a group: pending content is flushed and the current
     * flags are saved on the group stack.
     */
    private function markOpening()
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '{';

            return;
        }

        $this->flush(true);
        array_push($this->groups, $this->flags);
    }

    /**
     * Mark closing bracket `}` character
     *
     * Directly after a backslash the bracket is the control symbol `\}`, i.e.
     * a literal character. Otherwise it closes a group: pending content is
     * flushed and the flags saved by the matching `{` are restored.
     */
    private function markClosing()
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '}';

            return;
        }

        $this->flush(true);

        // An unbalanced `}` has nothing to restore; popping an empty stack
        // would replace the flags array with null.
        if (count($this->groups) > 0) {
            $this->flags = array_pop($this->groups);
        }
    }

    /**
     * Mark backslash `\` character
     *
     * A backslash directly after another backslash is a literal `\`;
     * otherwise it starts a new control word or control symbol.
     */
    private function markBackslash()
    {
        if ($this->isFirst) {
            $this->setControl(false);
            $this->text .= '\\';
        } else {
            $this->flush();
            $this->setControl(true);
            $this->control = '';
        }
    }

    /**
     * Mark newline character: Flush control word because it's not possible to span multiline
     *
     * Outside of a control word the newline is simply discarded.
     */
    private function markNewline()
    {
        if ($this->isControl) {
            $this->flushControl(true);
        }
    }

    /**
     * Flush control word or text
     *
     * @param bool $isControl Passed on to flushControl(): leave control mode afterwards?
     */
    private function flush($isControl = false)
    {
        if ($this->isControl) {
            $this->flushControl($isControl);
        } else {
            $this->flushText();
        }
    }

    /**
     * Flush control word
     *
     * Splits the buffered control into word and optional numeric parameter
     * and hands both to parseControl(). Buffers that do not look like a
     * control word (e.g. empty after a control symbol) are ignored.
     *
     * @param bool $isControl Leave control mode after flushing?
     */
    private function flushControl($isControl = false)
    {
        if (preg_match("/^([A-Za-z]+)(-?[0-9]*) ?$/", $this->control, $match) === 1) {
            list(, $control, $parameter) = $match;
            $this->parseControl($control, $parameter);
        }

        if ($isControl === true) {
            $this->setControl(false);
        }
    }

    /**
     * Flush text in queue
     *
     * Records the text in the flags (as property value inside a destination,
     * otherwise as first text of a paragraph) and appends it to the current
     * text run unless the current group is flagged as skipped.
     */
    private function flushText()
    {
        if ($this->text == '') {
            return;
        }

        if (isset($this->flags['property'])) {
            // Set property value
            $this->flags['value'] = $this->text;
        } elseif (isset($this->flags['paragraph']) && $this->flags['paragraph'] === true) {
            // First text of the paragraph
            $this->flags['paragraph'] = false;
            $this->flags['text'] = $this->text;
        }

        // Add text if it's not flagged as skipped
        if (!isset($this->flags['skipped'])) {
            $this->textrun->addText($this->text);
        }

        $this->text = '';
    }

    /**
     * Reset control word and first char state
     *
     * @param bool $value New value for both the control and first-character flags
     */
    private function setControl($value)
    {
        $this->isControl = $value;
        $this->isFirst = $value;
    }

    /**
     * Push text into queue
     *
     * NOTE(review): as written, the `<` and `>` branches append the character
     * unchanged, exactly like the default branch. If entity escaping was
     * intended here, the literals may have been lost - confirm.
     *
     * @param string $char
     */
    private function pushText($char)
    {
        if ($char == '<') {
            $this->text .= "<";
        } elseif ($char == '>') {
            $this->text .= ">";
        } else {
            $this->text .= $char;
        }
    }

    /**
     * Parse control
     *
     * Looks the control word up in the table of supported controls and
     * updates the parser flags (and, for paragraphs, the current text run).
     * Unknown control words are ignored.
     *
     * @param string $control Control word without backslash and parameter
     * @param string $parameter Numeric parameter, may be empty; currently unused
     */
    private function parseControl($control, $parameter)
    {
        // control word => array(mode, flag/property name, flag value)
        $controls = array(
            'par'       => array(self::PARA, 'paragraph', true),
            'b'         => array(self::STYL, 'bold',      true),
            'i'         => array(self::STYL, 'italic',    true),
            'u'         => array(self::STYL, 'underline', true),
            'fonttbl'   => array(self::SKIP, 'fonttbl',   null),
            'colortbl'  => array(self::SKIP, 'colortbl',  null),
            'info'      => array(self::SKIP, 'info',      null),
            'generator' => array(self::SKIP, 'generator', null),
            'title'     => array(self::SKIP, 'title',     null),
            'subject'   => array(self::SKIP, 'subject',   null),
            'category'  => array(self::SKIP, 'category',  null),
            'keywords'  => array(self::SKIP, 'keywords',  null),
            'comment'   => array(self::SKIP, 'comment',   null),
            'shppict'   => array(self::SKIP, 'pic',       null),
            'fldinst'   => array(self::SKIP, 'link',      null),
        );

        if (!array_key_exists($control, $controls)) {
            return;
        }

        list($mode, $property, $value) = $controls[$control];
        switch ($mode) {
            case self::PARA: // Paragraph
                $this->textrun = $this->section->addTextRun();
                $this->flags[$property] = $value;
                break;
            case self::STYL: // Style
                $this->flags[$property] = $value;
                break;
            case self::SKIP: // Destination
                $this->flags['property'] = $property;
                $this->flags['skipped'] = true;
                break;
        }
    }
}
|
||||||
|
|
@ -56,18 +56,30 @@ class XMLReader
|
||||||
|
|
||||||
$zip = new ZipArchive();
|
$zip = new ZipArchive();
|
||||||
$zip->open($zipFile);
|
$zip->open($zipFile);
|
||||||
$contents = $zip->getFromName($xmlFile);
|
$content = $zip->getFromName($xmlFile);
|
||||||
$zip->close();
|
$zip->close();
|
||||||
|
|
||||||
if ($contents === false) {
|
if ($content === false) {
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
$this->dom = new \DOMDocument();
|
return $this->getDomFromString($content);
|
||||||
$this->dom->loadXML($contents);
|
|
||||||
return $this->dom;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get DOMDocument from content string
|
||||||
|
*
|
||||||
|
* @param string $content
|
||||||
|
* @return \DOMDocument
|
||||||
|
*/
|
||||||
|
public function getDomFromString($content)
{
    // Create a fresh document, remember it on the reader, then parse the
    // XML string into it and hand the same instance back to the caller.
    $document = new \DOMDocument();
    $this->dom = $document;
    $document->loadXML($content);

    return $document;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get elements
|
* Get elements
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -123,7 +123,7 @@ class Header extends AbstractPart
|
||||||
$content .= '{';
|
$content .= '{';
|
||||||
$content .= '\fonttbl';
|
$content .= '\fonttbl';
|
||||||
foreach ($this->fontTable as $index => $font) {
|
foreach ($this->fontTable as $index => $font) {
|
||||||
$content .= "{\\f{$index}\\fnil\\fcharset0{$font};}";
|
$content .= "{\\f{$index}\\fnil\\fcharset0 {$font};}";
|
||||||
}
|
}
|
||||||
$content .= '}';
|
$content .= '}';
|
||||||
$content .= PHP_EOL;
|
$content .= PHP_EOL;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue