#158: Convert UTF8 text to Unicode before writing RTF (support UTF8 in RTF)

This commit is contained in:
Ivan Lanin 2014-05-12 22:55:06 +07:00
parent 55e715b5b1
commit e589961e68
19 changed files with 144 additions and 65 deletions

View File

@ -4,7 +4,7 @@ This is the changelog between releases of PHPWord. Releases are listed in revers
## 0.11.0 - Not yet released
This release changed PHPWord license from LGPL 2.1 to LGPL 3.
This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3; new relative and absolute positioning for image; new `TextBox` and `ListItemRun` element; refactorings of writer classes into parts, elements, and styles; and ability to add elements to PHPWord object via HTML.
### Features
@ -15,6 +15,7 @@ This release changed PHPWord license from LGPL 2.1 to LGPL 3.
- HTML: Ability to add elements to PHPWord object via html - @basjan GH-231
- ListItemRun: New element that can add a list item with inline formatting like a textrun - @basjan GH-235
- Table: Ability to add table inside a cell (nested table) - @ivanlanin GH-149
- RTF: UTF8 support for RTF: Internal UTF8 text is converted to Unicode before writing - @ivanlanin GH-158
### Bugfixes

2
composer.lock generated
View File

@ -3,7 +3,7 @@
"This file locks the dependencies of your project to a known state",
"Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file"
],
"hash": "6daefa91649add98af3850b0a3f13415",
"hash": "77631436badcf4f49d673498ab6f1916",
"packages": [
],

View File

@ -85,6 +85,54 @@ class String
return $value;
}
/**
 * Returns unicode from UTF8 text
 *
 * Decodes the UTF-8 input byte by byte and returns a string in which ASCII
 * characters are kept as they are and every other code point is written as
 * an RTF unicode escape of the form `\uc0{\uN}`.
 *
 * - 2, 3 and 4 byte UTF-8 sequences are supported.
 * - `\uN` takes a signed 16-bit number, so values above 32767 are written as
 *   negative numbers (N - 65536).
 * - Code points above U+FFFF are written as a UTF-16 surrogate pair inside a
 *   single group, e.g. `\uc0{\u-10179\u-8704}`.
 * - The byte order mark (U+FEFF) is dropped.
 * - Malformed input (stray continuation bytes, truncated sequences, invalid
 *   lead bytes, values above U+10FFFF) is skipped instead of corrupting the
 *   characters that follow it.
 *
 * @param string $text UTF8 text
 * @return string Unicode text
 * @since 0.11.0
 * @link http://www.randomchaos.com/documents/?source=php_and_unicode
 */
public static function toUnicode($text)
{
    $codePoints = array();
    $pending = array();
    $expected = 1;
    $length = strlen($text); // Hoisted: byte length does not change in the loop

    // Gets unicode for each character
    for ($i = 0; $i < $length; $i++) {
        $byte = ord($text[$i]);

        if ($byte < 0x80) {
            // Plain ASCII; any unfinished multi-byte sequence is abandoned
            $pending = array();
            $codePoints[] = $byte;
            continue;
        }

        $isContinuation = ($byte & 0xC0) === 0x80;
        if (count($pending) === 0 || !$isContinuation) {
            // Either a new sequence starts here or the previous one was truncated
            $pending = array();
            if ($isContinuation || $byte >= 0xF8) {
                // Stray continuation byte or invalid lead byte: nothing to decode
                continue;
            }
            if ($byte < 0xE0) {
                $expected = 2;
            } elseif ($byte < 0xF0) {
                $expected = 3;
            } else {
                $expected = 4;
            }
        }

        $pending[] = $byte;
        if (count($pending) < $expected) {
            continue;
        }

        // Sequence complete: strip the marker bits and join the payload bits
        if ($expected === 4) {
            $number = (($pending[0] & 0x07) << 18)
                | (($pending[1] & 0x3F) << 12)
                | (($pending[2] & 0x3F) << 6)
                | ($pending[3] & 0x3F);
        } elseif ($expected === 3) {
            $number = (($pending[0] & 0x0F) << 12)
                | (($pending[1] & 0x3F) << 6)
                | ($pending[2] & 0x3F);
        } else {
            $number = (($pending[0] & 0x1F) << 6) | ($pending[1] & 0x3F);
        }
        if ($number <= 0x10FFFF) {
            $codePoints[] = $number;
        }
        $pending = array();
        $expected = 1;
    }

    // Converts text with utf8 characters into rtf unicode entities preserving ascii
    $entities = '';
    foreach ($codePoints as $codePoint) {
        if ($codePoint === 0xFEFF) {
            // Byte order mark carries no text
            continue;
        }
        if ($codePoint < 0x80) {
            $entities .= chr($codePoint);
            continue;
        }

        if ($codePoint > 0xFFFF) {
            // Outside the BMP: split into a UTF-16 surrogate pair
            $offset = $codePoint - 0x10000;
            $units = array(0xD800 | ($offset >> 10), 0xDC00 | ($offset & 0x3FF));
        } else {
            $units = array($codePoint);
        }

        $entities .= '\uc0{';
        foreach ($units as $unit) {
            // \u expects a signed 16-bit value
            $entities .= '\u' . ($unit > 32767 ? $unit - 65536 : $unit);
        }
        $entities .= '}';
    }

    return $entities;
}
/**
* Return name without underscore for < 0.10.0 variable name compatibility
*

View File

@ -25,8 +25,20 @@ use PhpOffice\PhpWord\Shared\String;
*/
class Paragraph extends AbstractStyle
{
/**
 * Height of a single line, expressed in twips (one line equals 240 twip)
 *
 * @const int
 */
const LINE_HEIGHT = 240;
/**
 * Paragraph alignment values accepted by setAlign()
 *
 * @const string
 * @link http://www.schemacentral.com/sc/ooxml/t-w_ST_Jc.html
 */
const ALIGN_LEFT = 'left'; // Align text to the left
const ALIGN_RIGHT = 'right'; // Align text to the right
const ALIGN_CENTER = 'center'; // Center text
const ALIGN_BOTH = 'both'; // Align text to both left and right
const ALIGN_JUSTIFY = 'justify'; // Alias: setAlign() converts it to ALIGN_BOTH
/**
* Aliases
*
@ -147,10 +159,11 @@ class Paragraph extends AbstractStyle
*/
public function setAlign($value = null)
{
if (strtolower($value) == 'justify') {
$value = 'both';
if (strtolower($value) == self::ALIGN_JUSTIFY) {
$value = self::ALIGN_BOTH;
}
$this->align = $value;
$enum = array(self::ALIGN_LEFT, self::ALIGN_RIGHT, self::ALIGN_CENTER, self::ALIGN_BOTH, self::ALIGN_JUSTIFY);
$this->align = $this->setEnumVal($value, $enum, $this->align);
return $this;
}

View File

@ -44,9 +44,7 @@ class Container extends \PhpOffice\PhpWord\Writer\HTML\Element\Container
$writerClass = str_replace('\\Element', '\\Writer\\RTF\\Element', get_class($element));
if (class_exists($writerClass)) {
$writer = new $writerClass($this->parentWriter, $element, $withoutP);
$content .= '{';
$content .= $writer->write();
$content .= '}' . PHP_EOL;
}
}

View File

@ -18,6 +18,7 @@
namespace PhpOffice\PhpWord\Writer\RTF\Element;
use PhpOffice\PhpWord\Element\Text as TextElement;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Style;
use PhpOffice\PhpWord\Style\Font as FontStyle;
use PhpOffice\PhpWord\Writer\RTF\Style\Font as FontStyleWriter;
@ -46,12 +47,17 @@ class Text extends AbstractElement
$content = '';
$content .= $this->writeParagraphStyle($this->element);
$content .= $this->writeFontStyleBegin($fontStyle);
if ($parentWriter->getLastParagraphStyle() != '' || $fontStyle) {
$content .= '{';
$content .= $this->writeFontStyle($fontStyle);
if ($fontStyle || $parentWriter->getLastParagraphStyle() != '') {
$content .= ' ';
}
$content .= $this->element->getText();
$content .= $this->writeFontStyleEnd($fontStyle);
$content .= String::toUnicode($this->element->getText());
$content .= '}';
// Disabled to test wrapping the text in an RTF group {} instead of writing closing control words
// @since 0.11.0
// $content .= $this->writeFontStyleClosing($fontStyle);
if (!$this->withoutP) {
$content .= '\par' . PHP_EOL;
@ -80,9 +86,10 @@ class Text extends AbstractElement
// Write style when applicable
if ($paragraphStyle && !$this->withoutP) {
if ($parentWriter->getLastParagraphStyle() != $element->getParagraphStyle()) {
$parentWriter->setLastParagraphStyle($element->getParagraphStyle());
$styleWriter = new ParagraphStyleWriter($paragraphStyle);
$content = $styleWriter->write();
$parentWriter->setLastParagraphStyle($element->getParagraphStyle());
} else {
$parentWriter->setLastParagraphStyle();
}
@ -99,7 +106,7 @@ class Text extends AbstractElement
* @param mixed $style
* @return string
*/
private function writeFontStyleBegin($style)
private function writeFontStyle($style)
{
if (!$style instanceof FontStyle) {
return '';
@ -135,14 +142,14 @@ class Text extends AbstractElement
* @param \PhpOffice\PhpWord\Style\Font $style
* @return string
*/
private function writeFontStyleEnd($style)
private function writeFontStyleClosing($style)
{
if (!$style instanceof FontStyle) {
return '';
}
$styleWriter = new FontStyleWriter($style);
$content = $styleWriter->writeEnd();
$content = $styleWriter->writeClosing();
return $content;
}

View File

@ -35,10 +35,10 @@ class TextRun extends AbstractElement
{
$content = '';
$content .= '\pard\nowidctlpar' . PHP_EOL;
$content .= '{\pard\nowidctlpar';
$writer = new Container($this->parentWriter, $this->element);
$content .= $writer->write();
$content .= '\par' . PHP_EOL;
$content .= '\par}' . PHP_EOL;
return $content;
}

View File

@ -37,8 +37,8 @@ class Title extends AbstractElement
$content = '';
$content .= '\pard\nowidctlpar' . PHP_EOL;
$content .= $this->element->getText();
$content .= '\pard\nowidctlpar';
$content .= String::toUnicode($this->element->getText());
$content .= '\par' . PHP_EOL;
return $content;

View File

@ -18,6 +18,7 @@
namespace PhpOffice\PhpWord\Writer\RTF\Style;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Style\Font as FontStyle;
/**
* RTF font style writer
@ -51,12 +52,17 @@ class Font extends AbstractStyle
$content = '';
$content .= '\cf' . $this->colorIndex;
$content .= '\f' . $this->nameIndex;
$content .= $this->getValueIf($style->isBold(), '\b');
$content .= $this->getValueIf($style->isItalic(), '\i');
$size = $style->getSize();
$content .= $this->getValueIf(is_numeric($size), '\fs' . ($size * 2));
$content .= $this->getValueIf($style->isBold(), '\b');
$content .= $this->getValueIf($style->isItalic(), '\i');
$content .= $this->getValueIf($style->getUnderline() != FontStyle::UNDERLINE_NONE, '\ul');
$content .= $this->getValueIf($style->isStrikethrough(), '\strike');
$content .= $this->getValueIf($style->isSuperScript(), '\super');
$content .= $this->getValueIf($style->isSubScript(), '\sub');
return $content;
}
@ -65,7 +71,7 @@ class Font extends AbstractStyle
*
* @return string
*/
public function writeEnd()
public function writeClosing()
{
$style = $this->getStyle();
if (!$style instanceof \PhpOffice\PhpWord\Style\Font) {
@ -75,12 +81,17 @@ class Font extends AbstractStyle
$content = '';
$content .= '\cf0';
$content .= '\f0';
$content .= $this->getValueIf($style->isBold(), '\b0');
$content .= $this->getValueIf($style->isItalic(), '\i0');
$size = $style->getSize();
$content .= $this->getValueIf(is_numeric($size), '\fs' . (PhpWord::DEFAULT_FONT_SIZE * 2));
$content .= $this->getValueIf($style->isBold(), '\b0');
$content .= $this->getValueIf($style->isItalic(), '\i0');
$content .= $this->getValueIf($style->getUnderline() != FontStyle::UNDERLINE_NONE, '\ul0');
$content .= $this->getValueIf($style->isStrikethrough(), '\strike0');
$content .= $this->getValueIf($style->isSuperScript(), '\super0');
$content .= $this->getValueIf($style->isSubScript(), '\sub0');
return $content;
}

View File

@ -17,6 +17,8 @@
namespace PhpOffice\PhpWord\Writer\RTF\Style;
use PhpOffice\PhpWord\Style\Paragraph as ParagraphStyle;
/**
* RTF paragraph style writer
*
@ -36,15 +38,23 @@ class Paragraph extends AbstractStyle
return;
}
$content = '\pard\nowidctlpar';
$alignments = array(
ParagraphStyle::ALIGN_LEFT => '\ql',
ParagraphStyle::ALIGN_RIGHT => '\qr',
ParagraphStyle::ALIGN_CENTER => '\qc',
ParagraphStyle::ALIGN_BOTH => '\qj',
);
// Alignment
$align = $style->getAlign();
$content .= $this->getValueIf(!is_null($align) && $align == 'center', '\qc');
// Spacing
$spaceAfter = $style->getSpaceAfter();
$content .= $this->getValueIf(!is_null($spaceAfter), '\sa' . $spaceAfter);
$spaceBefore = $style->getSpaceBefore();
$content = '\pard\nowidctlpar';
if (isset($alignments[$align])) {
$content .= $alignments[$align];
}
$content .= $this->getValueIf($spaceBefore !== null, '\sb' . $spaceBefore);
$content .= $this->getValueIf($spaceAfter !== null, '\sa' . $spaceAfter);
return $content;
}

View File

@ -19,6 +19,7 @@ namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Element\AbstractElement as Element;
use PhpOffice\PhpWord\Exception\Exception;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Shared\XMLWriter;
/**
@ -77,7 +78,7 @@ abstract class AbstractElement
}
/**
* Get Element
* Get element
*
* @return \PhpOffice\PhpWord\Element\AbstractElement
*/
@ -89,4 +90,15 @@ abstract class AbstractElement
throw new Exception('No element assigned.');
}
}
/**
 * Prepare raw text for writing into the document XML
 *
 * The text is first escaped with htmlspecialchars() and the escaped result is
 * then passed through String::controlCharacterPHP2OOXML().
 *
 * @param string $text
 * @return string
 */
protected function getText($text)
{
    $escaped = htmlspecialchars($text);

    return String::controlCharacterPHP2OOXML($escaped);
}
}

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/**
* CheckBox element writer
*
@ -37,11 +35,6 @@ class CheckBox extends Text
return;
}
$name = htmlspecialchars($element->getName());
$name = String::controlCharacterPHP2OOXML($name);
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$this->writeOpeningWP();
$xmlWriter->startElement('w:r');
@ -49,7 +42,7 @@ class CheckBox extends Text
$xmlWriter->writeAttribute('w:fldCharType', 'begin');
$xmlWriter->startElement('w:ffData');
$xmlWriter->startElement('w:name');
$xmlWriter->writeAttribute('w:val', $name);
$xmlWriter->writeAttribute('w:val', $this->getText($element->getName()));
$xmlWriter->endElement(); //w:name
$xmlWriter->writeAttribute('w:enabled', '');
$xmlWriter->startElement('w:calcOnExit');
@ -88,10 +81,10 @@ class CheckBox extends Text
$xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text);
$xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement(); // w:t
$xmlWriter->endElement(); // w:r
$this->writeEndingWP();
$this->writeClosingWP();
}
}

View File

@ -55,6 +55,6 @@ class Footnote extends Text
$xmlWriter->endElement(); // w:$referenceType
$xmlWriter->endElement(); // w:r
$this->writeEndingWP();
$this->writeClosingWP();
}
}

View File

@ -53,6 +53,6 @@ class Link extends Text
$xmlWriter->endElement(); // w:r
$xmlWriter->endElement(); // w:hyperlink
$this->writeEndingWP();
$this->writeClosingWP();
}
}

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/**
* PreserveText element writer
*
@ -76,21 +74,18 @@ class PreserveText extends Text
$xmlWriter->endElement();
$xmlWriter->endElement();
} else {
$text = htmlspecialchars($text);
$text = String::controlCharacterPHP2OOXML($text);
$xmlWriter->startElement('w:r');
$this->writeFontStyle();
$xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text);
$xmlWriter->writeRaw($this->getText($text));
$xmlWriter->endElement();
$xmlWriter->endElement();
}
}
$this->writeEndingWP();
$this->writeClosingWP();
}
}

View File

@ -17,7 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
use PhpOffice\PhpWord\Writer\Word2007\Style\Font as FontStyleWriter;
use PhpOffice\PhpWord\Writer\Word2007\Style\Paragraph as ParagraphStyleWriter;
@ -39,9 +38,6 @@ class Text extends AbstractElement
return;
}
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$this->writeOpeningWP();
$xmlWriter->startElement('w:r');
@ -50,11 +46,11 @@ class Text extends AbstractElement
$xmlWriter->startElement('w:t');
$xmlWriter->writeAttribute('xml:space', 'preserve');
$xmlWriter->writeRaw($text);
$xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement();
$xmlWriter->endElement(); // w:r
$this->writeEndingWP();
$this->writeClosingWP();
}
/**
@ -77,7 +73,7 @@ class Text extends AbstractElement
/**
* Write ending
*/
protected function writeEndingWP()
protected function writeClosingWP()
{
$xmlWriter = $this->getXmlWriter();

View File

@ -42,7 +42,7 @@ class TextBreak extends Text
$xmlWriter->startElement('w:pPr');
$this->writeFontStyle();
$xmlWriter->endElement(); // w:pPr
$this->writeEndingWP();
$this->writeClosingWP();
} else {
$xmlWriter->writeElement('w:p');
}

View File

@ -37,6 +37,6 @@ class TextRun extends Text
$containerWriter = new Container($xmlWriter, $element);
$containerWriter->write();
$this->writeEndingWP();
$this->writeClosingWP();
}
}

View File

@ -17,8 +17,6 @@
namespace PhpOffice\PhpWord\Writer\Word2007\Element;
use PhpOffice\PhpWord\Shared\String;
/**
* Title element writer
*
@ -41,9 +39,6 @@ class Title extends AbstractElement
$anchor = '_Toc' . ($rId + 252634154);
$style = $element->getStyle();
$text = htmlspecialchars($element->getText());
$text = String::controlCharacterPHP2OOXML($text);
$xmlWriter->startElement('w:p');
if (!empty($style)) {
@ -67,7 +62,7 @@ class Title extends AbstractElement
$xmlWriter->startElement('w:r');
$xmlWriter->startElement('w:t');
$xmlWriter->writeRaw($text);
$xmlWriter->writeRaw($this->getText($element->getText()));
$xmlWriter->endElement();
$xmlWriter->endElement();