diff --git a/CHANGELOG.md b/CHANGELOG.md index ebf98184..7f9745d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Place announcement text here. - Introduced writer for the "Paragraph Alignment" element (see `\PhpOffice\PhpWord\Writer\Word2007\Element\ParagraphAlignment`). - @RomanSyroeshko - Introduced writer for the "Table Alignment" element (see `\PhpOffice\PhpWord\Writer\Word2007\Element\TableAlignment`). - @RomanSyroeshko - Supported indexed arrays in arguments of `TemplateProcessor::setValue()`. - @RomanSyroeshko #618 +- Introduced automatic output escaping for OOXML, ODF, HTML, and RTF. To turn the feature on use `phpword.ini` or `\PhpOffice\PhpWord\Settings`. - @RomanSyroeshko #483 ### Changed - Improved error message for the case when `autoload.php` is not found. - @RomanSyroeshko #371 diff --git a/src/PhpWord/Escaper/AbstractEscaper.php b/src/PhpWord/Escaper/AbstractEscaper.php index 37b82dff..6ddcbb51 100644 --- a/src/PhpWord/Escaper/AbstractEscaper.php +++ b/src/PhpWord/Escaper/AbstractEscaper.php @@ -25,22 +25,22 @@ namespace PhpOffice\PhpWord\Escaper; abstract class AbstractEscaper implements EscaperInterface { /** - * @param string $subject + * @param string $input * * @return string */ - abstract protected function escapeSingleValue($subject); + abstract protected function escapeSingleValue($input); - public function escape($subject) + public function escape($input) { - if (is_array($subject)) { - foreach ($subject as &$item) { + if (is_array($input)) { + foreach ($input as &$item) { $item = $this->escapeSingleValue($item); } } else { - $subject = $this->escapeSingleValue($subject); + $input = $this->escapeSingleValue($input); } - return $subject; + return $input; } } diff --git a/src/PhpWord/Escaper/EscaperInterface.php b/src/PhpWord/Escaper/EscaperInterface.php index 39ddf9b4..c34cf370 100644 --- a/src/PhpWord/Escaper/EscaperInterface.php +++ b/src/PhpWord/Escaper/EscaperInterface.php @@ -25,9 +25,9 @@ namespace PhpOffice\PhpWord\Escaper; interface EscaperInterface { /** - * @param mixed $subject + * @param mixed $input * * @return mixed */ - public function escape($subject); + public function escape($input); } diff --git a/src/PhpWord/Escaper/RegExp.php b/src/PhpWord/Escaper/RegExp.php index 30d799d9..de510bcf 100644 --- a/src/PhpWord/Escaper/RegExp.php +++ b/src/PhpWord/Escaper/RegExp.php @@ -26,8 +26,8 @@ class RegExp extends AbstractEscaper { const REG_EXP_DELIMITER = '/'; - protected function escapeSingleValue($subject) + protected function escapeSingleValue($input) { - return self::REG_EXP_DELIMITER . preg_quote($subject, self::REG_EXP_DELIMITER) . self::REG_EXP_DELIMITER . 'u'; + return self::REG_EXP_DELIMITER . preg_quote($input, self::REG_EXP_DELIMITER) . self::REG_EXP_DELIMITER . 'u'; } } diff --git a/src/PhpWord/Escaper/Rtf.php b/src/PhpWord/Escaper/Rtf.php new file mode 100644 index 00000000..9385e63e --- /dev/null +++ b/src/PhpWord/Escaper/Rtf.php @@ -0,0 +1,88 @@ + $code || $code >= 80) { + return '{\u' . $code . '}'; + } else { + return chr($code); + } + } + + protected function escapeMultibyteCharacter($code) { + return '\uc0{\u' . $code . '}'; + } + + /** + * @see http://www.randomchaos.com/documents/?source=php_and_unicode + */ + protected function escapeSingleValue($input) + { + $escapedValue = ''; + + $numberOfBytes = 1; + $bytes = array(); + for ($i = 0; $i < strlen($input); ++$i) { + $character = $input[$i]; + $asciiCode = ord($character); + + if ($asciiCode < 128) { + $escapedValue .= $this->escapeAsciiCharacter($asciiCode); + } else { + if (0 == count($bytes)) { + if ($asciiCode < 224) { + $numberOfBytes = 2; + } else if ($asciiCode < 240) { + $numberOfBytes = 3; + } else if ($asciiCode < 248) { + $numberOfBytes = 4; + } + } + + $bytes[] = $asciiCode; + + if ($numberOfBytes == count($bytes)) { + if (4 == $numberOfBytes) { + $multibyteCode = ($bytes[0] % 8) * 262144 + ($bytes[1] % 64) * 4096 + ($bytes[2] % 64) * 64 + ($bytes[3] % 64); + } elseif (3 == $numberOfBytes) { + $multibyteCode = ($bytes[0] % 16) * 4096 + ($bytes[1] % 64) * 64 + ($bytes[2] % 64); + } else { + $multibyteCode = ($bytes[0] % 32) * 64 + ($bytes[1] % 64); + } + + if (65279 != $multibyteCode) { + $escapedValue .= $multibyteCode < 128 ? $this->escapeAsciiCharacter($multibyteCode) : $this->escapeMultibyteCharacter($multibyteCode); + } + + $numberOfBytes = 1; + $bytes = array(); + } + } + } + + return $escapedValue; + } +} diff --git a/src/PhpWord/Escaper/Xml.php b/src/PhpWord/Escaper/Xml.php index 6cbdceca..274cade5 100644 --- a/src/PhpWord/Escaper/Xml.php +++ b/src/PhpWord/Escaper/Xml.php @@ -24,9 +24,9 @@ namespace PhpOffice\PhpWord\Escaper; */ class Xml extends AbstractEscaper { - protected function escapeSingleValue($subject) + protected function escapeSingleValue($input) { // todo: omit encoding parameter after migration onto PHP 5.4 - return htmlspecialchars($subject, ENT_QUOTES, 'UTF-8'); + return htmlspecialchars($input, ENT_QUOTES, 'UTF-8'); } } diff --git a/src/PhpWord/Writer/HTML/Part/AbstractPart.php b/src/PhpWord/Writer/HTML/Part/AbstractPart.php index 4c5f66e9..8dcd2e4b 100644 --- a/src/PhpWord/Writer/HTML/Part/AbstractPart.php +++ b/src/PhpWord/Writer/HTML/Part/AbstractPart.php @@ -22,15 +22,11 @@ use PhpOffice\PhpWord\Writer\AbstractWriter; use Zend\Escaper\Escaper; /** - * Abstract HTML part writer - * * @since 0.11.0 */ abstract class AbstractPart { /** - * Parent writer - * * @var \PhpOffice\PhpWord\Writer\AbstractWriter */ private $parentWriter; @@ -46,16 +42,13 @@ abstract class AbstractPart } /** - * Write part - * * @return string */ abstract public function write(); /** - * Set parent writer. - * * @param \PhpOffice\PhpWord\Writer\AbstractWriter $writer + * * @return void */ public function setParentWriter(AbstractWriter $writer = null) @@ -64,8 +57,6 @@ abstract class AbstractPart } /** - * Get parent writer - * * @return \PhpOffice\PhpWord\Writer\AbstractWriter * * @throws \PhpOffice\PhpWord\Exception\Exception diff --git a/src/PhpWord/Writer/RTF/Element/AbstractElement.php b/src/PhpWord/Writer/RTF/Element/AbstractElement.php index 3ac9c6e7..b185cd92 100644 --- a/src/PhpWord/Writer/RTF/Element/AbstractElement.php +++ b/src/PhpWord/Writer/RTF/Element/AbstractElement.php @@ -18,9 +18,13 @@ namespace PhpOffice\PhpWord\Writer\RTF\Element; use PhpOffice\Common\Text as CommonText; +use PhpOffice\PhpWord\Element\AbstractElement as Element; +use PhpOffice\PhpWord\Escaper\Rtf; +use PhpOffice\PhpWord\Settings; use PhpOffice\PhpWord\Style; use PhpOffice\PhpWord\Style\Font as FontStyle; use PhpOffice\PhpWord\Style\Paragraph as ParagraphStyle; +use PhpOffice\PhpWord\Writer\AbstractWriter; use PhpOffice\PhpWord\Writer\HTML\Element\AbstractElement as HTMLAbstractElement; use PhpOffice\PhpWord\Writer\RTF\Style\Font as FontStyleWriter; use PhpOffice\PhpWord\Writer\RTF\Style\Paragraph as ParagraphStyleWriter; @@ -46,6 +50,13 @@ abstract class AbstractElement extends HTMLAbstractElement */ private $paragraphStyle; + public function __construct(AbstractWriter $parentWriter, Element $element, $withoutP) + { + parent::__construct($parentWriter, $element, $withoutP); + + $this->escaper = new Rtf(); + } + /** * Get font and paragraph styles. * @@ -112,7 +123,11 @@ abstract class AbstractElement extends HTMLAbstractElement */ protected function writeText($text) { - return CommonText::toUnicode($text); + if (Settings::isOutputEscapingEnabled()) { + return $this->escaper->escape($text); + } else { + return CommonText::toUnicode($text); + } } /** diff --git a/src/PhpWord/Writer/RTF/Part/AbstractPart.php b/src/PhpWord/Writer/RTF/Part/AbstractPart.php index b1d97eee..b1cd1fb3 100644 --- a/src/PhpWord/Writer/RTF/Part/AbstractPart.php +++ b/src/PhpWord/Writer/RTF/Part/AbstractPart.php @@ -17,13 +17,56 @@ namespace PhpOffice\PhpWord\Writer\RTF\Part; -use PhpOffice\PhpWord\Writer\HTML\Part\AbstractPart as HTMLAbstractPart; +use PhpOffice\PhpWord\Escaper\Rtf; +use PhpOffice\PhpWord\Exception\Exception; +use PhpOffice\PhpWord\Writer\AbstractWriter; /** - * Abstract RTF part writer - * * @since 0.11.0 */ -abstract class AbstractPart extends HTMLAbstractPart +abstract class AbstractPart { + /** + * @var \PhpOffice\PhpWord\Writer\AbstractWriter + */ + private $parentWriter; + + /** + * @var \PhpOffice\PhpWord\Escaper\EscaperInterface + */ + protected $escaper; + + public function __construct() + { + $this->escaper = new Rtf(); + } + + /** + * @return string + */ + abstract public function write(); + + /** + * @param \PhpOffice\PhpWord\Writer\AbstractWriter $writer + * + * @return void + */ + public function setParentWriter(AbstractWriter $writer = null) + { + $this->parentWriter = $writer; + } + + /** + * @return \PhpOffice\PhpWord\Writer\AbstractWriter + * + * @throws \PhpOffice\PhpWord\Exception\Exception + */ + public function getParentWriter() + { + if ($this->parentWriter !== null) { + return $this->parentWriter; + } else { + throw new Exception('No parent WriterInterface assigned.'); + } + } } diff --git a/src/PhpWord/Writer/RTF/Part/Document.php b/src/PhpWord/Writer/RTF/Part/Document.php index 51b6dbc4..168b9cae 100644 --- a/src/PhpWord/Writer/RTF/Part/Document.php +++ b/src/PhpWord/Writer/RTF/Part/Document.php @@ -65,7 +65,11 @@ class Document extends AbstractPart $content .= '\info'; foreach ($properties as $property) { $method = 'get' . (isset($mapping[$property]) ? $mapping[$property] : $property); - $value = $docProps->$method(); + if (!in_array($property, $dateFields) && Settings::isOutputEscapingEnabled()) { + $value = $this->escaper->escape($docProps->$method()); + } else { + $value = $docProps->$method(); + } $value = in_array($property, $dateFields) ? $this->getDateValue($value) : $value; $content .= "{\\{$property} {$value}}"; } @@ -105,7 +109,6 @@ class Document extends AbstractPart */ private function writeSections() { - $content = ''; $sections = $this->getParentWriter()->getPhpWord()->getSections(); diff --git a/src/PhpWord/Writer/RTF/Part/Header.php b/src/PhpWord/Writer/RTF/Part/Header.php index cc5d3dd4..fad62278 100644 --- a/src/PhpWord/Writer/RTF/Part/Header.php +++ b/src/PhpWord/Writer/RTF/Part/Header.php @@ -173,7 +173,7 @@ class Header extends AbstractPart { $content = ''; - $content .= '{\*\generator PhpWord;}'; // Set the generator + $content .= '{\*\generator PHPWord;}'; // Set the generator $content .= PHP_EOL; return $content;