From 4c7e1399fed7905c86aa6a1c9d0f767dac0dc6c6 Mon Sep 17 00:00:00 2001 From: Roman Syroeshko Date: Tue, 28 Jun 2016 21:37:36 +0400 Subject: [PATCH] #483. Output escaping for HTML. --- README.md | 11 +++--- composer.json | 20 +++++----- docs/installing.rst | 4 +- docs/intro.rst | 4 +- .../Writer/HTML/Element/AbstractElement.php | 7 ++++ src/PhpWord/Writer/HTML/Element/Link.php | 7 +++- src/PhpWord/Writer/HTML/Element/ListItem.php | 8 +++- src/PhpWord/Writer/HTML/Element/Text.php | 14 ++++++- src/PhpWord/Writer/HTML/Element/Title.php | 7 +++- src/PhpWord/Writer/HTML/Part/AbstractPart.php | 11 ++++++ src/PhpWord/Writer/HTML/Part/Head.php | 19 +++++----- src/PhpWord/Writer/HTML/Style/Paragraph.php | 37 ++++++++++++++++++- 12 files changed, 114 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 1d7a4802..3c2bd5c1 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Read more about PHPWord: ## Features -With PHPWord, you can create DOCX, ODT, or RTF documents dynamically using your PHP 5.3+ scripts. Below are some of the things that you can do with PHPWord library: +With PHPWord, you can create OOXML, ODF, or RTF documents dynamically using your PHP 5.3.3+ scripts. Below are some of the things that you can do with PHPWord library: - Set document properties, e.g. title, subject, and creator. - Create document sections with different settings, e.g. portrait/landscape, page size, and page numbering @@ -52,12 +52,14 @@ With PHPWord, you can create DOCX, ODT, or RTF documents dynamically using your PHPWord requires the following: -- PHP 5.3+ +- PHP 5.3.3+ - [XML Parser extension](http://www.php.net/manual/en/xml.installation.php) +- [Zend\Escaper component](http://framework.zend.com/manual/current/en/modules/zend.escaper.introduction.html) +- Zend\Stdlib component - [Zend\Validator component](http://framework.zend.com/manual/current/en/modules/zend.validator.html) -- [Zip extension](http://php.net/manual/en/book.zip.php) (optional, used to write DOCX and ODT) +- [Zip extension](http://php.net/manual/en/book.zip.php) (optional, used to write OOXML and ODF) - [GD extension](http://php.net/manual/en/book.image.php) (optional, used to add images) -- [XMLWriter extension](http://php.net/manual/en/book.xmlwriter.php) (optional, used to write DOCX and ODT) +- [XMLWriter extension](http://php.net/manual/en/book.xmlwriter.php) (optional, used to write OOXML and ODF) - [XSL extension](http://php.net/manual/en/book.xsl.php) (optional, used to apply XSL style sheet to template ) - [dompdf library](https://github.com/dompdf/dompdf) (optional, used to write PDF) @@ -149,7 +151,6 @@ $objWriter->save('helloWorld.html'); /* Note: we skip RTF, because it's not XML-based and requires a different example. */ /* Note: we skip PDF, because "HTML-to-PDF" approach is used to create PDF documents. */ ``` -:warning: Escape any string you pass to HTML document, otherwise it may get broken. More examples are provided in the [samples folder](samples/). You can also read the [Developers' Documentation](http://phpword.readthedocs.org/) and the [API Documentation](http://phpoffice.github.io/PHPWord/docs/master/) for more detail. diff --git a/composer.json b/composer.json index 48d39ada..e9399914 100644 --- a/composer.json +++ b/composer.json @@ -1,10 +1,10 @@ { "name": "phpoffice/phpword", - "description": "PHPWord - A pure PHP library for reading and writing word processing documents (DOCX, ODT, RTF, HTML, PDF)", + "description": "PHPWord - A pure PHP library for reading and writing word processing documents (OOXML, ODF, RTF, HTML, PDF)", "keywords": [ - "PHP", "PhpOffice", "office", "PhpWord", "word", "template", "template processor", "reader", "writer", + "PHP", "PHPOffice", "office", "PHPWord", "word", "template", "template processor", "reader", "writer", "docx", "OOXML", "OpenXML", "Office Open XML", "ISO IEC 29500", "WordprocessingML", - "RTF", "Rich Text Format", "doc", "odt", "OpenDocument", "PDF", "HTML" + "RTF", "Rich Text Format", "doc", "odt", "ODF", "OpenDocument", "PDF", "HTML" ], "homepage": "http://phpoffice.github.io", "type": "library", @@ -34,8 +34,9 @@ "require": { "php": ">=5.3.3", "ext-xml": "*", - "zendframework/zend-stdlib": "~2.5", - "zendframework/zend-validator": "2.5.*", + "zendframework/zend-escaper": "2.4.*", + "zendframework/zend-stdlib": "2.4.*", + "zendframework/zend-validator": "2.4.*", "phpoffice/common": "0.2.*" }, "require-dev": { @@ -46,15 +47,12 @@ "phploc/phploc": "2.*", "dompdf/dompdf":"0.6.*", "tecnickcom/tcpdf": "6.*", - "mpdf/mpdf": "5.*", - "zendframework/zend-stdlib": "~2.5", - "zendframework/zend-validator": "2.5.*", - "phpoffice/common": "0.2.*" + "mpdf/mpdf": "5.*" }, "suggest": { - "ext-zip": "Allows writing DOCX and ODT", + "ext-zip": "Allows writing OOXML and ODF", "ext-gd2": "Allows adding images", - "ext-xmlwriter": "Allows writing DOCX and ODT", + "ext-xmlwriter": "Allows writing OOXML and ODF", "ext-xsl": "Allows applying XSL style sheet to main document part of OOXML template", "dompdf/dompdf": "Allows writing PDF" }, diff --git a/docs/installing.rst b/docs/installing.rst index dc1f40f3..9593484a 100644 --- a/docs/installing.rst +++ b/docs/installing.rst @@ -8,8 +8,10 @@ Requirements Mandatory: -- PHP 5.3+ +- PHP 5.3.3+ - `XML Parser `__ extension +- `Zend\\Escaper `__ component +- Zend\\Stdlib component - `Zend\\Validator `__ component Optional: diff --git a/docs/intro.rst b/docs/intro.rst index 8e3d2d05..0ef27c9f 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -63,7 +63,7 @@ Writers ~~~~~~~ +---------------------------+----------------------+--------+-------+-------+--------+-------+ -| Features | | DOCX | ODT | RTF | HTML | PDF | +| Features | | OOXML | ODF | RTF | HTML | PDF | +===========================+======================+========+=======+=======+========+=======+ | **Document Properties** | Standard | ✓ | ✓ | ✓ | ✓ | ✓ | +---------------------------+----------------------+--------+-------+-------+--------+-------+ @@ -122,7 +122,7 @@ Readers ~~~~~~~ +---------------------------+----------------------+--------+-------+-------+-------+-------+ -| Features | | DOCX | DOC | ODT | RTF | HTML | +| Features | | OOXML | DOC | ODF | RTF | HTML | +===========================+======================+========+=======+=======+=======+=======+ | **Document Properties** | Standard | ✓ | | | | | +---------------------------+----------------------+--------+-------+-------+-------+-------+ diff --git a/src/PhpWord/Writer/HTML/Element/AbstractElement.php b/src/PhpWord/Writer/HTML/Element/AbstractElement.php index 13681423..9eae024c 100644 --- a/src/PhpWord/Writer/HTML/Element/AbstractElement.php +++ b/src/PhpWord/Writer/HTML/Element/AbstractElement.php @@ -19,6 +19,7 @@ namespace PhpOffice\PhpWord\Writer\HTML\Element; use PhpOffice\PhpWord\Element\AbstractElement as Element; use PhpOffice\PhpWord\Writer\AbstractWriter; +use Zend\Escaper\Escaper; /** * Abstract HTML element writer @@ -48,6 +49,11 @@ abstract class AbstractElement */ protected $withoutP = false; + /** + * @var \Zend\Escaper\Escaper + */ + protected $escaper; + /** * Write element */ @@ -65,6 +71,7 @@ abstract class AbstractElement $this->parentWriter = $parentWriter; $this->element = $element; $this->withoutP = $withoutP; + $this->escaper = new Escaper(); } /** diff --git a/src/PhpWord/Writer/HTML/Element/Link.php b/src/PhpWord/Writer/HTML/Element/Link.php index 50c4e6af..ec0e1746 100644 --- a/src/PhpWord/Writer/HTML/Element/Link.php +++ b/src/PhpWord/Writer/HTML/Element/Link.php @@ -16,6 +16,7 @@ */ namespace PhpOffice\PhpWord\Writer\HTML\Element; +use PhpOffice\PhpWord\Settings; /** * Link element HTML writer @@ -37,7 +38,11 @@ class Link extends Text $content = ''; $content .= $this->writeOpening(); - $content .= "element->getSource()}\">{$this->element->getText()}"; + if (Settings::isOutputEscapingEnabled()) { + $content .= "escaper->escapeHtmlAttr($this->element->getSource())}\">{$this->escaper->escapeHtml($this->element->getText())}"; + } else { + $content .= "element->getSource()}\">{$this->element->getText()}"; + } $content .= $this->writeClosing(); return $content; diff --git a/src/PhpWord/Writer/HTML/Element/ListItem.php b/src/PhpWord/Writer/HTML/Element/ListItem.php index a046a0d4..d1d0ae9b 100644 --- a/src/PhpWord/Writer/HTML/Element/ListItem.php +++ b/src/PhpWord/Writer/HTML/Element/ListItem.php @@ -16,6 +16,7 @@ */ namespace PhpOffice\PhpWord\Writer\HTML\Element; +use PhpOffice\PhpWord\Settings; /** * ListItem element HTML writer @@ -35,8 +36,11 @@ class ListItem extends AbstractElement return ''; } - $text = $this->element->getTextObject()->getText(); - $content = '

' . $text . '

' . PHP_EOL; + if (Settings::isOutputEscapingEnabled()) { + $content = '

' . $this->escaper->escapeHtml($this->element->getTextObject()->getText()) . '

' . PHP_EOL; + } else { + $content = '

' . $this->element->getTextObject()->getText() . '

' . PHP_EOL; + } return $content; } diff --git a/src/PhpWord/Writer/HTML/Element/Text.php b/src/PhpWord/Writer/HTML/Element/Text.php index f57e9c21..66455c1f 100644 --- a/src/PhpWord/Writer/HTML/Element/Text.php +++ b/src/PhpWord/Writer/HTML/Element/Text.php @@ -17,6 +17,7 @@ namespace PhpOffice\PhpWord\Writer\HTML\Element; +use PhpOffice\PhpWord\Settings; use PhpOffice\PhpWord\Style\Font; use PhpOffice\PhpWord\Style\Paragraph; use PhpOffice\PhpWord\Writer\HTML\Style\Font as FontStyleWriter; @@ -72,7 +73,11 @@ class Text extends AbstractElement $content .= $this->writeOpening(); $content .= $this->openingText; $content .= $this->openingTags; - $content .= $element->getText(); + if (Settings::isOutputEscapingEnabled()) { + $content .= $this->escaper->escapeHtml($element->getText()); + } else { + $content .= $element->getText(); + } $content .= $this->closingTags; $content .= $this->closingText; $content .= $this->writeClosing(); @@ -130,7 +135,12 @@ class Text extends AbstractElement { $content = ''; if (!$this->withoutP) { - $content .= $this->closingText; + if (Settings::isOutputEscapingEnabled()) { + $content .= $this->escaper->escapeHtml($this->closingText); + } else { + $content .= $this->closingText; + } + $content .= "

" . PHP_EOL; } diff --git a/src/PhpWord/Writer/HTML/Element/Title.php b/src/PhpWord/Writer/HTML/Element/Title.php index 8018178c..b4674612 100644 --- a/src/PhpWord/Writer/HTML/Element/Title.php +++ b/src/PhpWord/Writer/HTML/Element/Title.php @@ -16,6 +16,7 @@ */ namespace PhpOffice\PhpWord\Writer\HTML\Element; +use PhpOffice\PhpWord\Settings; /** * TextRun element HTML writer @@ -36,7 +37,11 @@ class Title extends AbstractElement } $tag = 'h' . $this->element->getDepth(); - $text = $this->element->getText(); + if (Settings::isOutputEscapingEnabled()) { + $text = $this->escaper->escapeHtml($this->element->getText()); + } else { + $text = $this->element->getText(); + } $content = "<{$tag}>{$text}" . PHP_EOL; return $content; diff --git a/src/PhpWord/Writer/HTML/Part/AbstractPart.php b/src/PhpWord/Writer/HTML/Part/AbstractPart.php index cf4da1b0..4c5f66e9 100644 --- a/src/PhpWord/Writer/HTML/Part/AbstractPart.php +++ b/src/PhpWord/Writer/HTML/Part/AbstractPart.php @@ -19,6 +19,7 @@ namespace PhpOffice\PhpWord\Writer\HTML\Part; use PhpOffice\PhpWord\Exception\Exception; use PhpOffice\PhpWord\Writer\AbstractWriter; +use Zend\Escaper\Escaper; /** * Abstract HTML part writer @@ -34,6 +35,16 @@ abstract class AbstractPart */ private $parentWriter; + /** + * @var \Zend\Escaper\Escaper + */ + protected $escaper; + + public function __construct() + { + $this->escaper = new Escaper(); + } + /** * Write part * diff --git a/src/PhpWord/Writer/HTML/Part/Head.php b/src/PhpWord/Writer/HTML/Part/Head.php index 438c7ad7..c9f947cf 100644 --- a/src/PhpWord/Writer/HTML/Part/Head.php +++ b/src/PhpWord/Writer/HTML/Part/Head.php @@ -41,14 +41,14 @@ class Head extends AbstractPart { $docProps = $this->getParentWriter()->getPhpWord()->getDocInfo(); $propertiesMapping = array( - 'creator' => 'author', - 'title' => '', + 'creator' => 'author', + 'title' => '', 'description' => '', - 'subject' => '', - 'keywords' => '', - 'category' => '', - 'company' => '', - 'manager' => '' + 'subject' => '', + 'keywords' => '', + 'category' => '', + 'company' => '', + 'manager' => '' ); $title = $docProps->getTitle(); $title = ($title != '') ? $title : 'PHPWord'; @@ -62,8 +62,9 @@ class Head extends AbstractPart $value = ($value == '') ? $key : $value; $method = "get" . $key; if ($docProps->$method() != '') { - $content .= '' . PHP_EOL; + $content .= '' . PHP_EOL; } } $content .= $this->writeStyles(); diff --git a/src/PhpWord/Writer/HTML/Style/Paragraph.php b/src/PhpWord/Writer/HTML/Style/Paragraph.php index 52708dcb..78a6f4bb 100644 --- a/src/PhpWord/Writer/HTML/Style/Paragraph.php +++ b/src/PhpWord/Writer/HTML/Style/Paragraph.php @@ -17,6 +17,8 @@ namespace PhpOffice\PhpWord\Writer\HTML\Style; +use PhpOffice\PhpWord\SimpleType\Jc; + /** * Paragraph style HTML writer * @@ -39,7 +41,40 @@ class Paragraph extends AbstractStyle // Alignment if ('' !== $style->getAlignment()) { - $css['text-align'] = $style->getAlignment(); // todo: convert OpenXml to Html values + $textAlign = ''; + + switch ($style->getAlignment()) { + case Jc::START: + case Jc::NUM_TAB: + case Jc::LEFT: + $textAlign = 'left'; + break; + + case Jc::CENTER: + $textAlign = 'center'; + break; + + case Jc::END: + case Jc::MEDIUM_KASHIDA: + case Jc::HIGH_KASHIDA: + case Jc::LOW_KASHIDA: + case Jc::RIGHT: + $textAlign = 'right'; + break; + + case Jc::BOTH: + case Jc::DISTRIBUTE: + case Jc::THAI_DISTRIBUTE: + case Jc::JUSTIFY: + $textAlign = 'justify'; + break; + + default: + $textAlign = 'left'; + break; + } + + $css['text-align'] = $textAlign; } // Spacing