From 8a9a4784d91da529ab327670f6712f719541491a Mon Sep 17 00:00:00 2001 From: Sami Mussbach Date: Thu, 1 Feb 2018 13:58:08 +0100 Subject: [PATCH 1/8] add (failing) test and correct documentation sample to valid HTML --- samples/Sample_26_Html.php | 11 +++++---- tests/PhpWord/Shared/HtmlTest.php | 37 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/samples/Sample_26_Html.php b/samples/Sample_26_Html.php index 69d9d131..b05f8d08 100644 --- a/samples/Sample_26_Html.php +++ b/samples/Sample_26_Html.php @@ -29,10 +29,13 @@ $html .= '
    1. List 2 item 1
    2. List 2 item 2
    3. -
        -
      1. sub list 1
      2. -
      3. sub list 2
      4. -
      +
    4. +
        +
      1. sub list 1
      2. +
      3. sub list 2
      4. +
      +
    5. +
    6. List 2 item 3
      1. sub list 1, restarts with a
      2. diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index 6122924f..936c35f9 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -272,6 +272,43 @@ class HtmlTest extends \PHPUnit\Framework\TestCase $this->assertNotEquals($firstListnumId, $secondListnumId); } + /** + * Tests parsing of nested ul/li + */ + public function testOrderedNestedListNumbering() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + $html = '
          +
        1. List 1 item 1
        2. +
        3. List 1 item 2
        4. +
        +

        Some Text

        +
          +
        1. List 2 item 1
        2. +
        3. +
            +
          1. sub list 1
          2. +
          3. sub list 2
          4. +
          +
        4. +
        '; + Html::addHtml($section, $html, false, false); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + echo $doc->printXml(); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:pPr/w:numPr/w:numId')); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:t')); + + $this->assertEquals('List 1 item 1', $doc->getElement('/w:document/w:body/w:p[1]/w:r/w:t')->nodeValue); + $this->assertEquals('List 2 item 1', $doc->getElement('/w:document/w:body/w:p[4]/w:r/w:t')->nodeValue); + + $firstListnumId = $doc->getElementAttribute('/w:document/w:body/w:p[1]/w:pPr/w:numPr/w:numId', 'w:val'); + $secondListnumId = $doc->getElementAttribute('/w:document/w:body/w:p[4]/w:pPr/w:numPr/w:numId', 'w:val'); + + $this->assertNotEquals($firstListnumId, $secondListnumId); + } + /** * Tests parsing of ul/li */ From 46a5f96d3b5104aaa6b016e5df0730e4382058e7 Mon Sep 17 00:00:00 2001 From: troosan Date: Tue, 6 Feb 2018 23:16:32 +0100 Subject: [PATCH 2/8] fix parsing of table and p inside table cells --- CHANGELOG.md | 2 ++ samples/Sample_26_Html.php | 16 +++++++-- src/PhpWord/Shared/Html.php | 38 +++++++++++++++++++++- src/PhpWord/Writer/Word2007/Style/Font.php | 4 +++ tests/PhpWord/Shared/HtmlTest.php | 6 ++-- 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6f8b2da..fca2922c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,11 @@ v0.15.0 (?? ??? 2018) ### Added - Parsing of "align" HTML attribute - @troosan #1231 - Parse formatting inside HTML lists - @troosan @samimussbach #1239 #945 #1215 #508 +- Parsing of CSS `direction` instruction, HTML `lang` attribute, formatting inside table cell - @troosan # ### Fixed - fix reading of docx default style - @troosan #1238 +- fix the size unit of when parsing html images - @troosan #1254 diff --git a/samples/Sample_26_Html.php b/samples/Sample_26_Html.php index 69d9d131..d54d548c 100644 --- a/samples/Sample_26_Html.php +++ b/samples/Sample_26_Html.php @@ -7,11 +7,13 @@ $phpWord = new \PhpOffice\PhpWord\PhpWord(); $section = $phpWord->addSection(); $html = '

        Adding element via HTML

        '; -$html .= '

        Some well formed HTML snippet needs to be used

        '; +$html .= '

        Some well-formed HTML snippet needs to be used

        '; $html .= '

        With for example some1 inline formatting1

        '; $html .= '

        A link to Read the docs

        '; +$html .= '

        היי, זה פסקה מימין לשמאל

        '; + $html .= '

        Unordered (bulleted) list:

        '; $html .= '
        • Item 1
        • Item 2
          • Item 2.1
          • Item 2.1
        '; @@ -65,10 +67,20 @@ $html .= ' - +
        12
        456
        This is bold text6
        '; +$html .= '

        Table inside another table:

        '; +$html .= ' + + +
        + + +
        column 1column 2
        +
        Cell in parent table
        '; + \PhpOffice\PhpWord\Shared\Html::addHtml($section, $html, false, false); // Save file diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 0f5f446a..2eeaae8b 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -31,6 +31,7 @@ use PhpOffice\PhpWord\SimpleType\NumberFormat; class Html { private static $listIndex = 0; + private static $xpath; /** * Add HTML parts. @@ -65,6 +66,7 @@ class Html $dom = new \DOMDocument(); $dom->preserveWhiteSpace = $preserveWhiteSpace; $dom->loadXML($html); + self::$xpath = new \DOMXpath($dom); $node = $dom->getElementsByTagName('body'); self::parseNode($node->item(0), $element); @@ -89,6 +91,10 @@ class Html break; case 'align': $styles['alignment'] = self::mapAlign($attribute->value); + break; + case 'lang': + $styles['lang'] = $attribute->value; + break; } } } @@ -343,8 +349,33 @@ class Html if (!empty($colspan)) { $cellStyles['gridSpan'] = $colspan - 0; } + $cell = $element->addCell(null, $cellStyles); - return $element->addCell(null, $cellStyles); + if (self::shouldAddTextRun($node)) { + return $cell->addTextRun(self::parseInlineStyle($node, $styles['paragraph'])); + } + + return $cell; + } + + /** + * Checks if $node contains an HTML element that cannot be added to TextRun + * + * @param \DOMNode $node + * @return bool Returns true if the node contains an HTML element that cannot be added to TextRun + */ + private static function shouldAddTextRun(\DOMNode $node) + { + if (!$node->hasChildNodes()) { + return false; + } + + $containsBlockElement = self::$xpath->query('.//table|./p', $node)->length > 0; + if ($containsBlockElement) { + return false; + } + + return true; } /** @@ -469,6 +500,9 @@ class Html case 'text-align': $styles['alignment'] = self::mapAlign($cValue); break; + case 'direction': + $styles['rtl'] = $cValue === 'rtl'; + break; case 'font-size': $styles['size'] = Converter::cssToPoint($cValue); break; @@ -556,10 +590,12 @@ class Html case 'width': $width = $attribute->value; $style['width'] = $width; + $style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX; break; case 'height': $height = $attribute->value; $style['height'] = $height; + $style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX; break; case 'style': $styleattr = explode(';', $attribute->value); diff --git a/src/PhpWord/Writer/Word2007/Style/Font.php b/src/PhpWord/Writer/Word2007/Style/Font.php index 9c2714dc..ecaad416 100644 --- a/src/PhpWord/Writer/Word2007/Style/Font.php +++ b/src/PhpWord/Writer/Word2007/Style/Font.php @@ -90,6 +90,10 @@ class Font extends AbstractStyle $xmlWriter->writeAttributeIf($language->getLatin() !== null, 'w:val', $language->getLatin()); $xmlWriter->writeAttributeIf($language->getEastAsia() !== null, 'w:eastAsia', $language->getEastAsia()); $xmlWriter->writeAttributeIf($language->getBidirectional() !== null, 'w:bidi', $language->getBidirectional()); + //if bidi is not set but we are writing RTL, write the latin language in the bidi tag + if ($style->isRTL() && $language->getBidirectional() === null && $language->getLatin() !== null) { + $xmlWriter->writeAttribute('w:bidi', $language->getLatin()); + } $xmlWriter->endElement(); } diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index 97a8fb15..7d5f0b4c 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -259,7 +259,7 @@ class HtmlTest extends \PHPUnit\Framework\TestCase Html::addHtml($section, $html, false, false); $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); - echo $doc->printXml(); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:pPr/w:numPr/w:numId')); $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:t')); @@ -336,8 +336,8 @@ class HtmlTest extends \PHPUnit\Framework\TestCase $baseXpath = '/w:document/w:body/w:p/w:r'; $this->assertTrue($doc->elementExists($baseXpath . '/w:pict/v:shape')); - $this->assertStringMatchesFormat('%Swidth:150pt%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style')); - $this->assertStringMatchesFormat('%Sheight:200pt%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style')); + $this->assertStringMatchesFormat('%Swidth:150px%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style')); + $this->assertStringMatchesFormat('%Sheight:200px%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style')); $this->assertStringMatchesFormat('%Smso-position-horizontal:right%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style')); $this->assertStringMatchesFormat('%Smso-position-horizontal:left%S', $doc->getElementAttribute($baseXpath . '[2]/w:pict/v:shape', 'style')); } From 47c837abef8f25fa0f28da352cd3e94c8be99ece Mon Sep 17 00:00:00 2001 From: troosan Date: Tue, 6 Feb 2018 23:31:56 +0100 Subject: [PATCH 3/8] add unit tests --- src/PhpWord/Shared/Html.php | 4 ---- tests/PhpWord/Shared/HtmlTest.php | 29 ++++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 2eeaae8b..e11d7390 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -366,10 +366,6 @@ class Html */ private static function shouldAddTextRun(\DOMNode $node) { - if (!$node->hasChildNodes()) { - return false; - } - $containsBlockElement = self::$xpath->query('.//table|./p', $node)->length > 0; if ($containsBlockElement) { return false; diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index 7d5f0b4c..44fe97fc 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -150,6 +150,33 @@ class HtmlTest extends \PHPUnit\Framework\TestCase $this->assertEquals('15', $doc->getElementAttribute('/w:document/w:body/w:p[2]/w:r/w:rPr/w:sz', 'w:val')); } + /** + * Test direction style + */ + public function testParseTextDirection() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + Html::addHtml($section, 'test'); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:rPr/w:rtl')); + } + + /** + * Test html lang + */ + public function testParseLang() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + Html::addHtml($section, 'test'); + + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:rPr/w:lang')); + $this->assertEquals('fr-BE', $doc->getElementAttribute('/w:document/w:body/w:p/w:r/w:rPr/w:lang', 'w:val')); + } + /** * Test font-family style */ @@ -199,7 +226,7 @@ class HtmlTest extends \PHPUnit\Framework\TestCase 12 - 456 + This is bold text5

        6

        '; Html::addHtml($section, $html); From 46476d71014a1136810bdf7576724edc6e50fe30 Mon Sep 17 00:00:00 2001 From: troosan Date: Wed, 7 Feb 2018 07:09:27 +0100 Subject: [PATCH 4/8] update phpdoc --- src/PhpWord/Shared/Html.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index e11d7390..2a92ed2a 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -339,7 +339,7 @@ class Html * @param \DOMNode $node * @param \PhpOffice\PhpWord\Element\Table $element * @param array &$styles - * @return \PhpOffice\PhpWord\Element\Cell $element + * @return \PhpOffice\PhpWord\Element\Cell|\PhpOffice\PhpWord\Element\TextRun $element */ private static function parseCell($node, $element, &$styles) { From 33739ea21cbdc74b661ed8e9de42a75db7c6391e Mon Sep 17 00:00:00 2001 From: troosan Date: Wed, 7 Feb 2018 21:39:01 +0100 Subject: [PATCH 5/8] cannot add list on textrun --- src/PhpWord/Shared/Html.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 2a92ed2a..a3ea0cc0 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -366,7 +366,7 @@ class Html */ private static function shouldAddTextRun(\DOMNode $node) { - $containsBlockElement = self::$xpath->query('.//table|./p', $node)->length > 0; + $containsBlockElement = self::$xpath->query('.//table|./p|./ul|./li', $node)->length > 0; if ($containsBlockElement) { return false; } From 304173c4d78b65685e2e91654beccd573fe8b4ee Mon Sep 17 00:00:00 2001 From: troosan Date: Thu, 8 Feb 2018 07:02:28 +0100 Subject: [PATCH 6/8] fix nested list --- src/PhpWord/Element/AbstractElement.php | 13 +++++++++++++ src/PhpWord/Shared/Html.php | 7 +++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/PhpWord/Element/AbstractElement.php b/src/PhpWord/Element/AbstractElement.php index 63892b74..52279645 100644 --- a/src/PhpWord/Element/AbstractElement.php +++ b/src/PhpWord/Element/AbstractElement.php @@ -93,6 +93,13 @@ abstract class AbstractElement */ private $nestedLevel = 0; + /** + * A reference to the parent + * + * @var \PhpOffice\PhpWord\Element\AbstractElement + */ + private $parent; + /** * Parent container type * @@ -321,6 +328,11 @@ abstract class AbstractElement $this->commentRangeEnd->setEndElement($this); } + public function getParent() + { + return $this->parent; + } + /** * Set parent container * @@ -331,6 +343,7 @@ abstract class AbstractElement public function setParentContainer(AbstractElement $container) { $this->parentContainer = substr(get_class($container), strrpos(get_class($container), '\\') + 1); + $this->parent = $container; // Set nested level $this->nestedLevel = $container->getNestedLevel(); diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index a3ea0cc0..971776ff 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -366,7 +366,7 @@ class Html */ private static function shouldAddTextRun(\DOMNode $node) { - $containsBlockElement = self::$xpath->query('.//table|./p|./ul|./li', $node)->length > 0; + $containsBlockElement = self::$xpath->query('.//table|./p|./ul|./ol', $node)->length > 0; if ($containsBlockElement) { return false; } @@ -402,7 +402,7 @@ class Html */ private static function parseList($node, $element, &$styles, &$data) { - $isOrderedList = $node->nodeName == 'ol'; + $isOrderedList = $node->nodeName === 'ol'; if (isset($data['listdepth'])) { $data['listdepth']++; } else { @@ -410,6 +410,9 @@ class Html $styles['list'] = 'listStyle_' . self::$listIndex++; $element->getPhpWord()->addNumberingStyle($styles['list'], self::getListStyle($isOrderedList)); } + if ($node->parentNode->nodeName === 'li') { + return $element->getParent(); + } } private static function getListStyle($isOrderedList) From 24f3463f9af8a4ade049d11202ee8e34bec92ec3 Mon Sep 17 00:00:00 2001 From: troosan Date: Thu, 8 Feb 2018 07:18:02 +0100 Subject: [PATCH 7/8] remove output --- CHANGELOG.md | 3 ++- tests/PhpWord/Shared/HtmlTest.php | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fca2922c..21f4ec81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,12 @@ v0.15.0 (?? ??? 2018) ### Added - Parsing of "align" HTML attribute - @troosan #1231 - Parse formatting inside HTML lists - @troosan @samimussbach #1239 #945 #1215 #508 -- Parsing of CSS `direction` instruction, HTML `lang` attribute, formatting inside table cell - @troosan # +- Parsing of CSS `direction` instruction, HTML `lang` attribute, formatting inside table cell - @troosan #1273 #1252 #1254 ### Fixed - fix reading of docx default style - @troosan #1238 - fix the size unit of when parsing html images - @troosan #1254 +- fixed HTML parsing of nested lists - @troosan #1265 diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index b1ebf349..ac68b887 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -323,7 +323,7 @@ class HtmlTest extends \PHPUnit\Framework\TestCase Html::addHtml($section, $html, false, false); $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); - echo $doc->printXml(); + $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:pPr/w:numPr/w:numId')); $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:t')); From 9cd5ab74330f9957b99c7d4634bf95fdb7e1a9e6 Mon Sep 17 00:00:00 2001 From: troosan Date: Fri, 9 Feb 2018 17:17:13 +0100 Subject: [PATCH 8/8] update changelog --- CHANGELOG.md | 1 + samples/Sample_26_Html.php | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21f4ec81..3d3f60f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ v0.15.0 (?? ??? 2018) - fix reading of docx default style - @troosan #1238 - fix the size unit of when parsing html images - @troosan #1254 - fixed HTML parsing of nested lists - @troosan #1265 +- Save PNG alpha information when using remote images. @samsullivan #779 diff --git a/samples/Sample_26_Html.php b/samples/Sample_26_Html.php index 6e505fdb..f6086357 100644 --- a/samples/Sample_26_Html.php +++ b/samples/Sample_26_Html.php @@ -37,7 +37,6 @@ $html .= '
        1. sub list 2
        -
      3. List 2 item 3
        1. sub list 1, restarts with a