Merge pull request #1273 from troosan/various_html_parsing_fixes
Various html parsing fixes, fixes for #1252 and #1254
This commit is contained in:
commit
3f40c5e408
|
|
@ -8,10 +8,14 @@ v0.15.0 (?? ??? 2018)
|
|||
### Added
|
||||
- Parsing of "align" HTML attribute - @troosan #1231
|
||||
- Parse formatting inside HTML lists - @troosan @samimussbach #1239 #945 #1215 #508
|
||||
- Parsing of CSS `direction` instruction, HTML `lang` attribute, formatting inside table cell - @troosan #1273 #1252 #1254
|
||||
- Add support for Track changes @Cip @troosan #354 #1262
|
||||
|
||||
### Fixed
|
||||
- fix reading of docx default style - @troosan #1238
|
||||
- fix the size unit of images when parsing HTML - @troosan #1254
|
||||
- fixed HTML parsing of nested lists - @troosan #1265
|
||||
- Save PNG alpha information when using remote images. @samsullivan #779
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7,11 +7,13 @@ $phpWord = new \PhpOffice\PhpWord\PhpWord();
|
|||
|
||||
$section = $phpWord->addSection();
|
||||
$html = '<h1>Adding element via HTML</h1>';
|
||||
$html .= '<p>Some well formed HTML snippet needs to be used</p>';
|
||||
$html .= '<p>Some well-formed HTML snippet needs to be used</p>';
|
||||
$html .= '<p>With for example <strong>some<sup>1</sup> <em>inline</em> formatting</strong><sub>1</sub></p>';
|
||||
|
||||
$html .= '<p>A link to <a href="http://phpword.readthedocs.io/">Read the docs</a></p>';
|
||||
|
||||
$html .= '<p lang="he-IL" style="text-align: right; direction: rtl">היי, זה פסקה מימין לשמאל</p>';
|
||||
|
||||
$html .= '<p style="margin-top: 240pt;">Unordered (bulleted) list:</p>';
|
||||
$html .= '<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>';
|
||||
|
||||
|
|
@ -29,10 +31,12 @@ $html .= '<ol>
|
|||
<ol>
|
||||
<li>List 2 item 1</li>
|
||||
<li>List 2 item 2</li>
|
||||
<ol>
|
||||
<li>sub list 1</li>
|
||||
<li>sub list 2</li>
|
||||
</ol>
|
||||
<li>
|
||||
<ol>
|
||||
<li>sub list 1</li>
|
||||
<li>sub list 2</li>
|
||||
</ol>
|
||||
</li>
|
||||
<li>List 2 item 3</li>
|
||||
<ol>
|
||||
<li>sub list 1, restarts with a</li>
|
||||
|
|
@ -65,10 +69,20 @@ $html .= '<table align="center" style="width: 50%; border: 6px #0000FF double;">
|
|||
</thead>
|
||||
<tbody>
|
||||
<tr><td style="border-style: dotted;">1</td><td colspan="2">2</td></tr>
|
||||
<tr><td>4</td><td>5</td><td>6</td></tr>
|
||||
<tr><td>This is <b>bold</b> text</td><td></td><td>6</td></tr>
|
||||
</tbody>
|
||||
</table>';
|
||||
|
||||
$html .= '<p style="margin-top: 240pt;">Table inside another table:</p>';
|
||||
$html .= '<table align="center" style="width: 80%; border: 6px #0000FF double;">
|
||||
<tr><td>
|
||||
<table style="width: 100%; border: 4px #FF0000 dotted;">
|
||||
<tr><td>column 1</td><td>column 2</td></tr>
|
||||
</table>
|
||||
</td></tr>
|
||||
<tr><td style="text-align: center;">Cell in parent table</td></tr>
|
||||
</table>';
|
||||
|
||||
\PhpOffice\PhpWord\Shared\Html::addHtml($section, $html, false, false);
|
||||
|
||||
// Save file
|
||||
|
|
|
|||
|
|
@ -93,6 +93,13 @@ abstract class AbstractElement
|
|||
*/
|
||||
private $nestedLevel = 0;
|
||||
|
||||
/**
|
||||
* A reference to the parent
|
||||
*
|
||||
* @var \PhpOffice\PhpWord\Element\AbstractElement
|
||||
*/
|
||||
private $parent;
|
||||
|
||||
/**
|
||||
* changed element info
|
||||
*
|
||||
|
|
@ -328,6 +335,11 @@ abstract class AbstractElement
|
|||
$this->commentRangeEnd->setEndElement($this);
|
||||
}
|
||||
|
||||
/**
 * Get the parent element
 *
 * Returns the value of $this->parent, which setParentContainer() assigns;
 * the property has no default, so it is null until that method has been called.
 *
 * @return \PhpOffice\PhpWord\Element\AbstractElement|null
 */
public function getParent()
|
||||
{
|
||||
return $this->parent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set parent container
|
||||
*
|
||||
|
|
@ -338,6 +350,7 @@ abstract class AbstractElement
|
|||
public function setParentContainer(AbstractElement $container)
|
||||
{
|
||||
$this->parentContainer = substr(get_class($container), strrpos(get_class($container), '\\') + 1);
|
||||
$this->parent = $container;
|
||||
|
||||
// Set nested level
|
||||
$this->nestedLevel = $container->getNestedLevel();
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ use PhpOffice\PhpWord\SimpleType\NumberFormat;
|
|||
class Html
|
||||
{
|
||||
private static $listIndex = 0;
|
||||
private static $xpath;
|
||||
|
||||
/**
|
||||
* Add HTML parts.
|
||||
|
|
@ -65,6 +66,7 @@ class Html
|
|||
$dom = new \DOMDocument();
|
||||
$dom->preserveWhiteSpace = $preserveWhiteSpace;
|
||||
$dom->loadXML($html);
|
||||
self::$xpath = new \DOMXpath($dom);
|
||||
$node = $dom->getElementsByTagName('body');
|
||||
|
||||
self::parseNode($node->item(0), $element);
|
||||
|
|
@ -89,6 +91,10 @@ class Html
|
|||
break;
|
||||
case 'align':
|
||||
$styles['alignment'] = self::mapAlign($attribute->value);
|
||||
break;
|
||||
case 'lang':
|
||||
$styles['lang'] = $attribute->value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -333,7 +339,7 @@ class Html
|
|||
* @param \DOMNode $node
|
||||
* @param \PhpOffice\PhpWord\Element\Table $element
|
||||
* @param array &$styles
|
||||
* @return \PhpOffice\PhpWord\Element\Cell $element
|
||||
* @return \PhpOffice\PhpWord\Element\Cell|\PhpOffice\PhpWord\Element\TextRun $element
|
||||
*/
|
||||
private static function parseCell($node, $element, &$styles)
|
||||
{
|
||||
|
|
@ -343,8 +349,29 @@ class Html
|
|||
if (!empty($colspan)) {
|
||||
$cellStyles['gridSpan'] = $colspan - 0;
|
||||
}
|
||||
$cell = $element->addCell(null, $cellStyles);
|
||||
|
||||
return $element->addCell(null, $cellStyles);
|
||||
if (self::shouldAddTextRun($node)) {
|
||||
return $cell->addTextRun(self::parseInlineStyle($node, $styles['paragraph']));
|
||||
}
|
||||
|
||||
return $cell;
|
||||
}
|
||||
|
||||
/**
 * Decides whether the content of $node can be placed in a TextRun.
 *
 * A TextRun is only suitable when $node holds no block-level HTML. The XPath
 * query looks for a table anywhere below $node, or for a p, ul or ol element
 * that is a direct child of $node.
 *
 * @param \DOMNode $node
 * @return bool Returns false if the node contains an HTML element that cannot be added to TextRun, true otherwise
 */
private static function shouldAddTextRun(\DOMNode $node)
{
    $containsBlockElement = self::$xpath->query('.//table|./p|./ul|./ol', $node)->length > 0;

    // A TextRun is wanted exactly when no block element was found.
    return !$containsBlockElement;
}
|
||||
|
||||
/**
|
||||
|
|
@ -375,7 +402,7 @@ class Html
|
|||
*/
|
||||
private static function parseList($node, $element, &$styles, &$data)
|
||||
{
|
||||
$isOrderedList = $node->nodeName == 'ol';
|
||||
$isOrderedList = $node->nodeName === 'ol';
|
||||
if (isset($data['listdepth'])) {
|
||||
$data['listdepth']++;
|
||||
} else {
|
||||
|
|
@ -383,6 +410,9 @@ class Html
|
|||
$styles['list'] = 'listStyle_' . self::$listIndex++;
|
||||
$element->getPhpWord()->addNumberingStyle($styles['list'], self::getListStyle($isOrderedList));
|
||||
}
|
||||
if ($node->parentNode->nodeName === 'li') {
|
||||
return $element->getParent();
|
||||
}
|
||||
}
|
||||
|
||||
private static function getListStyle($isOrderedList)
|
||||
|
|
@ -469,6 +499,9 @@ class Html
|
|||
case 'text-align':
|
||||
$styles['alignment'] = self::mapAlign($cValue);
|
||||
break;
|
||||
case 'direction':
|
||||
$styles['rtl'] = $cValue === 'rtl';
|
||||
break;
|
||||
case 'font-size':
|
||||
$styles['size'] = Converter::cssToPoint($cValue);
|
||||
break;
|
||||
|
|
@ -556,10 +589,12 @@ class Html
|
|||
case 'width':
|
||||
$width = $attribute->value;
|
||||
$style['width'] = $width;
|
||||
$style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX;
|
||||
break;
|
||||
case 'height':
|
||||
$height = $attribute->value;
|
||||
$style['height'] = $height;
|
||||
$style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX;
|
||||
break;
|
||||
case 'style':
|
||||
$styleattr = explode(';', $attribute->value);
|
||||
|
|
|
|||
|
|
@ -90,6 +90,10 @@ class Font extends AbstractStyle
|
|||
$xmlWriter->writeAttributeIf($language->getLatin() !== null, 'w:val', $language->getLatin());
|
||||
$xmlWriter->writeAttributeIf($language->getEastAsia() !== null, 'w:eastAsia', $language->getEastAsia());
|
||||
$xmlWriter->writeAttributeIf($language->getBidirectional() !== null, 'w:bidi', $language->getBidirectional());
|
||||
//if bidi is not set but we are writing RTL, write the latin language in the bidi tag
|
||||
if ($style->isRTL() && $language->getBidirectional() === null && $language->getLatin() !== null) {
|
||||
$xmlWriter->writeAttribute('w:bidi', $language->getLatin());
|
||||
}
|
||||
$xmlWriter->endElement();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -150,6 +150,33 @@ class HtmlTest extends \PHPUnit\Framework\TestCase
|
|||
$this->assertEquals('15', $doc->getElementAttribute('/w:document/w:body/w:p[2]/w:r/w:rPr/w:sz', 'w:val'));
|
||||
}
|
||||
|
||||
/**
 * Checks that the CSS `direction: rtl` instruction on a span results in a
 * w:rtl element inside the run properties of the written document.
 */
public function testParseTextDirection()
{
    $rtlMarkup = '<span style="direction: rtl">test</span>';
    $rtlXpath = '/w:document/w:body/w:p/w:r/w:rPr/w:rtl';

    $phpWord = new \PhpOffice\PhpWord\PhpWord();
    Html::addHtml($phpWord->addSection(), $rtlMarkup);

    $document = TestHelperDOCX::getDocument($phpWord, 'Word2007');
    $this->assertTrue($document->elementExists($rtlXpath));
}
|
||||
|
||||
/**
 * Checks that the HTML `lang` attribute on a span is written as a w:lang
 * element whose w:val attribute carries the given language code.
 */
public function testParseLang()
{
    $langXpath = '/w:document/w:body/w:p/w:r/w:rPr/w:lang';

    $phpWord = new \PhpOffice\PhpWord\PhpWord();
    Html::addHtml($phpWord->addSection(), '<span lang="fr-BE">test</span>');

    $document = TestHelperDOCX::getDocument($phpWord, 'Word2007');
    $this->assertTrue($document->elementExists($langXpath));
    $this->assertEquals('fr-BE', $document->getElementAttribute($langXpath, 'w:val'));
}
|
||||
|
||||
/**
|
||||
* Test font-family style
|
||||
*/
|
||||
|
|
@ -199,7 +226,7 @@ class HtmlTest extends \PHPUnit\Framework\TestCase
|
|||
</thead>
|
||||
<tbody>
|
||||
<tr><td style="border-style: dotted;">1</td><td colspan="2">2</td></tr>
|
||||
<tr><td>4</td><td>5</td><td>6</td></tr>
|
||||
<tr><td>This is <b>bold</b> text</td><td>5</td><td><p>6</p></td></tr>
|
||||
</tbody>
|
||||
</table>';
|
||||
Html::addHtml($section, $html);
|
||||
|
|
@ -272,6 +299,43 @@ class HtmlTest extends \PHPUnit\Framework\TestCase
|
|||
$this->assertNotEquals($firstListnumId, $secondListnumId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests parsing of nested ol/li
*
* Parses two separate ordered lists, the second of which holds a nested
* ordered list inside an li, and checks that the first item of each
* top-level list is written with a different numbering id.
|
||||
*/
|
||||
public function testOrderedNestedListNumbering()
|
||||
{
|
||||
$phpWord = new \PhpOffice\PhpWord\PhpWord();
|
||||
$section = $phpWord->addSection();
|
||||
$html = '<ol>
|
||||
<li>List 1 item 1</li>
|
||||
<li>List 1 item 2</li>
|
||||
</ol>
|
||||
<p>Some Text</p>
|
||||
<ol>
|
||||
<li>List 2 item 1</li>
|
||||
<li>
|
||||
<ol>
|
||||
<li>sub list 1</li>
|
||||
<li>sub list 2</li>
|
||||
</ol>
|
||||
</li>
|
||||
</ol>';
|
||||
Html::addHtml($section, $html, false, false);
|
||||
|
||||
$doc = TestHelperDOCX::getDocument($phpWord, 'Word2007');
|
||||
|
||||
$this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:pPr/w:numPr/w:numId'));
|
||||
$this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:t'));
|
||||
|
||||
$this->assertEquals('List 1 item 1', $doc->getElement('/w:document/w:body/w:p[1]/w:r/w:t')->nodeValue);
|
||||
$this->assertEquals('List 2 item 1', $doc->getElement('/w:document/w:body/w:p[4]/w:r/w:t')->nodeValue);
|
||||
|
||||
$firstListnumId = $doc->getElementAttribute('/w:document/w:body/w:p[1]/w:pPr/w:numPr/w:numId', 'w:val');
|
||||
$secondListnumId = $doc->getElementAttribute('/w:document/w:body/w:p[4]/w:pPr/w:numPr/w:numId', 'w:val');
|
||||
|
||||
$this->assertNotEquals($firstListnumId, $secondListnumId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests parsing of ul/li
|
||||
*/
|
||||
|
|
@ -336,8 +400,8 @@ class HtmlTest extends \PHPUnit\Framework\TestCase
|
|||
|
||||
$baseXpath = '/w:document/w:body/w:p/w:r';
|
||||
$this->assertTrue($doc->elementExists($baseXpath . '/w:pict/v:shape'));
|
||||
$this->assertStringMatchesFormat('%Swidth:150pt%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style'));
|
||||
$this->assertStringMatchesFormat('%Sheight:200pt%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style'));
|
||||
$this->assertStringMatchesFormat('%Swidth:150px%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style'));
|
||||
$this->assertStringMatchesFormat('%Sheight:200px%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style'));
|
||||
$this->assertStringMatchesFormat('%Smso-position-horizontal:right%S', $doc->getElementAttribute($baseXpath . '[1]/w:pict/v:shape', 'style'));
|
||||
$this->assertStringMatchesFormat('%Smso-position-horizontal:left%S', $doc->getElementAttribute($baseXpath . '[2]/w:pict/v:shape', 'style'));
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue