diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f4e6cf0..8b36f34a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ This is the changelog between releases of PHPWord. Releases are listed in revers ## 0.11.0 - Not yet released -This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. Ability to add elements to PHPWord object via HTML were implemeted. RTF reader were initiated. +This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. Ability to add elements to PHPWord object via HTML were implemeted. RTF and HTML reader were initiated. ### Features @@ -33,6 +33,7 @@ This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four - RTF Reader: Basic RTF reader - @ivanlanin GH-72 GH-252 - Element: New `Line` element - @basjan GH-253 - Title: Ability to apply numbering in heading - @ivanlanin GH-193 +- HTML Reader: Basic HTML reader - @ivanlanin GH-80 ### Bugfixes diff --git a/samples/Sample_30_ReadHTML.php b/samples/Sample_30_ReadHTML.php new file mode 100644 index 00000000..029f8c8c --- /dev/null +++ b/samples/Sample_30_ReadHTML.php @@ -0,0 +1,15 @@ + + + +PHPWord + + +

Adding element via HTML

+

Some well formed HTML snippet needs to be used

+

With for example some1 inline formatting1

+

Unordered (bulleted) list:

+ +

Ordered (numbered) list:

+
  1. Item 1
  2. Item 2
+ + diff --git a/src/PhpWord/IOFactory.php b/src/PhpWord/IOFactory.php index 0d5fe689..1d784962 100644 --- a/src/PhpWord/IOFactory.php +++ b/src/PhpWord/IOFactory.php @@ -51,7 +51,7 @@ abstract class IOFactory */ public static function createReader($name = 'Word2007') { - if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF'))) { + if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF', 'HTML'))) { throw new Exception("\"{$name}\" is not a valid reader."); } diff --git a/src/PhpWord/Reader/HTML.php b/src/PhpWord/Reader/HTML.php new file mode 100644 index 00000000..a6582a3f --- /dev/null +++ b/src/PhpWord/Reader/HTML.php @@ -0,0 +1,50 @@ +canRead($docFile)) { + $section = $phpWord->addSection(); + HTMLParser::addHtml($section, file_get_contents($docFile), true); + } else { + throw new \Exception("Cannot read {$docFile}."); + } + + return $phpWord; + } +} diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index bfe64a25..83292a3a 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -32,20 +32,27 @@ class Html * Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter * * @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added - * @param string $html the code to parse + * @param string $html The code to parse + * @param bool $fullHTML If it's a full HTML, no need to add 'body' tag */ - public static function addHtml($element, $html) + public static function addHtml($element, $html, $fullHTML = false) { /* * @todo parse $stylesheet for default styles. Should result in an array based on id, class and element, * which could be applied when such an element occurs in the parseNode function. */ - $html = str_replace(array("\n", "\r"), '', $html); + // Preprocess: remove all line ends, decode HTML entity, and add body tag for HTML fragments + $html = str_replace(array("\n", "\r"), '', $html); + $html = html_entity_decode($html); + if ($fullHTML === false) { + $html = '' . $html . ''; + } + + // Load DOM $dom = new \DOMDocument(); $dom->preserveWhiteSpace = true; - $dom->loadXML('' . html_entity_decode($html) . ''); - + $dom->loadXML($html); $node = $dom->getElementsByTagName('body'); self::parseNode($node->item(0), $element); diff --git a/tests/PhpWord/Tests/Reader/HTMLTest.php b/tests/PhpWord/Tests/Reader/HTMLTest.php new file mode 100644 index 00000000..cb3dc55c --- /dev/null +++ b/tests/PhpWord/Tests/Reader/HTMLTest.php @@ -0,0 +1,51 @@ +assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord); + } + + /** + * Test load exception + * + * @expectedException \Exception + * @expectedExceptionMessage Cannot read + */ + public function testLoadException() + { + $filename = __DIR__ . '/../_files/documents/foo.html'; + IOFactory::load($filename, 'HTML'); + } +} diff --git a/tests/PhpWord/Tests/_files/documents/reader.html b/tests/PhpWord/Tests/_files/documents/reader.html new file mode 100644 index 00000000..5593298b --- /dev/null +++ b/tests/PhpWord/Tests/_files/documents/reader.html @@ -0,0 +1,15 @@ + + + +PHPWord + + +

Adding element via HTML

+

Some well formed HTML snippet needs to be used

+

With for example some1 inline formatting1

+

Unordered (bulleted) list:

+ +

Ordered (numbered) list:

+
  1. Item 1
  2. Item 2
+ +