#80: Basic HTML reader
This commit is contained in:
parent
0164e37873
commit
ec85d7d641
|
|
@ -4,7 +4,7 @@ This is the changelog between releases of PHPWord. Releases are listed in revers
|
|||
|
||||
## 0.11.0 - Not yet released
|
||||
|
||||
This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. The ability to add elements to a PHPWord object via HTML was implemented. An RTF reader was initiated.
|
||||
This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four new elements were added: TextBox, ListItemRun, Field, and Line. Relative and absolute positioning for images and textboxes were added. Writer classes were refactored into parts, elements, and styles. ODT and RTF features were enhanced. The ability to add elements to a PHPWord object via HTML was implemented. RTF and HTML readers were initiated.
|
||||
|
||||
### Features
|
||||
|
||||
|
|
@ -33,6 +33,7 @@ This release marked the change of PHPWord license from LGPL 2.1 to LGPL 3. Four
|
|||
- RTF Reader: Basic RTF reader - @ivanlanin GH-72 GH-252
|
||||
- Element: New `Line` element - @basjan GH-253
|
||||
- Title: Ability to apply numbering in heading - @ivanlanin GH-193
|
||||
- HTML Reader: Basic HTML reader - @ivanlanin GH-80
|
||||
|
||||
### Bugfixes
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,15 @@
|
|||
<?php
include_once 'Sample_Header.php';

// The HTML input shares this script's base name and sits under resources/
$name = basename(__FILE__, '.php');
$source = realpath(__DIR__ . "/resources/{$name}.html");

// Read contents
echo date('H:i:s'), " Reading contents from `{$source}`", EOL;
$phpWord = \PhpOffice\PhpWord\IOFactory::load($source, 'HTML');

// Save file under the same base name, using the writers configured by the header
echo write($phpWord, $name, $writers);
if (!CLI) {
    include_once 'Sample_Footer.php';
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>PHPWord</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Adding element via HTML</h1>
|
||||
<p>Some well formed HTML snippet needs to be used</p>
|
||||
<p>With for example <strong>some<sup>1</sup> <em>inline</em> formatting</strong><sub>1</sub></p>
|
||||
<p>Unordered (bulleted) list:</p>
|
||||
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.2</li></ul></ul>
|
||||
<p>Ordered (numbered) list:</p>
|
||||
<ol><li>Item 1</li><li>Item 2</li></ol>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -51,7 +51,7 @@ abstract class IOFactory
|
|||
*/
|
||||
public static function createReader($name = 'Word2007')
|
||||
{
|
||||
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF'))) {
|
||||
if (!in_array($name, array('ReaderInterface', 'Word2007', 'ODText', 'RTF', 'HTML'))) {
|
||||
throw new Exception("\"{$name}\" is not a valid reader.");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
<?php
|
||||
/**
|
||||
* This file is part of PHPWord - A pure PHP library for reading and writing
|
||||
* word processing documents.
|
||||
*
|
||||
* PHPWord is free software distributed under the terms of the GNU Lesser
|
||||
* General Public License version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* For the full copyright and license information, please read the LICENSE
|
||||
* file that was distributed with this source code. For the full list of
|
||||
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2010-2014 PHPWord contributors
|
||||
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Reader;
|
||||
|
||||
use PhpOffice\PhpWord\PhpWord;
|
||||
use PhpOffice\PhpWord\Shared\Html as HTMLParser;
|
||||
|
||||
/**
|
||||
* HTML Reader class
|
||||
*
|
||||
* @since 0.11.0
|
||||
*/
|
||||
class HTML extends AbstractReader implements ReaderInterface
{
    /**
     * Loads PhpWord from file
     *
     * The file is first checked with canRead() (not defined in this class),
     * then read in full and handed to the shared HTML parser as a complete
     * HTML document, which fills a newly added section.
     *
     * @param string $docFile
     * @throws \Exception When the file fails the canRead() check or its contents cannot be read
     * @return \PhpOffice\PhpWord\PhpWord
     */
    public function load($docFile)
    {
        if (!$this->canRead($docFile)) {
            throw new \Exception("Cannot read {$docFile}.");
        }

        // file_get_contents() returns false on failure; the file may have become
        // unreadable after the canRead() check, so never pass false to the parser.
        $html = file_get_contents($docFile);
        if ($html === false) {
            throw new \Exception("Cannot read {$docFile}.");
        }

        $phpWord = new PhpWord();
        $section = $phpWord->addSection();

        // Third argument true: the input is a full HTML document
        HTMLParser::addHtml($section, $html, true);

        return $phpWord;
    }
}
|
||||
|
|
@ -32,20 +32,27 @@ class Html
|
|||
* Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter
|
||||
*
|
||||
* @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added
|
||||
* @param string $html the code to parse
|
||||
* @param string $html The code to parse
|
||||
* @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
|
||||
*/
|
||||
public static function addHtml($element, $html)
|
||||
public static function addHtml($element, $html, $fullHTML = false)
|
||||
{
|
||||
/*
|
||||
* @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
|
||||
* which could be applied when such an element occurs in the parseNode function.
|
||||
*/
|
||||
$html = str_replace(array("\n", "\r"), '', $html);
|
||||
|
||||
// Preprocess: remove all line ends, decode HTML entity, and add body tag for HTML fragments
|
||||
$html = str_replace(array("\n", "\r"), '', $html);
|
||||
$html = html_entity_decode($html);
|
||||
if ($fullHTML === false) {
|
||||
$html = '<body>' . $html . '</body>';
|
||||
}
|
||||
|
||||
// Load DOM
|
||||
$dom = new \DOMDocument();
|
||||
$dom->preserveWhiteSpace = true;
|
||||
$dom->loadXML('<body>' . html_entity_decode($html) . '</body>');
|
||||
|
||||
$dom->loadXML($html);
|
||||
$node = $dom->getElementsByTagName('body');
|
||||
|
||||
self::parseNode($node->item(0), $element);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
<?php
|
||||
/**
|
||||
* This file is part of PHPWord - A pure PHP library for reading and writing
|
||||
* word processing documents.
|
||||
*
|
||||
* PHPWord is free software distributed under the terms of the GNU Lesser
|
||||
* General Public License version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* For the full copyright and license information, please read the LICENSE
|
||||
* file that was distributed with this source code. For the full list of
|
||||
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
|
||||
*
|
||||
* @link https://github.com/PHPOffice/PHPWord
|
||||
* @copyright 2010-2014 PHPWord contributors
|
||||
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
|
||||
*/
|
||||
|
||||
namespace PhpOffice\PhpWord\Tests\Reader;
|
||||
|
||||
use PhpOffice\PhpWord\IOFactory;
|
||||
|
||||
/**
|
||||
* Test class for PhpOffice\PhpWord\Reader\HTML
|
||||
*
|
||||
* @coversDefaultClass \PhpOffice\PhpWord\Reader\HTML
|
||||
* @runTestsInSeparateProcesses
|
||||
*/
|
||||
class HTMLTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Loading the reader.html fixture through IOFactory yields a PhpWord instance
     */
    public function testLoad()
    {
        $phpWord = IOFactory::load(__DIR__ . '/../_files/documents/reader.html', 'HTML');

        $this->assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord);
    }

    /**
     * Loading foo.html (presumably absent from the fixtures) must raise the
     * reader's "Cannot read" exception
     *
     * @expectedException \Exception
     * @expectedExceptionMessage Cannot read
     */
    public function testLoadException()
    {
        IOFactory::load(__DIR__ . '/../_files/documents/foo.html', 'HTML');
    }
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>PHPWord</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Adding element via HTML</h1>
|
||||
<p>Some well formed HTML snippet needs to be used</p>
|
||||
<p>With for example <strong>some<sup>1</sup> <em>inline</em> formatting</strong><sub>1</sub></p>
|
||||
<p>Unordered (bulleted) list:</p>
|
||||
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.2</li></ul></ul>
|
||||
<p>Ordered (numbered) list:</p>
|
||||
<ol><li>Item 1</li><li>Item 2</li></ol>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in New Issue