Optimize applyFromArray by caching existing styles (#1785)
Prevent calling clone and getHashCode when not needed because these calls are very expensive. When applying styles to a range of cells can we cache the styles we encounter along the way so we don't need to look them up with getHashCode later.
This commit is contained in:
parent
5b4b12f77b
commit
7635b3f91a
|
|
@ -144,6 +144,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
|
||||||
- Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145).
|
- Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145).
|
||||||
- Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225)
|
- Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225)
|
||||||
- Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246)
|
- Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246)
|
||||||
|
- Improved performance of `Style::applyFromArray()` when applied to several cells. [PR #1785](https://github.com/PHPOffice/PhpSpreadsheet/issues/1785).
|
||||||
- Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772)
|
- Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772)
|
||||||
|
|
||||||
## 1.18.0 - 2021-05-31
|
## 1.18.0 - 2021-05-31
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,27 @@ class Style extends Supervisor
|
||||||
*/
|
*/
|
||||||
protected $quotePrefix = false;
|
protected $quotePrefix = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal cache for styles
|
||||||
|
* Used when applying style on range of cells (column or row) and cleared when
|
||||||
|
* all cells in range is styled.
|
||||||
|
*
|
||||||
|
* PhpSpreadsheet will always minimize the amount of styles used. So cells with
|
||||||
|
* same styles will reference the same Style instance. To check if two styles
|
||||||
|
* are similar Style::getHashCode() is used. This call is expensive. To minimize
|
||||||
|
* the need to call this method we can cache the internal PHP object id of the
|
||||||
|
* Style in the range. Style::getHashCode() will then only be called when we
|
||||||
|
* encounter a unique style.
|
||||||
|
*
|
||||||
|
* @see Style::applyFromArray()
|
||||||
|
* @see Style::getHashCode()
|
||||||
|
*
|
||||||
|
* @phpstan-var null|array{styleByHash: array<string, Style>, hashByObjId: array<int, string>}
|
||||||
|
*
|
||||||
|
* @var array<string, array>
|
||||||
|
*/
|
||||||
|
private static $cachedStyles;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new Style.
|
* Create a new Style.
|
||||||
*
|
*
|
||||||
|
|
@ -341,8 +362,14 @@ class Style extends Supervisor
|
||||||
// Selection type, inspect
|
// Selection type, inspect
|
||||||
if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) {
|
if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) {
|
||||||
$selectionType = 'COLUMN';
|
$selectionType = 'COLUMN';
|
||||||
|
|
||||||
|
// Enable caching of styles
|
||||||
|
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
|
||||||
} elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) {
|
} elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) {
|
||||||
$selectionType = 'ROW';
|
$selectionType = 'ROW';
|
||||||
|
|
||||||
|
// Enable caching of styles
|
||||||
|
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
|
||||||
} else {
|
} else {
|
||||||
$selectionType = 'CELL';
|
$selectionType = 'CELL';
|
||||||
}
|
}
|
||||||
|
|
@ -355,13 +382,55 @@ class Style extends Supervisor
|
||||||
$newXfIndexes = [];
|
$newXfIndexes = [];
|
||||||
foreach ($oldXfIndexes as $oldXfIndex => $dummy) {
|
foreach ($oldXfIndexes as $oldXfIndex => $dummy) {
|
||||||
$style = $workbook->getCellXfByIndex($oldXfIndex);
|
$style = $workbook->getCellXfByIndex($oldXfIndex);
|
||||||
|
|
||||||
|
// $cachedStyles is set when applying style for a range of cells, either column or row
|
||||||
|
if (self::$cachedStyles === null) {
|
||||||
|
// Clone the old style and apply style-array
|
||||||
$newStyle = clone $style;
|
$newStyle = clone $style;
|
||||||
$newStyle->applyFromArray($pStyles);
|
$newStyle->applyFromArray($pStyles);
|
||||||
|
|
||||||
if ($existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode())) {
|
// Look for existing style we can use instead (reduce memory usage)
|
||||||
|
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
|
||||||
|
} else {
|
||||||
|
// Style cache is stored by Style::getHashCode(). But calling this method is
|
||||||
|
// expensive. So we cache the php obj id -> hash.
|
||||||
|
$objId = spl_object_id($style);
|
||||||
|
|
||||||
|
// Look for the original HashCode
|
||||||
|
$styleHash = self::$cachedStyles['hashByObjId'][$objId] ?? null;
|
||||||
|
if ($styleHash === null) {
|
||||||
|
// This object_id is not cached, store the hashcode in case encounter again
|
||||||
|
$styleHash = self::$cachedStyles['hashByObjId'][$objId] = $style->getHashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find existing style by hash.
|
||||||
|
$existingStyle = self::$cachedStyles['styleByHash'][$styleHash] ?? null;
|
||||||
|
|
||||||
|
if (!$existingStyle) {
|
||||||
|
// The old style combined with the new style array is not cached, so we create it now
|
||||||
|
$newStyle = clone $style;
|
||||||
|
$newStyle->applyFromArray($pStyles);
|
||||||
|
|
||||||
|
// Look for similar style in workbook to reduce memory usage
|
||||||
|
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
|
||||||
|
|
||||||
|
// Cache the new style by original hashcode
|
||||||
|
self::$cachedStyles['styleByHash'][$styleHash] = $existingStyle instanceof self ? $existingStyle : $newStyle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($existingStyle) {
|
||||||
// there is already such cell Xf in our collection
|
// there is already such cell Xf in our collection
|
||||||
$newXfIndexes[$oldXfIndex] = $existingStyle->getIndex();
|
$newXfIndexes[$oldXfIndex] = $existingStyle->getIndex();
|
||||||
} else {
|
} else {
|
||||||
|
if (!isset($newStyle)) {
|
||||||
|
// Handle bug in PHPStan, see https://github.com/phpstan/phpstan/issues/5805
|
||||||
|
// $newStyle should always be defined.
|
||||||
|
// This block might not be needed in the future
|
||||||
|
$newStyle = clone $style;
|
||||||
|
$newStyle->applyFromArray($pStyles);
|
||||||
|
}
|
||||||
|
|
||||||
// we don't have such a cell Xf, need to add
|
// we don't have such a cell Xf, need to add
|
||||||
$workbook->addCellXf($newStyle);
|
$workbook->addCellXf($newStyle);
|
||||||
$newXfIndexes[$oldXfIndex] = $newStyle->getIndex();
|
$newXfIndexes[$oldXfIndex] = $newStyle->getIndex();
|
||||||
|
|
@ -377,6 +446,9 @@ class Style extends Supervisor
|
||||||
$columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
|
$columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Disable caching of styles
|
||||||
|
self::$cachedStyles = null;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'ROW':
|
case 'ROW':
|
||||||
for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) {
|
for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) {
|
||||||
|
|
@ -386,6 +458,9 @@ class Style extends Supervisor
|
||||||
$rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
|
$rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Disable caching of styles
|
||||||
|
self::$cachedStyles = null;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'CELL':
|
case 'CELL':
|
||||||
for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) {
|
for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) {
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,29 @@ class StyleTest extends TestCase
|
||||||
self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic());
|
self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testStyleIsReused(): void
|
||||||
|
{
|
||||||
|
$spreadsheet = new Spreadsheet();
|
||||||
|
$sheet = $spreadsheet->getActiveSheet();
|
||||||
|
$styleArray = [
|
||||||
|
'font' => [
|
||||||
|
'italic' => true,
|
||||||
|
],
|
||||||
|
];
|
||||||
|
|
||||||
|
$sheet->getStyle('A1')->getFont()->setBold(true);
|
||||||
|
$sheet->getStyle('A2')->getFont()->setBold(true);
|
||||||
|
$sheet->getStyle('A3')->getFont()->setBold(true);
|
||||||
|
$sheet->getStyle('A3')->getFont()->setItalic(true);
|
||||||
|
|
||||||
|
$sheet->getStyle('A')->applyFromArray($styleArray);
|
||||||
|
|
||||||
|
self::assertCount(4, $spreadsheet->getCellXfCollection());
|
||||||
|
$spreadsheet->garbageCollect();
|
||||||
|
|
||||||
|
self::assertCount(3, $spreadsheet->getCellXfCollection());
|
||||||
|
}
|
||||||
|
|
||||||
public function testStyleRow(): void
|
public function testStyleRow(): void
|
||||||
{
|
{
|
||||||
$spreadsheet = new Spreadsheet();
|
$spreadsheet = new Spreadsheet();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue