Optimize applyFromArray by caching existing styles (#1785)

Prevent calling clone and getHashCode when not needed
because these calls are very expensive.

When applying styles to a range of cells can we cache the
styles we encounter along the way so we don't need to look
them up with getHashCode later.
This commit is contained in:
Einar 2021-10-30 17:55:00 +02:00 committed by GitHub
parent 5b4b12f77b
commit 7635b3f91a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 102 additions and 3 deletions

View File

@ -144,6 +144,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145). - Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145).
- Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225) - Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225)
- Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246) - Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246)
- Improved performance of `Style::applyFromArray()` when applied to several cells. [PR #1785](https://github.com/PHPOffice/PhpSpreadsheet/issues/1785).
- Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772) - Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772)
## 1.18.0 - 2021-05-31 ## 1.18.0 - 2021-05-31

View File

@ -63,6 +63,27 @@ class Style extends Supervisor
*/ */
protected $quotePrefix = false; protected $quotePrefix = false;
/**
* Internal cache for styles
* Used when applying style on range of cells (column or row) and cleared when
* all cells in range is styled.
*
* PhpSpreadsheet will always minimize the amount of styles used. So cells with
* same styles will reference the same Style instance. To check if two styles
* are similar Style::getHashCode() is used. This call is expensive. To minimize
* the need to call this method we can cache the internal PHP object id of the
* Style in the range. Style::getHashCode() will then only be called when we
* encounter a unique style.
*
* @see Style::applyFromArray()
* @see Style::getHashCode()
*
* @phpstan-var null|array{styleByHash: array<string, Style>, hashByObjId: array<int, string>}
*
* @var array<string, array>
*/
private static $cachedStyles;
/** /**
* Create a new Style. * Create a new Style.
* *
@ -341,8 +362,14 @@ class Style extends Supervisor
// Selection type, inspect // Selection type, inspect
if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) { if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) {
$selectionType = 'COLUMN'; $selectionType = 'COLUMN';
// Enable caching of styles
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
} elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) { } elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) {
$selectionType = 'ROW'; $selectionType = 'ROW';
// Enable caching of styles
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
} else { } else {
$selectionType = 'CELL'; $selectionType = 'CELL';
} }
@ -355,13 +382,55 @@ class Style extends Supervisor
$newXfIndexes = []; $newXfIndexes = [];
foreach ($oldXfIndexes as $oldXfIndex => $dummy) { foreach ($oldXfIndexes as $oldXfIndex => $dummy) {
$style = $workbook->getCellXfByIndex($oldXfIndex); $style = $workbook->getCellXfByIndex($oldXfIndex);
// $cachedStyles is set when applying style for a range of cells, either column or row
if (self::$cachedStyles === null) {
// Clone the old style and apply style-array
$newStyle = clone $style; $newStyle = clone $style;
$newStyle->applyFromArray($pStyles); $newStyle->applyFromArray($pStyles);
if ($existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode())) { // Look for existing style we can use instead (reduce memory usage)
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
} else {
// Style cache is stored by Style::getHashCode(). But calling this method is
// expensive. So we cache the php obj id -> hash.
$objId = spl_object_id($style);
// Look for the original HashCode
$styleHash = self::$cachedStyles['hashByObjId'][$objId] ?? null;
if ($styleHash === null) {
// This object_id is not cached, store the hashcode in case encounter again
$styleHash = self::$cachedStyles['hashByObjId'][$objId] = $style->getHashCode();
}
// Find existing style by hash.
$existingStyle = self::$cachedStyles['styleByHash'][$styleHash] ?? null;
if (!$existingStyle) {
// The old style combined with the new style array is not cached, so we create it now
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);
// Look for similar style in workbook to reduce memory usage
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
// Cache the new style by original hashcode
self::$cachedStyles['styleByHash'][$styleHash] = $existingStyle instanceof self ? $existingStyle : $newStyle;
}
}
if ($existingStyle) {
// there is already such cell Xf in our collection // there is already such cell Xf in our collection
$newXfIndexes[$oldXfIndex] = $existingStyle->getIndex(); $newXfIndexes[$oldXfIndex] = $existingStyle->getIndex();
} else { } else {
if (!isset($newStyle)) {
// Handle bug in PHPStan, see https://github.com/phpstan/phpstan/issues/5805
// $newStyle should always be defined.
// This block might not be needed in the future
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);
}
// we don't have such a cell Xf, need to add // we don't have such a cell Xf, need to add
$workbook->addCellXf($newStyle); $workbook->addCellXf($newStyle);
$newXfIndexes[$oldXfIndex] = $newStyle->getIndex(); $newXfIndexes[$oldXfIndex] = $newStyle->getIndex();
@ -377,6 +446,9 @@ class Style extends Supervisor
$columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]); $columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
} }
// Disable caching of styles
self::$cachedStyles = null;
break; break;
case 'ROW': case 'ROW':
for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) { for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) {
@ -386,6 +458,9 @@ class Style extends Supervisor
$rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]); $rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
} }
// Disable caching of styles
self::$cachedStyles = null;
break; break;
case 'CELL': case 'CELL':
for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) { for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) {

View File

@ -55,6 +55,29 @@ class StyleTest extends TestCase
self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic()); self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic());
} }
public function testStyleIsReused(): void
{
$spreadsheet = new Spreadsheet();
$sheet = $spreadsheet->getActiveSheet();
$styleArray = [
'font' => [
'italic' => true,
],
];
$sheet->getStyle('A1')->getFont()->setBold(true);
$sheet->getStyle('A2')->getFont()->setBold(true);
$sheet->getStyle('A3')->getFont()->setBold(true);
$sheet->getStyle('A3')->getFont()->setItalic(true);
$sheet->getStyle('A')->applyFromArray($styleArray);
self::assertCount(4, $spreadsheet->getCellXfCollection());
$spreadsheet->garbageCollect();
self::assertCount(3, $spreadsheet->getCellXfCollection());
}
public function testStyleRow(): void public function testStyleRow(): void
{ {
$spreadsheet = new Spreadsheet(); $spreadsheet = new Spreadsheet();