From 1924f3c1d711a0fc54abbae160c7067d9942cae4 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Thu, 28 Apr 2022 12:43:05 +0200 Subject: [PATCH 1/3] Improved memory performance for getSortedCoordinates() in the Cell Collection; with minimal execution time overhead. This is an important method, as it's called by the Reference Helper and for the Writer save() method; so users should see that improvement. --- src/PhpSpreadsheet/Collection/Cells.php | 65 ++++++++++--------- .../Collection/CellsTest.php | 28 +++++--- 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/src/PhpSpreadsheet/Collection/Cells.php b/src/PhpSpreadsheet/Collection/Cells.php index d49f5457..cc3d17e8 100644 --- a/src/PhpSpreadsheet/Collection/Cells.php +++ b/src/PhpSpreadsheet/Collection/Cells.php @@ -4,6 +4,7 @@ namespace PhpOffice\PhpSpreadsheet\Collection; use Generator; use PhpOffice\PhpSpreadsheet\Cell\Cell; +use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException; use PhpOffice\PhpSpreadsheet\Settings; use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; @@ -150,41 +151,16 @@ class Cells public function getSortedCoordinates() { $sortKeys = []; - foreach ($this->getCoordinates() as $coord) { - sscanf($coord, '%[A-Z]%d', $column, $row); - $sortKeys[sprintf('%09d%3s', $row, $column)] = $coord; + foreach ($this->getCoordinates() as $coordinate) { + sscanf($coordinate, '%[A-Z]%d', $column, $row); + $key = (--$row * 16384) + Coordinate::columnIndexFromString($column); + $sortKeys[$key] = $coordinate; } ksort($sortKeys); return array_values($sortKeys); } - /** - * Get highest worksheet column and highest row that have cell records. - * - * @return array Highest column name and highest row number - */ - public function getHighestRowAndColumn() - { - // Lookup highest column and highest row - $col = ['A' => '1A']; - $row = [1]; - foreach ($this->getCoordinates() as $coord) { - sscanf($coord, '%[A-Z]%d', $c, $r); - $row[$r] = $r; - $col[$c] = strlen($c) . $c; - } - - // Determine highest column and row - $highestRow = max($row); - $highestColumn = substr((string) @max($col), 1); - - return [ - 'row' => $highestRow, - 'column' => $highestColumn, - ]; - } - /** * Return the cell coordinate of the currently active cell object. * @@ -219,6 +195,32 @@ class Cells return (int) $row; } + /** + * Get highest worksheet column and highest row that have cell records. + * + * @return array Highest column name and highest row number + */ + public function getHighestRowAndColumn() + { + // Lookup highest column and highest row + $columns = ['1A']; + $rows = [1]; + foreach ($this->getCoordinates() as $coordinate) { + sscanf($coordinate, '%[A-Z]%d', $column, $row); + $rows[$row] = $rows[$row] ?? $row; + $columns[$column] = $columns[$column] ?? strlen($column) . $column; + } + + // Determine highest column and row + $highestRow = max($rows); + $highestColumn = substr((string) max($columns), 1); + + return [ + 'row' => $highestRow, + 'column' => $highestColumn, + ]; + } + /** * Get highest worksheet column. * @@ -239,7 +241,8 @@ class Cells if ($r != $row) { continue; } - $maxColumn = max($maxColumn, strlen($c) . $c); + $sortableColum = strlen($c) . $c; + $maxColumn = $maxColumn > $sortableColum ? $maxColumn : $sortableColum; } return substr($maxColumn, 1); @@ -265,7 +268,7 @@ class Cells if ($c != $column) { continue; } - $maxRow = max($maxRow, $r); + $maxRow = ($maxRow > $r) ? $maxRow : $r; } return $maxRow; diff --git a/tests/PhpSpreadsheetTests/Collection/CellsTest.php b/tests/PhpSpreadsheetTests/Collection/CellsTest.php index 0183e16a..866f4117 100644 --- a/tests/PhpSpreadsheetTests/Collection/CellsTest.php +++ b/tests/PhpSpreadsheetTests/Collection/CellsTest.php @@ -29,16 +29,28 @@ class CellsTest extends TestCase self::assertSame($cell1, $collection->add('B2', $cell1), 'adding a cell should return the cell'); // Assert cell presence - self::assertEquals(['B2'], $collection->getCoordinates(), 'cell list should contains the cell'); - self::assertEquals(['B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the cell'); + self::assertEquals(['B2'], $collection->getCoordinates(), 'cell list should contains the B2 cell'); + self::assertEquals(['B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the B2 cell'); self::assertSame($cell1, $collection->get('B2'), 'should get exact same object'); - self::assertTrue($collection->has('B2'), 'cell should exists'); + self::assertTrue($collection->has('B2'), 'B2 cell should exists'); // Add a second cell $cell2 = $sheet->getCell('A1'); self::assertSame($cell2, $collection->add('A1', $cell2), 'adding a second cell should return the cell'); - self::assertEquals(['B2', 'A1'], $collection->getCoordinates(), 'cell list should contains the cell'); - self::assertEquals(['A1', 'B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the cell'); + self::assertEquals(['B2', 'A1'], $collection->getCoordinates(), 'cell list should contains the second cell'); + self::assertEquals(['A1', 'B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the second cell'); + + // Add a third cell + $cell3 = $sheet->getCell('AA1'); + self::assertSame($cell3, $collection->add('AA1', $cell3), 'adding a third cell should return the cell'); + self::assertEquals(['B2', 'A1', 'AA1'], $collection->getCoordinates(), 'cell list should contains the third cell'); + self::assertEquals(['A1', 'AA1', 'B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the third cell'); + + // Add a fourth cell + $cell4 = $sheet->getCell('Z1'); + self::assertSame($cell4, $collection->add('Z1', $cell4), 'adding a fourth cell should return the cell'); + self::assertEquals(['B2', 'A1', 'AA1', 'Z1'], $collection->getCoordinates(), 'cell list should contains the fourth cell'); + self::assertEquals(['A1', 'Z1', 'AA1', 'B2'], $collection->getSortedCoordinates(), 'sorted cell list contains the fourth cell'); // Assert collection copy $sheet2 = $spreadsheet->createSheet(); @@ -53,7 +65,7 @@ class CellsTest extends TestCase // Assert deletion $collection->delete('B2'); self::assertFalse($collection->has('B2'), 'cell should have been deleted'); - self::assertEquals(['A1'], $collection->getCoordinates(), 'cell list should contains the cell'); + self::assertEquals(['A1', 'AA1', 'Z1'], $collection->getCoordinates(), 'cell list should still contains the A1,Z1 and A11 cells'); // Assert update $cell2 = $sheet->getCell('A1'); @@ -61,8 +73,8 @@ class CellsTest extends TestCase self::assertSame($cell2, $collection->update($cell2), 'should update existing cell'); $cell3 = $sheet->getCell('C3'); - self::assertSame($cell3, $collection->update($cell3), 'should silently add non-existing cell'); - self::assertEquals(['A1', 'C3'], $collection->getCoordinates(), 'cell list should contains the cell'); + self::assertSame($cell3, $collection->update($cell3), 'should silently add non-existing C3 cell'); + self::assertEquals(['A1', 'AA1', 'Z1', 'C3'], $collection->getCoordinates(), 'cell list should contains the C3 cell'); } public function testCacheLastCell(): void From 0171709e7fd484d9ffa0614d2e5a88021e809475 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Thu, 28 Apr 2022 17:18:37 +0200 Subject: [PATCH 2/3] The `sortCellReferenceArray()` in `Coordinate` should have returned cells ordered by row, then by column... but instead sorted by column, then row. Fixed that bug, using a slightly faster algorithm for the sort index than the simple fix would have used, and modified the tests that didn't have the correct expected result :-( --- src/PhpSpreadsheet/Cell/Coordinate.php | 10 +-- .../CellExtractAllCellReferencesInRange.php | 70 +++++++++---------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/PhpSpreadsheet/Cell/Coordinate.php b/src/PhpSpreadsheet/Cell/Coordinate.php index ea53919c..b2dd65c4 100644 --- a/src/PhpSpreadsheet/Cell/Coordinate.php +++ b/src/PhpSpreadsheet/Cell/Coordinate.php @@ -353,9 +353,8 @@ abstract class Coordinate } $cellList = array_merge(...$cells); - $cellList = self::sortCellReferenceArray($cellList); - return $cellList; + return self::sortCellReferenceArray($cellList); } private static function processRangeSetOperators(array $operators, array $cells): array @@ -382,9 +381,10 @@ abstract class Coordinate { // Sort the result by column and row $sortKeys = []; - foreach ($cellList as $coord) { - sscanf($coord, '%[A-Z]%d', $column, $row); - $sortKeys[sprintf('%3s%09d', $column, $row)] = $coord; + foreach ($cellList as $coordinate) { + sscanf($coordinate, '%[A-Z]%d', $column, $row); + $key = (--$row * 16384) + self::columnIndexFromString($column); + $sortKeys[$key] = $coordinate; } ksort($sortKeys); diff --git a/tests/data/CellExtractAllCellReferencesInRange.php b/tests/data/CellExtractAllCellReferencesInRange.php index b005b1fe..a2ecbdd2 100644 --- a/tests/data/CellExtractAllCellReferencesInRange.php +++ b/tests/data/CellExtractAllCellReferencesInRange.php @@ -12,10 +12,10 @@ return [ [ [ 'B4', - 'B5', - 'B6', 'D4', + 'B5', 'D5', + 'B6', 'D6', ], 'B4:B6,D4:D6', @@ -28,13 +28,13 @@ return [ [ [ 'B4', - 'B5', - 'B6', 'C4', - 'C5', - 'C6', 'D4', + 'B5', + 'C5', 'D5', + 'B6', + 'C6', 'D6', ], 'B4:D6', @@ -42,18 +42,18 @@ return [ [ [ 'B4', - 'B5', - 'B6', 'C4', - 'C5', - 'C6', - 'C7', 'D4', + 'B5', + 'C5', 'D5', - 'D6', - 'D7', 'E5', + 'B6', + 'C6', + 'D6', 'E6', + 'C7', + 'D7', 'E7', ], 'B4:D6,C5:E7', @@ -61,8 +61,8 @@ return [ [ [ 'C5', - 'C6', 'D5', + 'C6', 'D6', ], 'B4:D6 C5:E7', @@ -70,23 +70,23 @@ return [ [ [ 'B2', - 'B3', - 'B4', 'C2', - 'C3', - 'C4', - 'C5', 'D2', + 'B3', + 'C3', 'D3', - 'D4', - 'D5', - 'D6', 'E3', + 'B4', + 'C4', + 'D4', 'E4', - 'E5', - 'E6', 'F4', + 'C5', + 'D5', + 'E5', 'F5', + 'D6', + 'E6', 'F6', ], 'B2:D4,C5:D5,E3:E5,D6:E6,F4:F6', @@ -94,23 +94,23 @@ return [ [ [ 'B2', - 'B3', - 'B4', 'C2', - 'C3', - 'C4', - 'C5', 'D2', + 'B3', + 'C3', 'D3', - 'D4', - 'D5', - 'D6', 'E3', + 'B4', + 'C4', + 'D4', 'E4', - 'E5', - 'E6', 'F4', + 'C5', + 'D5', + 'E5', 'F5', + 'D6', + 'E6', 'F6', ], 'B2:D4,C3:E5,D4:F6', @@ -124,8 +124,8 @@ return [ [ [ 'Z2', - 'Z3', 'AA2', + 'Z3', 'AA3', ], 'Z2:AA3', From 45be09e98b3e533ea91c319bee6a5215b3eb1c91 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Thu, 28 Apr 2022 19:04:53 +0200 Subject: [PATCH 3/3] Minor tweaks to execution speed, including eliminating a method call on storing a cell --- src/PhpSpreadsheet/Calculation/Calculation.php | 4 ---- src/PhpSpreadsheet/Collection/Cells.php | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/PhpSpreadsheet/Calculation/Calculation.php b/src/PhpSpreadsheet/Calculation/Calculation.php index a6449c77..5f2af864 100644 --- a/src/PhpSpreadsheet/Calculation/Calculation.php +++ b/src/PhpSpreadsheet/Calculation/Calculation.php @@ -5236,8 +5236,6 @@ class Calculation $aReferences = Coordinate::extractAllCellReferencesInRange($range); $range = "'" . $worksheetName . "'" . '!' . $range; if (!isset($aReferences[1])) { - $currentCol = ''; - $currentRow = 0; // Single cell in range sscanf($aReferences[0], '%[A-Z]%d', $currentCol, $currentRow); if ($worksheet->cellExists($aReferences[0])) { @@ -5248,8 +5246,6 @@ class Calculation } else { // Extract cell data for all cells in the range foreach ($aReferences as $reference) { - $currentCol = ''; - $currentRow = 0; // Extract range sscanf($reference, '%[A-Z]%d', $currentCol, $currentRow); if ($worksheet->cellExists($reference)) { diff --git a/src/PhpSpreadsheet/Collection/Cells.php b/src/PhpSpreadsheet/Collection/Cells.php index cc3d17e8..72790e9a 100644 --- a/src/PhpSpreadsheet/Collection/Cells.php +++ b/src/PhpSpreadsheet/Collection/Cells.php @@ -317,7 +317,9 @@ class Cells // Store new values $stored = $newCollection->cache->setMultiple($newValues); - $this->destructIfNeeded($stored, $newCollection, 'Failed to copy cells in cache'); + if ($stored === false) { + $this->destructIfNeeded($newCollection, 'Failed to copy cells in cache'); + } return $newCollection; } @@ -362,7 +364,9 @@ class Cells $this->currentCell->detach(); $stored = $this->cache->set($this->cachePrefix . $this->currentCoordinate, $this->currentCell); - $this->destructIfNeeded($stored, $this, "Failed to store cell {$this->currentCoordinate} in cache"); + if ($stored === false) { + $this->destructIfNeeded($this, "Failed to store cell {$this->currentCoordinate} in cache"); + } $this->currentCellIsDirty = false; } @@ -370,13 +374,11 @@ class Cells $this->currentCell = null; } - private function destructIfNeeded(bool $stored, self $cells, string $message): void + private function destructIfNeeded(self $cells, string $message): void { - if (!$stored) { - $cells->__destruct(); + $cells->__destruct(); - throw new PhpSpreadsheetException($message); - } + throw new PhpSpreadsheetException($message); } /** @@ -416,7 +418,7 @@ class Cells $this->storeCurrentCell(); // Return null if requested entry doesn't exist in collection - if (!$this->has($cellCoordinate)) { + if ($this->has($cellCoordinate) === false) { return null; }