From 7635b3f91a1fc7a7883db4ebaa88f86fe0385df6 Mon Sep 17 00:00:00 2001 From: Einar Date: Sat, 30 Oct 2021 17:55:00 +0200 Subject: [PATCH] Optimize applyFromArray by caching existing styles (#1785) Prevent calling clone and getHashCode when not needed because these calls are very expensive. When applying styles to a range of cells can we cache the styles we encounter along the way so we don't need to look them up with getHashCode later. --- CHANGELOG.md | 1 + src/PhpSpreadsheet/Style/Style.php | 81 ++++++++++++++++++- tests/PhpSpreadsheetTests/Style/StyleTest.php | 23 ++++++ 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea916954f6..83239f45bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -144,6 +144,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). - Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145). - Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225) - Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246) +- Improved performance of `Style::applyFromArray()` when applied to several cells. [PR #1785](https://github.com/PHPOffice/PhpSpreadsheet/issues/1785). - Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772) ## 1.18.0 - 2021-05-31 diff --git a/src/PhpSpreadsheet/Style/Style.php b/src/PhpSpreadsheet/Style/Style.php index 224c0feb0a..5453baa875 100644 --- a/src/PhpSpreadsheet/Style/Style.php +++ b/src/PhpSpreadsheet/Style/Style.php @@ -63,6 +63,27 @@ class Style extends Supervisor */ protected $quotePrefix = false; + /** + * Internal cache for styles + * Used when applying style on range of cells (column or row) and cleared when + * all cells in range is styled. + * + * PhpSpreadsheet will always minimize the amount of styles used. So cells with + * same styles will reference the same Style instance. To check if two styles + * are similar Style::getHashCode() is used. This call is expensive. To minimize + * the need to call this method we can cache the internal PHP object id of the + * Style in the range. Style::getHashCode() will then only be called when we + * encounter a unique style. + * + * @see Style::applyFromArray() + * @see Style::getHashCode() + * + * @phpstan-var null|array{styleByHash: array, hashByObjId: array} + * + * @var array + */ + private static $cachedStyles; + /** * Create a new Style. * @@ -341,8 +362,14 @@ public function applyFromArray(array $pStyles, $pAdvanced = true) // Selection type, inspect if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) { $selectionType = 'COLUMN'; + + // Enable caching of styles + self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []]; } elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) { $selectionType = 'ROW'; + + // Enable caching of styles + self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []]; } else { $selectionType = 'CELL'; } @@ -355,13 +382,55 @@ public function applyFromArray(array $pStyles, $pAdvanced = true) $newXfIndexes = []; foreach ($oldXfIndexes as $oldXfIndex => $dummy) { $style = $workbook->getCellXfByIndex($oldXfIndex); - $newStyle = clone $style; - $newStyle->applyFromArray($pStyles); - if ($existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode())) { + // $cachedStyles is set when applying style for a range of cells, either column or row + if (self::$cachedStyles === null) { + // Clone the old style and apply style-array + $newStyle = clone $style; + $newStyle->applyFromArray($pStyles); + + // Look for existing style we can use instead (reduce memory usage) + $existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode()); + } else { + // Style cache is stored by Style::getHashCode(). But calling this method is + // expensive. So we cache the php obj id -> hash. + $objId = spl_object_id($style); + + // Look for the original HashCode + $styleHash = self::$cachedStyles['hashByObjId'][$objId] ?? null; + if ($styleHash === null) { + // This object_id is not cached, store the hashcode in case encounter again + $styleHash = self::$cachedStyles['hashByObjId'][$objId] = $style->getHashCode(); + } + + // Find existing style by hash. + $existingStyle = self::$cachedStyles['styleByHash'][$styleHash] ?? null; + + if (!$existingStyle) { + // The old style combined with the new style array is not cached, so we create it now + $newStyle = clone $style; + $newStyle->applyFromArray($pStyles); + + // Look for similar style in workbook to reduce memory usage + $existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode()); + + // Cache the new style by original hashcode + self::$cachedStyles['styleByHash'][$styleHash] = $existingStyle instanceof self ? $existingStyle : $newStyle; + } + } + + if ($existingStyle) { // there is already such cell Xf in our collection $newXfIndexes[$oldXfIndex] = $existingStyle->getIndex(); } else { + if (!isset($newStyle)) { + // Handle bug in PHPStan, see https://github.com/phpstan/phpstan/issues/5805 + // $newStyle should always be defined. + // This block might not be needed in the future + $newStyle = clone $style; + $newStyle->applyFromArray($pStyles); + } + // we don't have such a cell Xf, need to add $workbook->addCellXf($newStyle); $newXfIndexes[$oldXfIndex] = $newStyle->getIndex(); @@ -377,6 +446,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true) $columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]); } + // Disable caching of styles + self::$cachedStyles = null; + break; case 'ROW': for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) { @@ -386,6 +458,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true) $rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]); } + // Disable caching of styles + self::$cachedStyles = null; + break; case 'CELL': for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) { diff --git a/tests/PhpSpreadsheetTests/Style/StyleTest.php b/tests/PhpSpreadsheetTests/Style/StyleTest.php index 6f1577099a..3d20b044c5 100644 --- a/tests/PhpSpreadsheetTests/Style/StyleTest.php +++ b/tests/PhpSpreadsheetTests/Style/StyleTest.php @@ -55,6 +55,29 @@ public function testStyleColumn(): void self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic()); } + public function testStyleIsReused(): void + { + $spreadsheet = new Spreadsheet(); + $sheet = $spreadsheet->getActiveSheet(); + $styleArray = [ + 'font' => [ + 'italic' => true, + ], + ]; + + $sheet->getStyle('A1')->getFont()->setBold(true); + $sheet->getStyle('A2')->getFont()->setBold(true); + $sheet->getStyle('A3')->getFont()->setBold(true); + $sheet->getStyle('A3')->getFont()->setItalic(true); + + $sheet->getStyle('A')->applyFromArray($styleArray); + + self::assertCount(4, $spreadsheet->getCellXfCollection()); + $spreadsheet->garbageCollect(); + + self::assertCount(3, $spreadsheet->getCellXfCollection()); + } + public function testStyleRow(): void { $spreadsheet = new Spreadsheet();