Skip to content

Commit

Permalink
Optimize applyFromArray by caching existing styles (#1785)
Browse files Browse the repository at this point in the history
Prevent calling clone and getHashCode when not needed
because these calls are very expensive.

When applying styles to a range of cells can we cache the
styles we encounter along the way so we don't need to look
them up with getHashCode later.
  • Loading branch information
eigan authored Oct 30, 2021
1 parent 5b4b12f commit 7635b3f
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
- Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145).
- Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225)
- Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246)
- Improved performance of `Style::applyFromArray()` when applied to several cells. [PR #1785](https://github.com/PHPOffice/PhpSpreadsheet/issues/1785).
- Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772)

## 1.18.0 - 2021-05-31
Expand Down
81 changes: 78 additions & 3 deletions src/PhpSpreadsheet/Style/Style.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ class Style extends Supervisor
*/
protected $quotePrefix = false;

/**
* Internal cache for styles
* Used when applying style on range of cells (column or row) and cleared when
* all cells in range is styled.
*
* PhpSpreadsheet will always minimize the amount of styles used. So cells with
* same styles will reference the same Style instance. To check if two styles
* are similar Style::getHashCode() is used. This call is expensive. To minimize
* the need to call this method we can cache the internal PHP object id of the
* Style in the range. Style::getHashCode() will then only be called when we
* encounter a unique style.
*
* @see Style::applyFromArray()
* @see Style::getHashCode()
*
* @phpstan-var null|array{styleByHash: array<string, Style>, hashByObjId: array<int, string>}
*
* @var array<string, array>
*/
private static $cachedStyles;

/**
* Create a new Style.
*
Expand Down Expand Up @@ -341,8 +362,14 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
// Selection type, inspect
if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) {
$selectionType = 'COLUMN';

// Enable caching of styles
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
} elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) {
$selectionType = 'ROW';

// Enable caching of styles
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
} else {
$selectionType = 'CELL';
}
Expand All @@ -355,13 +382,55 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
$newXfIndexes = [];
foreach ($oldXfIndexes as $oldXfIndex => $dummy) {
$style = $workbook->getCellXfByIndex($oldXfIndex);
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);

if ($existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode())) {
// $cachedStyles is set when applying style for a range of cells, either column or row
if (self::$cachedStyles === null) {
// Clone the old style and apply style-array
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);

// Look for existing style we can use instead (reduce memory usage)
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
} else {
// Style cache is stored by Style::getHashCode(). But calling this method is
// expensive. So we cache the php obj id -> hash.
$objId = spl_object_id($style);

// Look for the original HashCode
$styleHash = self::$cachedStyles['hashByObjId'][$objId] ?? null;
if ($styleHash === null) {
// This object_id is not cached, store the hashcode in case encounter again
$styleHash = self::$cachedStyles['hashByObjId'][$objId] = $style->getHashCode();
}

// Find existing style by hash.
$existingStyle = self::$cachedStyles['styleByHash'][$styleHash] ?? null;

if (!$existingStyle) {
// The old style combined with the new style array is not cached, so we create it now
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);

// Look for similar style in workbook to reduce memory usage
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());

// Cache the new style by original hashcode
self::$cachedStyles['styleByHash'][$styleHash] = $existingStyle instanceof self ? $existingStyle : $newStyle;
}
}

if ($existingStyle) {
// there is already such cell Xf in our collection
$newXfIndexes[$oldXfIndex] = $existingStyle->getIndex();
} else {
if (!isset($newStyle)) {
// Handle bug in PHPStan, see https://github.com/phpstan/phpstan/issues/5805
// $newStyle should always be defined.
// This block might not be needed in the future
$newStyle = clone $style;
$newStyle->applyFromArray($pStyles);
}

// we don't have such a cell Xf, need to add
$workbook->addCellXf($newStyle);
$newXfIndexes[$oldXfIndex] = $newStyle->getIndex();
Expand All @@ -377,6 +446,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
$columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
}

// Disable caching of styles
self::$cachedStyles = null;

break;
case 'ROW':
for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) {
Expand All @@ -386,6 +458,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
$rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
}

// Disable caching of styles
self::$cachedStyles = null;

break;
case 'CELL':
for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) {
Expand Down
23 changes: 23 additions & 0 deletions tests/PhpSpreadsheetTests/Style/StyleTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,29 @@ public function testStyleColumn(): void
self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic());
}

public function testStyleIsReused(): void
{
$spreadsheet = new Spreadsheet();
$sheet = $spreadsheet->getActiveSheet();
$styleArray = [
'font' => [
'italic' => true,
],
];

$sheet->getStyle('A1')->getFont()->setBold(true);
$sheet->getStyle('A2')->getFont()->setBold(true);
$sheet->getStyle('A3')->getFont()->setBold(true);
$sheet->getStyle('A3')->getFont()->setItalic(true);

$sheet->getStyle('A')->applyFromArray($styleArray);

self::assertCount(4, $spreadsheet->getCellXfCollection());
$spreadsheet->garbageCollect();

self::assertCount(3, $spreadsheet->getCellXfCollection());
}

public function testStyleRow(): void
{
$spreadsheet = new Spreadsheet();
Expand Down

0 comments on commit 7635b3f

Please sign in to comment.