forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Intern IndexFieldCapabilities Type String on Read (elastic#76405)
When handling a large number of these messages, i.e. when fetching field caps for many indices (and/or when those indices contain lots of fields), the type string is repeated many times over. As these strings are already interned because they are constants, taking the performance hit of interning them on deserialization seems a reasonable trade-off: it saves a non-trivial amount of memory on large clusters and speeds up `org.elasticsearch.action.fieldcaps.TransportFieldCapabilitiesAction#merge`, which uses these strings in map lookups and runs significantly faster with interned strings than with fresh strings that do not have their hash values cached yet.
- Loading branch information
1 parent
843ee21
commit 7ca9940
Showing
2 changed files
with
49 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
45 changes: 45 additions & 0 deletions
45
server/src/main/java/org/elasticsearch/common/util/StringLiteralDeduplicator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
package org.elasticsearch.common.util; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; | ||
|
||
import java.util.Map; | ||
|
||
/** | ||
* A cache in front of Java's string interning. This method assumes that it is only called with strings that are already part of the | ||
* JVM's string pool so that interning them does not grow the pool. Calling it with strings not in the interned string pool is not | ||
* advisable as its performance may deteriorate to slower than outright calls to {@link String#intern()}. | ||
*/ | ||
public final class StringLiteralDeduplicator { | ||
|
||
private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class); | ||
|
||
private static final int MAX_SIZE = 1000; | ||
|
||
private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(); | ||
|
||
public StringLiteralDeduplicator() { | ||
} | ||
|
||
public String deduplicate(String string) { | ||
final String res = map.get(string); | ||
if (res != null) { | ||
return res; | ||
} | ||
final String interned = string.intern(); | ||
if (map.size() > MAX_SIZE) { | ||
map.clear(); | ||
logger.debug("clearing intern cache"); | ||
} | ||
map.put(interned, interned); | ||
return interned; | ||
} | ||
} |