Skip to content

Commit

Permalink
Intern IndexFieldCapabilities Type String on Read (elastic#76405)
Browse files Browse the repository at this point in the history
When handling a large number of these messages, i.e. when fetching field caps
for many indices (and/or when those indices contain lots of fields), the type string is repeated
many times over. As these strings are already interned because they are constants, taking
the performance hit of interning them on deserialization seems a reasonable trade-off
for the benefit of saving a non-trivial amount of memory for large clusters. It also
speeds up `org.elasticsearch.action.fieldcaps.TransportFieldCapabilitiesAction#merge`,
which uses these strings in map lookups and will run significantly faster with interned strings
than with fresh strings that do not have their hash values cached yet.
  • Loading branch information
original-brownbear committed Sep 15, 2021
1 parent 843ee21 commit 7ca9940
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.util.StringLiteralDeduplicator;

import java.io.IOException;
import java.util.Collections;
Expand All @@ -25,6 +26,8 @@
*/
public class IndexFieldCapabilities implements Writeable {

private static final StringLiteralDeduplicator typeStringDeduplicator = new StringLiteralDeduplicator();

private final String name;
private final String type;
private final boolean isMetadatafield;
Expand Down Expand Up @@ -55,7 +58,7 @@ public class IndexFieldCapabilities implements Writeable {
IndexFieldCapabilities(StreamInput in) throws IOException {
if (in.getVersion().onOrAfter(Version.V_7_7_0)) {
this.name = in.readString();
this.type = in.readString();
this.type = typeStringDeduplicator.deduplicate(in.readString());
this.isMetadatafield = in.getVersion().onOrAfter(Version.V_7_13_0) ? in.readBoolean() : false;
this.isSearchable = in.readBoolean();
this.isAggregatable = in.readBoolean();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.common.util;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;

import java.util.Map;

/**
 * A small cache placed in front of {@link String#intern()}. This class assumes that it is only handed strings that are already part
 * of the JVM's string pool, so that interning them does not grow the pool. Using it with strings that are not in the interned string
 * pool is not advisable, as its performance may deteriorate to slower than outright calls to {@link String#intern()}.
 */
public final class StringLiteralDeduplicator {

    private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class);

    // Once the cache holds more than this many entries it is emptied before the next insert.
    private static final int MAX_SIZE = 1000;

    // Every entry maps an interned string to itself.
    private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();

    public StringLiteralDeduplicator() {
    }

    /**
     * Returns the canonical instance for the given string, consulting the cache first and falling back to
     * {@link String#intern()} on a miss.
     *
     * @param string the string to deduplicate
     * @return the cached instance if one is present, otherwise the interned instance (which is then cached)
     */
    public String deduplicate(String string) {
        final String cached = map.get(string);
        if (cached == null) {
            return internAndCache(string);
        }
        return cached;
    }

    // Cache-miss path: interns the string, drops the whole cache if it has grown past MAX_SIZE, then stores the interned instance.
    private String internAndCache(String string) {
        final String canonical = string.intern();
        if (map.size() > MAX_SIZE) {
            map.clear();
            logger.debug("clearing intern cache");
        }
        map.put(canonical, canonical);
        return canonical;
    }
}

0 comments on commit 7ca9940

Please sign in to comment.