From 6285a26da636654549ec44462cfc4aa64a8e2b64 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 11 Jan 2023 07:48:47 -0800 Subject: [PATCH] Reduce memory usage of match all bitset (#92777) By default, Elasticsearch uses up to 1 bit per document to store the set of root documents when the index mapping has nested fields. This PR introduces a special BitSet using less memory for match_all filters. This optimization is only triggered when the index mapping has a nested field, but that field never exists in documents. --- docs/changelog/92777.yaml | 5 ++ .../index/cache/bitset/BitsetFilterCache.java | 3 +- .../elasticsearch/lucene/util/BitSets.java | 31 +++++++ .../lucene/util/MatchAllBitSet.java | 90 +++++++++++++++++++ .../cache/bitset/BitSetFilterCacheTests.java | 49 ++++++++++ .../lucene/util/BitSetsTests.java | 58 ++++++++++++ .../DocumentSubsetBitsetCache.java | 16 +--- .../accesscontrol/DocumentSubsetReader.java | 5 +- .../accesscontrol/MatchAllRoleBitSet.java | 76 ---------------- .../DocumentSubsetBitsetCacheTests.java | 41 --------- 10 files changed, 242 insertions(+), 132 deletions(-) create mode 100644 docs/changelog/92777.yaml create mode 100644 server/src/main/java/org/elasticsearch/lucene/util/BitSets.java create mode 100644 server/src/main/java/org/elasticsearch/lucene/util/MatchAllBitSet.java create mode 100644 server/src/test/java/org/elasticsearch/lucene/util/BitSetsTests.java delete mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/MatchAllRoleBitSet.java diff --git a/docs/changelog/92777.yaml b/docs/changelog/92777.yaml new file mode 100644 index 0000000000000..7159a258e8977 --- /dev/null +++ b/docs/changelog/92777.yaml @@ -0,0 +1,5 @@ +pr: 92777 +summary: Reduce memory usage of match all bitset +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java b/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java index 266d84f394ac6..f1f03eff88d08 100644 --- a/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java +++ b/server/src/main/java/org/elasticsearch/index/cache/bitset/BitsetFilterCache.java @@ -43,6 +43,7 @@ import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardUtils; +import org.elasticsearch.lucene.util.BitSets; import org.elasticsearch.threadpool.ThreadPool; import java.io.Closeable; @@ -104,7 +105,7 @@ public static BitSet bitsetFromQuery(Query query, LeafReaderContext context) thr if (s == null) { return null; } else { - return BitSet.of(s.iterator(), context.reader().maxDoc()); + return BitSets.of(s.iterator(), context.reader().maxDoc()); } } diff --git a/server/src/main/java/org/elasticsearch/lucene/util/BitSets.java b/server/src/main/java/org/elasticsearch/lucene/util/BitSets.java new file mode 100644 index 0000000000000..53b4d68ce7496 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/lucene/util/BitSets.java @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.lucene.util; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSet; + +import java.io.IOException; + +public final class BitSets { + private BitSets() {} + + /** + * Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}. If the iterator matches all documents, + * then this method will wrap the returned Bitset as {@link MatchAllBitSet} to reduce memory usage. + */ + public static BitSet of(DocIdSetIterator iter, int maxDocs) throws IOException { + final BitSet bitSet = BitSet.of(iter, maxDocs); + if (bitSet.cardinality() == maxDocs) { + return new MatchAllBitSet(maxDocs); + } else { + return bitSet; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/lucene/util/MatchAllBitSet.java b/server/src/main/java/org/elasticsearch/lucene/util/MatchAllBitSet.java new file mode 100644 index 0000000000000..7cce67199a218 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/lucene/util/MatchAllBitSet.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.lucene.util; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; + +/** + * An optimized implementation of {@link BitSet} that matches all documents to reduce memory usage. + */ +public final class MatchAllBitSet extends BitSet { + private static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MatchAllBitSet.class); + + private final int numBits; + + public MatchAllBitSet(int numBits) { + this.numBits = numBits; + } + + @Override + public void set(int i) { + + } + + @Override + public boolean getAndSet(int i) { + return true; + } + + @Override + public void clear(int i) { + assert false : "MatchAllBitSet doesn't support clear"; + throw new UnsupportedOperationException("MatchAllBitSet doesn't support clear"); + } + + @Override + public void clear(int startIndex, int endIndex) { + assert false : "MatchAllBitSet doesn't support clear"; + throw new UnsupportedOperationException("MatchAllBitSet doesn't support clear"); + } + + @Override + public int cardinality() { + return numBits; + } + + @Override + public int approximateCardinality() { + return numBits; + } + + @Override + public int prevSetBit(int index) { + return index; + } + + @Override + public int nextSetBit(int index) { + return index; + } + + @Override + public long ramBytesUsed() { + return RAM_BYTES_USED; + } + + @Override + public boolean get(int index) { + return true; + } + + @Override + public int length() { + return numBits; + } + + @Override + public void or(DocIdSetIterator iter) throws IOException { + + } +} diff --git a/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java b/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java index 3428c8d3653b4..1c164e898426d 100644 --- a/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java +++ b/server/src/test/java/org/elasticsearch/index/cache/bitset/BitSetFilterCacheTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -20,6 +21,7 @@ import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.store.ByteBuffersDirectory; @@ -31,6 +33,7 @@ import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.lucene.util.MatchAllBitSet; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; @@ -39,6 +42,10 @@ import java.util.concurrent.atomic.AtomicLong; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; public class BitSetFilterCacheTests extends ESTestCase { @@ -168,6 +175,48 @@ public void onRemoval(ShardId shardId, Accountable accountable) { assertEquals(0, stats.get()); } + public void testStats() throws IOException { + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig()); + int numDocs = randomIntBetween(2000, 5000); + for (int i = 0; i < numDocs; i++) { + Document d = new Document(); + d.add(new LongPoint("f", i)); + writer.addDocument(d); + } + writer.commit(); + writer.forceMerge(1); + IndexReader reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("test", "_na_", 0)); + assertThat(reader.leaves(), hasSize(1)); + assertThat(reader.numDocs(), equalTo(numDocs)); + + final AtomicLong stats = new AtomicLong(); + final BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, new BitsetFilterCache.Listener() { + @Override + public void onCache(ShardId shardId, Accountable accountable) { + stats.addAndGet(accountable.ramBytesUsed()); + } + + @Override + public void onRemoval(ShardId shardId, Accountable accountable) { + stats.addAndGet(-accountable.ramBytesUsed()); + } + }); + // match all + Query matchAll = randomBoolean() ? LongPoint.newRangeQuery("f", 0, numDocs + between(0, 1000)) : new MatchAllDocsQuery(); + BitSetProducer bitSetProducer = cache.getBitSetProducer(matchAll); + BitSet bitset = bitSetProducer.getBitSet(reader.leaves().get(0)); + assertThat(bitset, instanceOf(MatchAllBitSet.class)); + long usedBytes = stats.get(); + assertThat(usedBytes, lessThan(32L)); + // range + bitSetProducer = cache.getBitSetProducer(LongPoint.newRangeQuery("f", 0, between(1000, 2000))); + bitSetProducer.getBitSet(reader.leaves().get(0)); + usedBytes = stats.get() - usedBytes; + assertThat(usedBytes, greaterThan(256L)); + IOUtils.close(cache, reader, writer, directory); + } + public void testSetNullListener() { try { new BitsetFilterCache(INDEX_SETTINGS, null); diff --git a/server/src/test/java/org/elasticsearch/lucene/util/BitSetsTests.java b/server/src/test/java/org/elasticsearch/lucene/util/BitSetsTests.java new file mode 100644 index 0000000000000..178963b8d5d83 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/lucene/util/BitSetsTests.java @@ -0,0 +1,58 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.lucene.util; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class BitSetsTests extends ESTestCase { + + public void testRandomBitset() throws Exception { + int maxDocs = randomIntBetween(1, 1024); + int numDocs = 0; + FixedBitSet matches = new FixedBitSet(maxDocs); + for (int i = 0; i < maxDocs; i++) { + if (numDocs < maxDocs && randomBoolean()) { + numDocs++; + matches.set(i); + } + } + DocIdSetIterator it = new BitSetIterator(matches, randomIntBetween(0, numDocs)); + BitSet bitSet = BitSets.of(it, maxDocs); + assertThat(bitSet.cardinality(), equalTo(numDocs)); + assertThat(bitSet.length(), equalTo(maxDocs)); + for (int i = 0; i < maxDocs; i++) { + assertThat(bitSet.get(i), equalTo(matches.get(i))); + assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i))); + assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i))); + } + } + + public void testMatchAllBitSet() throws Exception { + int maxDocs = randomIntBetween(1, 128); + FixedBitSet matches = new FixedBitSet(maxDocs); + for (int i = 0; i < maxDocs; i++) { + matches.set(i); + } + DocIdSetIterator it = new BitSetIterator(matches, randomNonNegativeLong()); + BitSet bitSet = BitSets.of(it, maxDocs); + assertThat(bitSet, instanceOf(MatchAllBitSet.class)); + for (int i = 0; i < maxDocs; i++) { + assertTrue(bitSet.get(i)); + assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i))); + assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i))); + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index 20a5ffdebb249..417a270cda8b7 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -14,7 +14,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; @@ -35,6 +34,8 @@ import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.lucene.util.BitSets; +import org.elasticsearch.lucene.util.MatchAllBitSet; import org.elasticsearch.threadpool.ThreadPool; import java.io.Closeable; @@ -277,14 +278,14 @@ private BitSet computeBitSet(Query query, LeafReaderContext context) throws IOEx searcher.setQueryCache(null); final Query rewrittenQuery = searcher.rewrite(query); if (isEffectiveMatchAllDocsQuery(rewrittenQuery)) { - return new MatchAllRoleBitSet(context.reader().maxDoc()); + return new MatchAllBitSet(context.reader().maxDoc()); } final Weight weight = searcher.createWeight(rewrittenQuery, ScoreMode.COMPLETE_NO_SCORES, 1f); final Scorer s = weight.scorer(context); if (s == null) { return null; } else { - return bitSetFromDocIterator(s.iterator(), context.reader().maxDoc()); + return BitSets.of(s.iterator(), context.reader().maxDoc()); } } @@ -380,13 +381,4 @@ void verifyInternalConsistency() { }); } - static BitSet bitSetFromDocIterator(DocIdSetIterator iter, int maxDoc) throws IOException { - final BitSet set = BitSet.of(iter, maxDoc); - if (set.cardinality() == maxDoc) { - return new MatchAllRoleBitSet(maxDoc); - } else { - return set; - } - } - } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReader.java index c64cb3d155410..50089309caddc 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReader.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; import org.elasticsearch.lucene.util.CombinedBitSet; +import org.elasticsearch.lucene.util.MatchAllBitSet; import org.elasticsearch.transport.Transports; import java.io.IOException; @@ -59,7 +60,7 @@ private static int computeNumDocs(LeafReader reader, BitSet roleQueryBits) { final Bits liveDocs = reader.getLiveDocs(); if (roleQueryBits == null) { return 0; - } else if (roleQueryBits instanceof MatchAllRoleBitSet) { + } else if (roleQueryBits instanceof MatchAllBitSet) { return reader.numDocs(); } else if (liveDocs == null) { // slow @@ -197,7 +198,7 @@ public Bits getLiveDocs() { // If we would return a null liveDocs then that would mean that no docs are marked as deleted, // but that isn't the case. No docs match with the role query and therefore all docs are marked as deleted return new Bits.MatchNoBits(in.maxDoc()); - } else if (roleQueryBits instanceof MatchAllRoleBitSet) { + } else if (roleQueryBits instanceof MatchAllBitSet) { return actualLiveDocs; } else if (actualLiveDocs == null) { return roleQueryBits; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/MatchAllRoleBitSet.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/MatchAllRoleBitSet.java deleted file mode 100644 index 4c0f21715fed4..0000000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/MatchAllRoleBitSet.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.core.security.authz.accesscontrol; - -import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.RamUsageEstimator; - -final class MatchAllRoleBitSet extends BitSet { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MatchAllRoleBitSet.class); - private final int length; - - MatchAllRoleBitSet(int length) { - this.length = length; - } - - @Override - public void set(int i) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean getAndSet(int i) { - throw new UnsupportedOperationException(); - } - - @Override - public void clear(int i) { - throw new UnsupportedOperationException(); - } - - @Override - public void clear(int startIndex, int endIndex) { - throw new UnsupportedOperationException(); - } - - @Override - public int cardinality() { - return length(); - } - - @Override - public int approximateCardinality() { - return length(); - } - - @Override - public int prevSetBit(int index) { - return index; - } - - @Override - public int nextSetBit(int index) { - return index; - } - - @Override - public long ramBytesUsed() { - return BASE_RAM_BYTES_USED; - } - - @Override - public boolean get(int index) { - assert 0 <= index && index < this.length(); - return true; - } - - @Override - public int length() { - return length; - } -} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java index f999db8a1b594..021eab54436c6 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java @@ -22,15 +22,12 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.BitSetIterator; -import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.logging.Loggers; @@ -71,7 +68,6 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; @@ -533,43 +529,6 @@ public void testCacheClearEntriesWhenIndexIsClosed() throws Exception { } } - public void testRoleBitSets() throws Exception { - int maxDocs = randomIntBetween(1, 1024); - int numDocs = 0; - FixedBitSet matches = new FixedBitSet(maxDocs); - for (int i = 0; i < maxDocs; i++) { - if (numDocs < maxDocs && randomBoolean()) { - numDocs++; - matches.set(i); - } - } - DocIdSetIterator it = new BitSetIterator(matches, randomIntBetween(0, numDocs)); - BitSet bitSet = DocumentSubsetBitsetCache.bitSetFromDocIterator(it, maxDocs); - assertThat(bitSet.cardinality(), equalTo(numDocs)); - assertThat(bitSet.length(), equalTo(maxDocs)); - for (int i = 0; i < maxDocs; i++) { - assertThat(bitSet.get(i), equalTo(matches.get(i))); - assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i))); - assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i))); - } - } - - public void testMatchAllRoleBitSet() throws Exception { - int maxDocs = randomIntBetween(1, 128); - FixedBitSet matches = new FixedBitSet(maxDocs); - for (int i = 0; i < maxDocs; i++) { - matches.set(i); - } - DocIdSetIterator it = new BitSetIterator(matches, randomNonNegativeLong()); - BitSet bitSet = DocumentSubsetBitsetCache.bitSetFromDocIterator(it, maxDocs); - assertThat(bitSet, instanceOf(MatchAllRoleBitSet.class)); - for (int i = 0; i < maxDocs; i++) { - assertTrue(bitSet.get(i)); - assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i))); - assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i))); - } - } - public void testEquivalentMatchAllDocsQuery() { assertTrue(DocumentSubsetBitsetCache.isEffectiveMatchAllDocsQuery(new MatchAllDocsQuery())); assertTrue(DocumentSubsetBitsetCache.isEffectiveMatchAllDocsQuery(new ConstantScoreQuery(new MatchAllDocsQuery())));