Skip to content

Commit

Permalink
Reduce memory usage of match all bitset (#92777)
Browse files Browse the repository at this point in the history
By default, Elasticsearch uses up to 1 bit per document to store the set
of root documents when the index mapping has nested fields. This PR
introduces a special BitSet using less memory for match_all filters.
This optimization is only triggered when the index mapping has a nested
field, but that field never exists in documents.
  • Loading branch information
dnhatn authored Jan 11, 2023
1 parent 29509ed commit fa58477
Show file tree
Hide file tree
Showing 10 changed files with 242 additions and 132 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/92777.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 92777
summary: Reduce memory usage of match all bitset
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardUtils;
import org.elasticsearch.lucene.util.BitSets;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.Closeable;
Expand Down Expand Up @@ -104,7 +105,7 @@ public static BitSet bitsetFromQuery(Query query, LeafReaderContext context) thr
if (s == null) {
return null;
} else {
return BitSet.of(s.iterator(), context.reader().maxDoc());
return BitSets.of(s.iterator(), context.reader().maxDoc());
}
}

Expand Down
31 changes: 31 additions & 0 deletions server/src/main/java/org/elasticsearch/lucene/util/BitSets.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;

import java.io.IOException;

/**
 * Static factory helpers for building Lucene {@link BitSet} instances.
 */
public final class BitSets {

    private BitSets() {
        // static helpers only; not meant to be instantiated
    }

    /**
     * Materializes the documents produced by {@code iter} into a {@link BitSet} with {@code maxDocs} bits.
     * When every one of the {@code maxDocs} bits turns out to be set, the materialized bitset is dropped and a
     * {@link MatchAllBitSet} is returned in its place to reduce memory usage.
     *
     * @param iter    iterator over the document ids to set
     * @param maxDocs number of bits of the resulting bitset
     * @return a {@link MatchAllBitSet} if all {@code maxDocs} bits are set, otherwise the bitset built by
     *         {@link BitSet#of(DocIdSetIterator, int)}
     * @throws IOException propagated from {@link BitSet#of(DocIdSetIterator, int)}
     */
    public static BitSet of(DocIdSetIterator iter, int maxDocs) throws IOException {
        final BitSet materialized = BitSet.of(iter, maxDocs);
        final boolean everyBitSet = materialized.cardinality() == maxDocs;
        return everyBitSet ? new MatchAllBitSet(maxDocs) : materialized;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;

/**
 * A {@link BitSet} in which every bit is reported as set. Only the number of bits is stored, so the
 * reported memory usage is the shallow size of the instance regardless of how many bits it represents.
 * Setting bits is a no-op; clearing bits is not supported.
 */
public final class MatchAllBitSet extends BitSet {
    private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(MatchAllBitSet.class);
    private static final String CLEAR_UNSUPPORTED = "MatchAllBitSet doesn't support clear";

    private final int numBits;

    /**
     * @param numBits the number of bits in this set, all of which are reported as set
     */
    public MatchAllBitSet(int numBits) {
        this.numBits = numBits;
    }

    // ---- read operations -------------------------------------------------

    /** Always {@code true}: every bit is set. */
    @Override
    public boolean get(int index) {
        return true;
    }

    /** Returns the number of bits given at construction time. */
    @Override
    public int length() {
        return numBits;
    }

    /** All bits are set, so the cardinality equals the length. */
    @Override
    public int cardinality() {
        return numBits;
    }

    /** Same value as {@link #cardinality()}. */
    @Override
    public int approximateCardinality() {
        return numBits;
    }

    /** Returns {@code index} itself, since the bit at {@code index} is set. */
    @Override
    public int nextSetBit(int index) {
        return index;
    }

    /** Returns {@code index} itself, since the bit at {@code index} is set. */
    @Override
    public int prevSetBit(int index) {
        return index;
    }

    /** Returns the shallow size of an instance; there is no backing array to account for. */
    @Override
    public long ramBytesUsed() {
        return SHALLOW_SIZE;
    }

    // ---- write operations ------------------------------------------------

    /** No-op: the bit is already set. */
    @Override
    public void set(int i) {
        // nothing to do, every bit is already set
    }

    /** No-op that always reports the bit as previously set. */
    @Override
    public boolean getAndSet(int i) {
        return true;
    }

    /** No-op: adding more documents cannot change a set that already contains every bit. */
    @Override
    public void or(DocIdSetIterator iter) throws IOException {
        // nothing to do, every bit is already set
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always (an {@link AssertionError} when assertions are enabled)
     */
    @Override
    public void clear(int i) {
        throw clearNotSupported();
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always (an {@link AssertionError} when assertions are enabled)
     */
    @Override
    public void clear(int startIndex, int endIndex) {
        throw clearNotSupported();
    }

    /** Trips an assertion when assertions are enabled, otherwise builds the exception for the caller to throw. */
    private static UnsupportedOperationException clearNotSupported() {
        assert false : CLEAR_UNSUPPORTED;
        return new UnsupportedOperationException(CLEAR_UNSUPPORTED);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand All @@ -20,6 +21,7 @@
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.store.ByteBuffersDirectory;
Expand All @@ -31,6 +33,7 @@
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;

Expand All @@ -39,6 +42,10 @@
import java.util.concurrent.atomic.AtomicLong;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThan;

public class BitSetFilterCacheTests extends ESTestCase {

Expand Down Expand Up @@ -168,6 +175,48 @@ public void onRemoval(ShardId shardId, Accountable accountable) {
assertEquals(0, stats.get());
}

/**
 * Verifies the memory reported to the cache listener: a filter matching every document of the segment is cached
 * as a {@link MatchAllBitSet} accounting for fewer than 32 bytes, while a filter matching only part of the
 * segment accounts for more than 256 bytes.
 */
public void testStats() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig());
    final int numDocs = randomIntBetween(2000, 5000);
    // index one document per value in [0, numDocs)
    for (int value = 0; value < numDocs; value++) {
        final Document doc = new Document();
        doc.add(new LongPoint("f", value));
        indexWriter.addDocument(doc);
    }
    indexWriter.commit();
    // the assertions below expect a single leaf
    indexWriter.forceMerge(1);
    final IndexReader reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), new ShardId("test", "_na_", 0));
    assertThat(reader.leaves(), hasSize(1));
    assertThat(reader.numDocs(), equalTo(numDocs));

    // running total of the bytes reported as cached minus the bytes reported as removed
    final AtomicLong cachedBytes = new AtomicLong();
    final BitsetFilterCache.Listener accountingListener = new BitsetFilterCache.Listener() {
        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
            cachedBytes.addAndGet(accountable.ramBytesUsed());
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
            cachedBytes.addAndGet(-accountable.ramBytesUsed());
        }
    };
    final BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, accountingListener);

    // match all: either an explicit match_all or a range covering every indexed value
    final Query matchAll;
    if (randomBoolean()) {
        matchAll = LongPoint.newRangeQuery("f", 0, numDocs + between(0, 1000));
    } else {
        matchAll = new MatchAllDocsQuery();
    }
    final BitSet matchAllBits = cache.getBitSetProducer(matchAll).getBitSet(reader.leaves().get(0));
    assertThat(matchAllBits, instanceOf(MatchAllBitSet.class));
    final long matchAllBytes = cachedBytes.get();
    assertThat(matchAllBytes, lessThan(32L));

    // range: matches only part of the documents
    final Query partialRange = LongPoint.newRangeQuery("f", 0, between(1000, 2000));
    cache.getBitSetProducer(partialRange).getBitSet(reader.leaves().get(0));
    final long partialRangeBytes = cachedBytes.get() - matchAllBytes;
    assertThat(partialRangeBytes, greaterThan(256L));

    IOUtils.close(cache, reader, indexWriter, dir);
}

public void testSetNullListener() {
try {
new BitsetFilterCache(INDEX_SETTINGS, null);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

public class BitSetsTests extends ESTestCase {

    /**
     * Builds a bitset from a randomly populated {@link FixedBitSet} and checks that the result of
     * {@link BitSets#of} agrees with the source on cardinality, length and every per-index lookup.
     */
    public void testRandomBitset() throws Exception {
        int maxDocs = randomIntBetween(1, 1024);
        int numDocs = 0;
        FixedBitSet matches = new FixedBitSet(maxDocs);
        for (int i = 0; i < maxDocs; i++) {
            // numDocs can never exceed i here, so no extra upper-bound guard is needed
            if (randomBoolean()) {
                numDocs++;
                matches.set(i);
            }
        }
        DocIdSetIterator it = new BitSetIterator(matches, randomIntBetween(0, numDocs));
        BitSet bitSet = BitSets.of(it, maxDocs);
        assertThat(bitSet.cardinality(), equalTo(numDocs));
        assertThat(bitSet.length(), equalTo(maxDocs));
        for (int i = 0; i < maxDocs; i++) {
            assertThat(bitSet.get(i), equalTo(matches.get(i)));
            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
        }
    }

    /**
     * Builds a bitset from a fully populated {@link FixedBitSet} and checks that {@link BitSets#of} returns a
     * {@link MatchAllBitSet} that reports the same cardinality, length and per-index lookups as the source.
     */
    public void testMatchAllBitSet() throws Exception {
        int maxDocs = randomIntBetween(1, 128);
        FixedBitSet matches = new FixedBitSet(maxDocs);
        // set every bit in [0, maxDocs)
        matches.set(0, maxDocs);
        DocIdSetIterator it = new BitSetIterator(matches, randomNonNegativeLong());
        BitSet bitSet = BitSets.of(it, maxDocs);
        assertThat(bitSet, instanceOf(MatchAllBitSet.class));
        assertThat(bitSet.cardinality(), equalTo(maxDocs));
        assertThat(bitSet.length(), equalTo(maxDocs));
        for (int i = 0; i < maxDocs; i++) {
            assertTrue(bitSet.get(i));
            assertThat(bitSet.nextSetBit(i), equalTo(matches.nextSetBit(i)));
            assertThat(bitSet.prevSetBit(i), equalTo(matches.prevSetBit(i)));
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
Expand All @@ -35,6 +34,8 @@
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.lucene.util.BitSets;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.Closeable;
Expand Down Expand Up @@ -277,14 +278,14 @@ private BitSet computeBitSet(Query query, LeafReaderContext context) throws IOEx
searcher.setQueryCache(null);
final Query rewrittenQuery = searcher.rewrite(query);
if (isEffectiveMatchAllDocsQuery(rewrittenQuery)) {
return new MatchAllRoleBitSet(context.reader().maxDoc());
return new MatchAllBitSet(context.reader().maxDoc());
}
final Weight weight = searcher.createWeight(rewrittenQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
final Scorer s = weight.scorer(context);
if (s == null) {
return null;
} else {
return bitSetFromDocIterator(s.iterator(), context.reader().maxDoc());
return BitSets.of(s.iterator(), context.reader().maxDoc());
}
}

Expand Down Expand Up @@ -380,13 +381,4 @@ void verifyInternalConsistency() {
});
}

/**
 * Builds a {@link BitSet} with {@code maxDoc} bits from {@code iter}. If every one of the {@code maxDoc} bits is
 * set, a {@link MatchAllRoleBitSet} is returned instead of the materialized bitset.
 */
static BitSet bitSetFromDocIterator(DocIdSetIterator iter, int maxDoc) throws IOException {
    final BitSet materialized = BitSet.of(iter, maxDoc);
    return materialized.cardinality() == maxDoc ? new MatchAllRoleBitSet(maxDoc) : materialized;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.lucene.util.CombinedBitSet;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.transport.Transports;

import java.io.IOException;
Expand Down Expand Up @@ -59,7 +60,7 @@ private static int computeNumDocs(LeafReader reader, BitSet roleQueryBits) {
final Bits liveDocs = reader.getLiveDocs();
if (roleQueryBits == null) {
return 0;
} else if (roleQueryBits instanceof MatchAllRoleBitSet) {
} else if (roleQueryBits instanceof MatchAllBitSet) {
return reader.numDocs();
} else if (liveDocs == null) {
// slow
Expand Down Expand Up @@ -197,7 +198,7 @@ public Bits getLiveDocs() {
// If we would return a <code>null</code> liveDocs then that would mean that no docs are marked as deleted,
// but that isn't the case. No docs match with the role query and therefore all docs are marked as deleted
return new Bits.MatchNoBits(in.maxDoc());
} else if (roleQueryBits instanceof MatchAllRoleBitSet) {
} else if (roleQueryBits instanceof MatchAllBitSet) {
return actualLiveDocs;
} else if (actualLiveDocs == null) {
return roleQueryBits;
Expand Down
Loading

0 comments on commit fa58477

Please sign in to comment.