Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory usage of match all bitset #92777

Merged
merged 3 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/92777.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 92777
summary: Reduce memory usage of match all bitset
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.ShardUtils;
import org.elasticsearch.lucene.util.BitSets;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.Closeable;
Expand Down Expand Up @@ -104,7 +105,7 @@ public static BitSet bitsetFromQuery(Query query, LeafReaderContext context) thr
if (s == null) {
return null;
} else {
return BitSet.of(s.iterator(), context.reader().maxDoc());
return BitSets.of(s.iterator(), context.reader().maxDoc());
}
}

Expand Down
31 changes: 31 additions & 0 deletions server/src/main/java/org/elasticsearch/lucene/util/BitSets.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;

import java.io.IOException;

public final class BitSets {
    private BitSets() {}

    /**
     * Materializes the documents produced by {@code iter} into a {@link BitSet} holding {@code maxDocs} bits.
     * When all {@code maxDocs} bits turn out to be set, the materialized bit set is dropped and a
     * {@link MatchAllBitSet} is returned in its place to reduce memory usage.
     *
     * @param iter    iterator over the matching documents
     * @param maxDocs number of bits of the resulting bit set
     * @return a {@link MatchAllBitSet} when every bit is set, otherwise the materialized bit set
     * @throws IOException propagated from building the bit set out of the iterator
     */
    public static BitSet of(DocIdSetIterator iter, int maxDocs) throws IOException {
        final BitSet materialized = BitSet.of(iter, maxDocs);
        final boolean everyDocMatches = materialized.cardinality() == maxDocs;
        return everyDocMatches ? new MatchAllBitSet(maxDocs) : materialized;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;

/**
* An optimized implementation of {@link BitSet} that matches all documents to reduce memory usage.
*/
public final class MatchAllBitSet extends BitSet {
    private static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MatchAllBitSet.class);
    private static final String CLEAR_UNSUPPORTED = "MatchAllBitSet doesn't support clear";

    /** Number of bits represented by this bit set; all of them are reported as set. */
    private final int numBits;

    /**
     * @param numBits the number of bits this bit set reports via {@link #length()} and {@link #cardinality()}
     */
    public MatchAllBitSet(int numBits) {
        this.numBits = numBits;
    }

    // ---- read-side operations: every bit is reported as set ----

    /** Returns the number of bits given at construction time. */
    @Override
    public int length() {
        return numBits;
    }

    /** Always {@code true}, whatever the index. */
    @Override
    public boolean get(int index) {
        return true;
    }

    /** All bits are set, so the cardinality equals the number of bits. */
    @Override
    public int cardinality() {
        return numBits;
    }

    /** Same value as {@link #cardinality()}. */
    @Override
    public int approximateCardinality() {
        return numBits;
    }

    /** Every bit is set, so the given index is returned unchanged. */
    @Override
    public int nextSetBit(int index) {
        return index;
    }

    /** Every bit is set, so the given index is returned unchanged. */
    @Override
    public int prevSetBit(int index) {
        return index;
    }

    /** Only the shallow size of the instance is reported; no backing array is held. */
    @Override
    public long ramBytesUsed() {
        return RAM_BYTES_USED;
    }

    // ---- write-side operations: setting is a no-op, clearing is rejected ----

    /** No-op: the bit is already reported as set. */
    @Override
    public void set(int i) {
        // nothing to do
    }

    /** Returns {@code true} because the bit is already reported as set; nothing is modified. */
    @Override
    public boolean getAndSet(int i) {
        return true;
    }

    /** No-op: the iterator is not consumed. */
    @Override
    public void or(DocIdSetIterator iter) throws IOException {
        // nothing to do
    }

    /**
     * Not supported: trips an assertion when assertions are enabled, otherwise throws.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void clear(int i) {
        assert false : CLEAR_UNSUPPORTED;
        throw new UnsupportedOperationException(CLEAR_UNSUPPORTED);
    }

    /**
     * Not supported: trips an assertion when assertions are enabled, otherwise throws.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void clear(int startIndex, int endIndex) {
        assert false : CLEAR_UNSUPPORTED;
        throw new UnsupportedOperationException(CLEAR_UNSUPPORTED);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand All @@ -20,6 +21,7 @@
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.store.ByteBuffersDirectory;
Expand All @@ -31,6 +33,7 @@
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;

Expand All @@ -39,6 +42,10 @@
import java.util.concurrent.atomic.AtomicLong;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThan;

public class BitSetFilterCacheTests extends ESTestCase {

Expand Down Expand Up @@ -168,6 +175,48 @@ public void onRemoval(ShardId shardId, Accountable accountable) {
assertEquals(0, stats.get());
}

/**
 * Checks the memory accounted by the cache listener: a bit set for a query matching every document must be a
 * {@link MatchAllBitSet} accounted for less than 32 bytes, while a bit set for a partial range must add more
 * than 256 bytes on top of that.
 */
public void testStats() throws IOException {
    final Directory directory = newDirectory();
    final IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig());
    final int numDocs = randomIntBetween(2000, 5000);
    // one document per value in [0, numDocs) on the point field "f"
    for (int value = 0; value < numDocs; value++) {
        final Document doc = new Document();
        doc.add(new LongPoint("f", value));
        writer.addDocument(doc);
    }
    writer.commit();
    // merge down to one segment so that the reader exposes a single leaf
    writer.forceMerge(1);
    final IndexReader reader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("test", "_na_", 0));
    assertThat(reader.leaves(), hasSize(1));
    assertThat(reader.numDocs(), equalTo(numDocs));

    // running total of the bytes reported to the listener: added on cache, subtracted on removal
    final AtomicLong accountedBytes = new AtomicLong();
    final BitsetFilterCache cache = new BitsetFilterCache(INDEX_SETTINGS, new BitsetFilterCache.Listener() {
        @Override
        public void onCache(ShardId shardId, Accountable accountable) {
            accountedBytes.addAndGet(accountable.ramBytesUsed());
        }

        @Override
        public void onRemoval(ShardId shardId, Accountable accountable) {
            accountedBytes.addAndGet(-accountable.ramBytesUsed());
        }
    });

    // match all: either a range covering every indexed value or an explicit match-all query
    final Query matchAll;
    if (randomBoolean()) {
        matchAll = LongPoint.newRangeQuery("f", 0, numDocs + between(0, 1000));
    } else {
        matchAll = new MatchAllDocsQuery();
    }
    final BitSetProducer matchAllProducer = cache.getBitSetProducer(matchAll);
    final BitSet matchAllBits = matchAllProducer.getBitSet(reader.leaves().get(0));
    assertThat(matchAllBits, instanceOf(MatchAllBitSet.class));
    final long matchAllBytes = accountedBytes.get();
    assertThat(matchAllBytes, lessThan(32L));

    // range: covers only part of the indexed values
    final BitSetProducer rangeProducer = cache.getBitSetProducer(LongPoint.newRangeQuery("f", 0, between(1000, 2000)));
    rangeProducer.getBitSet(reader.leaves().get(0));
    final long rangeBytes = accountedBytes.get() - matchAllBytes;
    assertThat(rangeBytes, greaterThan(256L));

    IOUtils.close(cache, reader, writer, directory);
}

public void testSetNullListener() {
try {
new BitsetFilterCache(INDEX_SETTINGS, null);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

public class BitSetsTests extends ESTestCase {

    /**
     * Builds a bit set from a randomly populated {@link FixedBitSet} and checks that the result of
     * {@link BitSets#of} agrees with it on cardinality, length and every per-index lookup.
     */
    public void testRandomBitset() throws Exception {
        final int maxDocs = randomIntBetween(1, 1024);
        final FixedBitSet expected = new FixedBitSet(maxDocs);
        int numDocs = 0;
        for (int doc = 0; doc < maxDocs; doc++) {
            // numDocs never exceeds doc here, so the bound check always passes and a random boolean is drawn per doc
            if (numDocs < maxDocs && randomBoolean()) {
                expected.set(doc);
                numDocs++;
            }
        }

        final DocIdSetIterator docs = new BitSetIterator(expected, randomIntBetween(0, numDocs));
        final BitSet actual = BitSets.of(docs, maxDocs);

        assertThat(actual.cardinality(), equalTo(numDocs));
        assertThat(actual.length(), equalTo(maxDocs));
        for (int doc = 0; doc < maxDocs; doc++) {
            assertThat(actual.get(doc), equalTo(expected.get(doc)));
            assertThat(actual.nextSetBit(doc), equalTo(expected.nextSetBit(doc)));
            assertThat(actual.prevSetBit(doc), equalTo(expected.prevSetBit(doc)));
        }
    }

    /**
     * Builds a bit set from a fully populated {@link FixedBitSet} and checks that {@link BitSets#of} hands back a
     * {@link MatchAllBitSet} whose lookups agree with the fully populated original.
     */
    public void testMatchAllBitSet() throws Exception {
        final int maxDocs = randomIntBetween(1, 128);
        final FixedBitSet expected = new FixedBitSet(maxDocs);
        // set every bit in [0, maxDocs)
        expected.set(0, maxDocs);

        final DocIdSetIterator docs = new BitSetIterator(expected, randomNonNegativeLong());
        final BitSet actual = BitSets.of(docs, maxDocs);

        assertThat(actual, instanceOf(MatchAllBitSet.class));
        for (int doc = 0; doc < maxDocs; doc++) {
            assertTrue(actual.get(doc));
            assertThat(actual.nextSetBit(doc), equalTo(expected.nextSetBit(doc)));
            assertThat(actual.prevSetBit(doc), equalTo(expected.prevSetBit(doc)));
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
Expand All @@ -35,6 +34,8 @@
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.lucene.util.BitSets;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.Closeable;
Expand Down Expand Up @@ -277,14 +278,14 @@ private BitSet computeBitSet(Query query, LeafReaderContext context) throws IOEx
searcher.setQueryCache(null);
final Query rewrittenQuery = searcher.rewrite(query);
if (isEffectiveMatchAllDocsQuery(rewrittenQuery)) {
return new MatchAllRoleBitSet(context.reader().maxDoc());
return new MatchAllBitSet(context.reader().maxDoc());
}
final Weight weight = searcher.createWeight(rewrittenQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
final Scorer s = weight.scorer(context);
if (s == null) {
return null;
} else {
return bitSetFromDocIterator(s.iterator(), context.reader().maxDoc());
return BitSets.of(s.iterator(), context.reader().maxDoc());
}
}

Expand Down Expand Up @@ -380,13 +381,4 @@ void verifyInternalConsistency() {
});
}

/**
 * Materializes the documents produced by {@code iter} into a {@link BitSet} holding {@code maxDoc} bits and
 * substitutes a {@link MatchAllRoleBitSet} when all {@code maxDoc} bits are set.
 *
 * @param iter   iterator over the matching documents
 * @param maxDoc number of bits of the resulting bit set
 * @return a {@link MatchAllRoleBitSet} when every bit is set, otherwise the materialized bit set
 * @throws IOException propagated from building the bit set out of the iterator
 */
static BitSet bitSetFromDocIterator(DocIdSetIterator iter, int maxDoc) throws IOException {
    final BitSet materialized = BitSet.of(iter, maxDoc);
    return materialized.cardinality() == maxDoc ? new MatchAllRoleBitSet(maxDoc) : materialized;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
import org.elasticsearch.lucene.util.CombinedBitSet;
import org.elasticsearch.lucene.util.MatchAllBitSet;
import org.elasticsearch.transport.Transports;

import java.io.IOException;
Expand Down Expand Up @@ -59,7 +60,7 @@ private static int computeNumDocs(LeafReader reader, BitSet roleQueryBits) {
final Bits liveDocs = reader.getLiveDocs();
if (roleQueryBits == null) {
return 0;
} else if (roleQueryBits instanceof MatchAllRoleBitSet) {
} else if (roleQueryBits instanceof MatchAllBitSet) {
return reader.numDocs();
} else if (liveDocs == null) {
// slow
Expand Down Expand Up @@ -197,7 +198,7 @@ public Bits getLiveDocs() {
// If we would return a <code>null</code> liveDocs then that would mean that no docs are marked as deleted,
// but that isn't the case. No docs match with the role query and therefore all docs are marked as deleted
return new Bits.MatchNoBits(in.maxDoc());
} else if (roleQueryBits instanceof MatchAllRoleBitSet) {
} else if (roleQueryBits instanceof MatchAllBitSet) {
return actualLiveDocs;
} else if (actualLiveDocs == null) {
return roleQueryBits;
Expand Down
Loading