Skip to content

Commit

Permalink
Use delCount of SegmentInfos to calculate numDocs (#36323)
Browse files Browse the repository at this point in the history
Today, we iterate over the hardLiveDocs bitset to calculate the number of
live docs. This calculation can be expensive when soft-deletes is enabled
(the default) on old indices that previously had soft-deletes disabled
and that contain hard-deletes.

Once soft-deletes is enabled, we no longer hard-update or hard-delete
documents directly. Hard-deletes then occur in two scenarios: (1) in old
segments written while soft-deletes was disabled, and (2) when IndexWriter
hits non-aborted exceptions. In both cases, IndexWriter flushes SegmentInfos
before exposing the hard-deletes; thus we can use the hard-delete count of
SegmentInfos.
  • Loading branch information
dnhatn authored Dec 9, 2018
1 parent cc3872d commit d41cf6a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 9 deletions.
28 changes: 19 additions & 9 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -956,18 +956,17 @@ public CacheHelper getReaderCacheHelper() {
super(in, new SubReaderWrapper() {
// Wraps a leaf in a LeafReaderWithLiveDocs built from the hard live docs of its SegmentReader.
// NOTE(review): this listing is a rendered diff without +/- markers, so removed and added lines are
// interleaved. The non-final declarations, the TODO and the counting loop below look like the removed
// side of the change -- confirm against the real file before treating this span as compilable code.
@Override
public LeafReader wrap(LeafReader leaf) {
SegmentReader segmentReader = segmentReader(leaf);
Bits hardLiveDocs = segmentReader.getHardLiveDocs();
final SegmentReader segmentReader = segmentReader(leaf);
final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
if (hardLiveDocs == null) {
// No hard live docs bitset is available: pass null live docs and use maxDoc as the doc count.
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
}
// TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
// Counts the set bits of hardLiveDocs one position at a time.
int numDocs = 0;
for (int i = 0; i < hardLiveDocs.length(); i++) {
if (hardLiveDocs.get(i)) {
numDocs++;
}
}
// Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly.
// Two scenarios that we have hard-deletes: (1) from old segments where soft-deletes was disabled,
// (2) when IndexWriter hits non-aborted exceptions. These two cases, IW flushes SegmentInfos
// before exposing the hard-deletes, thus we can use the hard-delete count of SegmentInfos.
// Derives the count arithmetically: maxDoc minus the delete count reported by the segment info.
final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount();
// With assertions enabled, cross-checks the derived count against a full scan of the bitset.
assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs);
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
}
});
Expand All @@ -984,6 +983,17 @@ public CacheHelper getReaderCacheHelper() {
}
}

/**
 * Counts how many positions of {@code bits} report {@code true} from {@code get(int)}.
 *
 * Walks every index from 0 up to {@code bits.length()}, so the cost grows with the length of
 * the bitset.
 *
 * @param bits the bitset to scan; must not be {@code null} (checked by assertion only)
 * @return the number of indices whose bit is set
 */
private static int popCount(Bits bits) {
    assert bits != null;
    int setCount = 0;
    int index = 0;
    while (index < bits.length()) {
        setCount += bits.get(index) ? 1 : 0;
        index++;
    }
    return setCount;
}

/**
* Returns a numeric docvalues which can be used to soft-delete documents.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2585,6 +2585,11 @@ public long softUpdateDocuments(Term term, Iterable<? extends Iterable<? extends
assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled";
return super.softUpdateDocuments(term, docs, softDeletes);
}
/**
 * Rejects every call: this writer does not support {@code tryDeleteDocument}.
 *
 * Trips an assertion when assertions are enabled; otherwise throws. The exception carries the
 * same explanation as the assertion so that the failure is self-describing even when
 * assertions are disabled.
 *
 * @param readerIn ignored
 * @param docID ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always (when assertions are disabled)
 */
@Override
public long tryDeleteDocument(IndexReader readerIn, int docID) {
    final String reason = "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs";
    assert false : reason;
    throw new UnsupportedOperationException(reason);
}
}

/**
Expand Down

0 comments on commit d41cf6a

Please sign in to comment.