diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 026b1cc38a59..01c07d608b63 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,8 @@ Improvements Optimizations --------------------- +* GITHUB#12324: Speed up sparse block advanceExact with tiny step in IndexedDISI. (Guo Feng) + * GITHUB#12270 Don't generate stacktrace in CollectionTerminatedException. (Armin Braun) * GITHUB#12286 Toposort use iterator to avoid stackoverflow. (Tang Donghai) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java index 8da289e3ad35..74b8d593d71f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java @@ -110,12 +110,12 @@ public final class IndexedDISI extends DocIdSetIterator { private static void flush( int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException { - assert block >= 0 && block < 65536; + assert block >= 0 && block < BLOCK_SIZE; out.writeShort((short) block); - assert cardinality > 0 && cardinality <= 65536; + assert cardinality > 0 && cardinality <= BLOCK_SIZE; out.writeShort((short) (cardinality - 1)); if (cardinality > MAX_ARRAY_LENGTH) { - if (cardinality != 65536) { // all docs are set + if (cardinality != BLOCK_SIZE) { // all docs are set if (denseRankPower != -1) { final byte[] rank = createRank(buffer, denseRankPower); out.writeBytes(rank, rank.length); @@ -418,6 +418,7 @@ public static RandomAccessInput createJumpTable( // SPARSE variables boolean exists; + int nextExistDocInBlock = -1; // DENSE variables long word; @@ -495,7 +496,8 @@ private void readBlockHeader() throws IOException { if (numValues <= MAX_ARRAY_LENGTH) { method = Method.SPARSE; blockEnd = slice.getFilePointer() + (numValues << 1); - } else if (numValues == 65536) { + nextExistDocInBlock = -1; + } else if (numValues == BLOCK_SIZE) { method = Method.ALL; blockEnd = slice.getFilePointer(); gap = block - index - 1; @@ -550,6 +552,7 @@ boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException { if (doc >= targetInBlock) { disi.doc = disi.block | doc; disi.exists = true; + disi.nextExistDocInBlock = doc; return true; } } @@ -560,6 +563,10 @@ boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException { boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException { final int targetInBlock = target & 0xFFFF; // TODO: binary search + if (disi.nextExistDocInBlock > targetInBlock) { + assert !disi.exists; + return false; + } if (target == disi.doc) { return disi.exists; } @@ -567,6 +574,7 @@ boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException int doc = Short.toUnsignedInt(disi.slice.readShort()); disi.index++; if (doc >= targetInBlock) { + disi.nextExistDocInBlock = doc; if (doc != targetInBlock) { disi.index--; disi.slice.seek(disi.slice.getFilePointer() - Short.BYTES);