Skip to content

Commit

Permalink
Better paging when random reads go backwards (#12357)
Browse files Browse the repository at this point in the history
When reading data from outside the buffer, BufferedIndexInput always resets
its buffer to start at the new read position. If we are reading backwards (for example,
using an OffHeapFSTStore for a terms dictionary) then this can have the effect of
re-reading the same data over and over again.

This commit changes BufferedIndexInput to use paging when reading backwards,
so that if we ask for a byte that is before the current buffer, we read a block of data
of bufferSize that ends at the previous buffer start.

Fixes #12356
  • Loading branch information
romseygeek authored Jun 9, 2023
1 parent 2934899 commit a51241e
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 36 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ Optimizations

* GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun)

* GITHUB#12357: Better paging when doing backwards random reads. This speeds up
queries relying on terms in NIOFSDirectory and SimpleFSDirectory. (Alan Woodward)

* GITHUB#12339: Optimize part of duplicate calculation numDeletesToMerge in merge phase (fudongying)

Bug Fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc
/** A buffer size for merges set to {@value #MERGE_BUFFER_SIZE}. */
public static final int MERGE_BUFFER_SIZE = 4096;

private int bufferSize = BUFFER_SIZE;
private final int bufferSize;

private ByteBuffer buffer = EMPTY_BYTEBUFFER;

Expand Down Expand Up @@ -72,7 +72,7 @@ public BufferedIndexInput(String resourceDesc, int bufferSize) {
this.bufferSize = bufferSize;
}

/** Returns buffer size. @see #setBufferSize */
/** Returns buffer size */
public final int getBufferSize() {
return bufferSize;
}
Expand Down Expand Up @@ -159,55 +159,53 @@ public final long readLong() throws IOException {
}
}

@Override
public final byte readByte(long pos) throws IOException {
// Computes an offset into the current buffer from an absolute position to read
// `width` bytes from. If the buffer does not contain the position, then we
// readjust the bufferStart and refill.
private long resolvePositionInBuffer(long pos, int width) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit()) {
if (index >= 0 && index <= buffer.limit() - width) {
return index;
}
if (index < 0) {
// if we're moving backwards, then try and fill up the previous page rather than
// starting again at the current pos, to avoid successive backwards reads reloading
// the same data over and over again. We also check that we can read `width`
// bytes without going over the end of the buffer
bufferStart = Math.max(bufferStart - bufferSize, pos + width - bufferSize);
bufferStart = Math.max(bufferStart, 0);
bufferStart = Math.min(bufferStart, pos);
} else {
// we're moving forwards, reset the buffer to start at pos
bufferStart = pos;
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
refill();
index = 0;
}
buffer.limit(0); // trigger refill() on read
seekInternal(bufferStart);
refill();
return pos - bufferStart;
}

@Override
public final byte readByte(long pos) throws IOException {
long index = resolvePositionInBuffer(pos, Byte.BYTES);
return buffer.get((int) index);
}

@Override
public final short readShort(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 1) {
bufferStart = pos;
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
refill();
index = 0;
}
long index = resolvePositionInBuffer(pos, Short.BYTES);
return buffer.getShort((int) index);
}

@Override
public final int readInt(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 3) {
bufferStart = pos;
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
refill();
index = 0;
}
long index = resolvePositionInBuffer(pos, Integer.BYTES);
return buffer.getInt((int) index);
}

@Override
public final long readLong(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 7) {
bufferStart = pos;
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
refill();
index = 0;
}
long index = resolvePositionInBuffer(pos, Long.BYTES);
return buffer.getLong((int) index);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Random;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.ArrayUtil;
Expand Down Expand Up @@ -142,6 +143,72 @@ public void testEOF() throws Exception {
});
}

// Test that when reading backwards, we page backwards rather than refilling
// on every call
public void testBackwardsByteReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
for (int i = 2048; i > 0; i -= random().nextInt(16)) {
assertEquals(byten(i), input.readByte(i));
}
assertEquals(3, input.readCount);
}

public void testBackwardsShortReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(2);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 1)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
assertEquals(bb.getShort(0), input.readShort(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole short
assertTrue(
"Expected 4 or 3, got " + input.readCount, input.readCount == 4 || input.readCount == 3);
}

public void testBackwardsIntReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(4);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 3)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
bb.put(byten(i + 2));
bb.put(byten(i + 3));
assertEquals(bb.getInt(0), input.readInt(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole int
assertTrue(
"Expected 4 or 3, got " + input.readCount, input.readCount == 4 || input.readCount == 3);
}

public void testBackwardsLongReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(8);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 7)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
bb.put(byten(i + 2));
bb.put(byten(i + 3));
bb.put(byten(i + 4));
bb.put(byten(i + 5));
bb.put(byten(i + 6));
bb.put(byten(i + 7));
assertEquals(bb.getLong(0), input.readLong(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole long
assertTrue(
"Expected 4 or 3, got " + input.readCount, input.readCount == 4 || input.readCount == 3);
}

// byten emulates a file - byten(n) returns the n'th byte in that file.
// MyBufferedIndexInput reads this "file".
private static byte byten(long n) {
Expand All @@ -150,7 +217,8 @@ private static byte byten(long n) {

private static class MyBufferedIndexInput extends BufferedIndexInput {
private long pos;
private long len;
private final long len;
private long readCount = 0;

public MyBufferedIndexInput(long len) {
super("MyBufferedIndexInput(len=" + len + ")", BufferedIndexInput.BUFFER_SIZE);
Expand All @@ -164,14 +232,15 @@ public MyBufferedIndexInput() {
}

@Override
protected void readInternal(ByteBuffer b) throws IOException {
protected void readInternal(ByteBuffer b) {
readCount++;
while (b.hasRemaining()) {
b.put(byten(pos++));
}
}

@Override
protected void seekInternal(long pos) throws IOException {
protected void seekInternal(long pos) {
this.pos = pos;
}

Expand Down

0 comments on commit a51241e

Please sign in to comment.