From 7dbbd0daa97e1ffd0c8dd27d1441c257cf8751da Mon Sep 17 00:00:00 2001
From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com>
Date: Fri, 29 Nov 2024 10:28:32 +0000
Subject: [PATCH] Add IndexInput isLoaded (#13998)
This commit adds IndexInput::isLoaded to help determine if the contents of an input are resident in physical memory.
The intent of this new method is to help build inspection and diagnostic infrastructure on top.
---
.../org/apache/lucene/store/IndexInput.java | 16 +++++++
.../lucene/store/RandomAccessInput.java | 10 +++++
.../lucene/store/MemorySegmentIndexInput.java | 10 +++++
.../tests/store/BaseDirectoryTestCase.java | 42 +++++++++++++++++++
.../tests/store/MockIndexInputWrapper.java | 8 ++++
.../store/SerialIOCountingDirectory.java | 6 +++
6 files changed, 92 insertions(+)
diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
index 09f9211ac176..b649ace8c4db 100644
--- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
@@ -18,6 +18,7 @@
import java.io.Closeable;
import java.io.IOException;
+import java.util.Optional;
import org.apache.lucene.codecs.CompoundFormat;
/**
@@ -234,4 +235,19 @@ public void prefetch(long offset, long length) throws IOException {}
* <p>The default implementation is a no-op.
*/
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {}
+
+  /**
+   * Returns a hint whether all the contents of this input are resident in physical memory. It's a
+   * hint because the operating system may have paged out some of the data by the time this method
+   * returns. If the optional is true, then it's likely that the contents of this input are resident
+   * in physical memory. A value of false does not imply that the contents are not resident in
+   * physical memory. An empty optional is returned if it is not possible to determine.
+   *
+   * <p>This runs in linear time with the {@link #length()} of this input / page size.
+   *
+   * <p>The default implementation returns an empty optional.
+   */
+  public Optional<Boolean> isLoaded() {
+    return Optional.empty();
+  }
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java b/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java
index 08b2e83d36d4..44127d90a980 100644
--- a/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java
@@ -17,6 +17,7 @@
package org.apache.lucene.store;
import java.io.IOException;
+import java.util.Optional;
import org.apache.lucene.util.BitUtil; // javadocs
/**
@@ -77,4 +78,13 @@ default void readBytes(long pos, byte[] bytes, int offset, int length) throws IO
* @see IndexInput#prefetch
*/
default void prefetch(long offset, long length) throws IOException {}
+
+  /**
+   * Returns a hint whether all the contents of this input are resident in physical memory.
+   *
+   * @see IndexInput#isLoaded()
+   */
+  default Optional<Boolean> isLoaded() {
+    return Optional.empty(); // default: residency cannot be determined
+  }
}
diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
index 5824e78bdc73..2fc9f6369b72 100644
--- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
@@ -420,6 +420,16 @@ void advise(long offset, long length, IOConsumer advice) throws I
}
}
+  @Override
+  public Optional<Boolean> isLoaded() {
+    for (MemorySegment seg : segments) {
+      if (seg.isLoaded() == false) {
+        return Optional.of(Boolean.FALSE); // one non-resident segment settles the answer
+      }
+    }
+    return Optional.of(Boolean.TRUE); // every segment of this input reported itself loaded
+  }
+
@Override
public byte readByte(long pos) throws IOException {
try {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java
index e041afdbcb38..d8ec0529b5f6 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java
@@ -51,9 +51,11 @@
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.tests.mockfile.ExtrasFS;
@@ -1636,4 +1638,44 @@ private void doTestPrefetch(int startOffset) throws IOException {
}
}
}
+
+  public void testIsLoaded() throws IOException {
+    testIsLoaded(0); // offset 0: the input under test is a clone of the whole file
+  }
+
+  public void testIsLoadedOnSlice() throws IOException {
+    testIsLoaded(TestUtil.nextInt(random(), 1, 1024)); // non-zero offset: test a slice
+  }
+
+  private void testIsLoaded(int startOffset) throws IOException {
+    try (Directory directory = getDirectory(createTempDir())) {
+      if (FilterDirectory.unwrap(directory) instanceof MMapDirectory mmap) {
+        mmap.setPreload(MMapDirectory.ALL_FILES);
+      }
+      final int fileLength = startOffset + TestUtil.nextInt(random(), 16384, 65536);
+      final byte[] payload = new byte[fileLength];
+      random().nextBytes(payload);
+      try (IndexOutput output = directory.createOutput("temp.bin", IOContext.DEFAULT)) {
+        output.writeBytes(payload, payload.length);
+      }
+
+      // direct IO wraps MMap but does not support isLoaded
+      final boolean directIO = directory.getClass().getName().contains("DirectIO");
+      final boolean expectHint =
+          FilterDirectory.unwrap(directory) instanceof MMapDirectory && directIO == false;
+      try (IndexInput whole = directory.openInput("temp.bin", IOContext.DEFAULT)) {
+        final IndexInput input =
+            startOffset == 0
+                ? whole.clone()
+                : whole.slice("slice", startOffset, fileLength - startOffset);
+        final var loaded = input.isLoaded();
+        if (expectHint == false) {
+          assertFalse(loaded.isPresent());
+        } else {
+          assertTrue(loaded.isPresent());
+          assertTrue(loaded.get());
+        }
+      }
+    }
+  }
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java
index aa5c841bf3bd..1dc5456d64ef 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java
@@ -19,6 +19,7 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import org.apache.lucene.internal.tests.TestSecrets;
import org.apache.lucene.store.FilterIndexInput;
@@ -184,6 +185,13 @@ public void prefetch(long offset, long length) throws IOException {
in.prefetch(offset, length);
}
+  @Override
+  public Optional<Boolean> isLoaded() {
+    ensureOpen(); // same guards as the other delegating methods of this wrapper
+    ensureAccessible();
+    return in.isLoaded();
+  }
+
@Override
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
ensureOpen();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java
index 2afbc2fd4a76..1b4234c3d79f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java
@@ -17,6 +17,7 @@
package org.apache.lucene.tests.store;
import java.io.IOException;
+import java.util.Optional;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.internal.hppc.LongHashSet;
import org.apache.lucene.store.ChecksumIndexInput;
@@ -206,5 +207,10 @@ public IndexInput clone() {
IndexInput clone = in.clone();
return new SerializedIOCountingIndexInput(clone, readAdvice, sliceOffset, sliceLength);
}
+
+    @Override
+    public Optional<Boolean> isLoaded() {
+      return in.isLoaded(); // pure delegation to the wrapped input
+    }
}
}