Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Move vector search from IndexInput to RandomAccessInput (#13938) #28

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ public void readInts(int[] dst, int offset, int length) throws IOException {
public void readFloats(float[] dst, int offset, int length) throws IOException {
in.readFloats(dst, offset, length);
for (int i = 0; i < length; ++i) {
dst[offset + i] =
Float.intBitsToFloat(Integer.reverseBytes(Float.floatToRawIntBits(dst[offset + i])));
dst[offset + i] = revertFloat(dst[offset + i]);
}
}

Expand Down Expand Up @@ -106,6 +105,14 @@ public byte readByte(long pos) throws IOException {
return in.readByte(pos);
}

/**
 * Reads {@code length} floats from the wrapped input at position {@code pos} into {@code floats},
 * starting at index {@code offset}, and then passes every value that was read through
 * {@code revertFloat} to reverse its byte order.
 *
 * @param pos position handed to the wrapped input's {@code readFloats}
 * @param floats destination array
 * @param offset index of the first destination slot
 * @param length number of floats to read and convert
 * @throws IOException if the wrapped input's read throws it
 */
@Override
public void readFloats(long pos, float[] floats, int offset, int length) throws IOException {
  in.readFloats(pos, floats, offset, length);
  // Walk the freshly filled window and fix up each element in place.
  for (int idx = offset, converted = 0; converted < length; idx++, converted++) {
    final float asRead = floats[idx];
    floats[idx] = revertFloat(asRead);
  }
}

@Override
public short readShort(long pos) throws IOException {
return Short.reverseBytes(in.readShort(pos));
Expand All @@ -120,5 +127,19 @@ public int readInt(long pos) throws IOException {
public long readLong(long pos) throws IOException {
return Long.reverseBytes(in.readLong(pos));
}

/**
 * Returns the object produced by {@code super.clone()}.
 *
 * @return the clone created by the superclass
 * @throws Error if {@code super.clone()} throws {@link CloneNotSupportedException}; the message
 *     marks this as a situation that is not expected to occur
 */
@Override
public Object clone() {
  final Object copy;
  try {
    copy = super.clone();
  } catch (CloneNotSupportedException cloneFailure) {
    throw new Error(
        "This cannot happen: Failing to clone EndiannessReverserRandomAccessInput", cloneFailure);
  }
  return copy;
}
}

/**
 * Reverses the byte order of a float's raw 32-bit representation.
 *
 * @param value the float whose bytes are to be swapped
 * @return the float decoded from the byte-reversed bit pattern of {@code value}
 */
private static float revertFloat(float value) {
  final int rawBits = Float.floatToRawIntBits(value);
  final int swappedBits = Integer.reverseBytes(rawBits);
  return Float.intBitsToFloat(swappedBits);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
new OffHeapByteVectorValues.DenseOffHeapVectorValues(
fieldInfo.getVectorDimension(),
docsWithField.cardinality(),
vectorDataInput,
vectorDataInput.toRandomAccessInput(),
byteSize,
defaultFlatVectorScorer,
fieldInfo.getVectorSimilarityFunction()));
Expand All @@ -462,7 +462,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
fieldInfo.getVectorDimension(),
docsWithField.cardinality(),
vectorDataInput,
vectorDataInput.toRandomAccessInput(),
byteSize,
defaultFlatVectorScorer,
fieldInfo.getVectorSimilarityFunction()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public float binaryDotProductMemSeg() throws IOException {
static KnnVectorValues vectorValues(
int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
return new OffHeapByteVectorValues.DenseOffHeapVectorValues(
dims, size, in.slice("test", 0, in.length()), dims, new ThrowingFlatVectorScorer(), sim);
dims, size, in.toRandomAccessInput(), dims, new ThrowingFlatVectorScorer(), sim);
}

static final class ThrowingFlatVectorScorer implements FlatVectorsScorer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
*/
package org.apache.lucene.codecs.lucene95;

import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;

/**
* Implementors can return the IndexInput from which their values are read. For use by vector
* Implementors can return the RandomAccessInput from which their values are read. For use by vector
* quantizers.
*/
public interface HasIndexSlice {

/** Returns an IndexInput from which to read this instance's values. */
IndexInput getSlice();
/** Returns a RandomAccessInput from which to read this instance's values. */
RandomAccessInput getSlice();
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues implement

protected final int dimension;
protected final int size;
protected final IndexInput slice;
protected final RandomAccessInput slice;
protected int lastOrd = -1;
protected final byte[] binaryValue;
protected final ByteBuffer byteBuffer;
Expand All @@ -48,7 +48,7 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues implement
OffHeapByteVectorValues(
int dimension,
int size,
IndexInput slice,
RandomAccessInput slice,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
VectorSimilarityFunction similarityFunction) {
Expand Down Expand Up @@ -82,13 +82,13 @@ public byte[] vectorValue(int targetOrd) throws IOException {
}

@Override
public IndexInput getSlice() {
public RandomAccessInput getSlice() {
return slice;
}

private void readValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
slice.readBytes(
(long) targetOrd * byteSize, byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}

public static OffHeapByteVectorValues load(
Expand All @@ -104,7 +104,7 @@ public static OffHeapByteVectorValues load(
if (configuration.isEmpty() || vectorEncoding != VectorEncoding.BYTE) {
return new EmptyOffHeapVectorValues(dimension, flatVectorsScorer, vectorSimilarityFunction);
}
IndexInput bytesSlice = vectorData.slice("vector-data", vectorDataOffset, vectorDataLength);
RandomAccessInput bytesSlice = vectorData.randomAccessSlice(vectorDataOffset, vectorDataLength);
if (configuration.isDense()) {
return new DenseOffHeapVectorValues(
dimension,
Expand Down Expand Up @@ -133,7 +133,7 @@ public static class DenseOffHeapVectorValues extends OffHeapByteVectorValues {
public DenseOffHeapVectorValues(
int dimension,
int size,
IndexInput slice,
RandomAccessInput slice,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
VectorSimilarityFunction vectorSimilarityFunction) {
Expand All @@ -143,7 +143,12 @@ public DenseOffHeapVectorValues(
@Override
public DenseOffHeapVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(
dimension, size, slice.clone(), byteSize, flatVectorsScorer, similarityFunction);
dimension,
size,
(RandomAccessInput) slice.clone(),
byteSize,
flatVectorsScorer,
similarityFunction);
}

@Override
Expand Down Expand Up @@ -186,7 +191,7 @@ private static class SparseOffHeapVectorValues extends OffHeapByteVectorValues {
public SparseOffHeapVectorValues(
OrdToDocDISIReaderConfiguration configuration,
IndexInput dataIn,
IndexInput slice,
RandomAccessInput slice,
int dimension,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
Expand Down Expand Up @@ -220,7 +225,7 @@ public SparseOffHeapVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(
configuration,
dataIn,
slice.clone(),
(RandomAccessInput) slice.clone(),
dimension,
byteSize,
flatVectorsScorer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues impleme

protected final int dimension;
protected final int size;
protected final IndexInput slice;
protected final RandomAccessInput slice;
protected final int byteSize;
protected int lastOrd = -1;
protected final float[] value;
Expand All @@ -46,7 +46,7 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues impleme
OffHeapFloatVectorValues(
int dimension,
int size,
IndexInput slice,
RandomAccessInput slice,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
VectorSimilarityFunction similarityFunction) {
Expand All @@ -70,7 +70,7 @@ public int size() {
}

@Override
public IndexInput getSlice() {
public RandomAccessInput getSlice() {
return slice;
}

Expand All @@ -79,8 +79,7 @@ public float[] vectorValue(int targetOrd) throws IOException {
if (lastOrd == targetOrd) {
return value;
}
slice.seek((long) targetOrd * byteSize);
slice.readFloats(value, 0, value.length);
slice.readFloats((long) targetOrd * byteSize, value, 0, value.length);
lastOrd = targetOrd;
return value;
}
Expand All @@ -98,7 +97,7 @@ public static OffHeapFloatVectorValues load(
if (configuration.docsWithFieldOffset == -2 || vectorEncoding != VectorEncoding.FLOAT32) {
return new EmptyOffHeapVectorValues(dimension, flatVectorsScorer, vectorSimilarityFunction);
}
IndexInput bytesSlice = vectorData.slice("vector-data", vectorDataOffset, vectorDataLength);
RandomAccessInput bytesSlice = vectorData.randomAccessSlice(vectorDataOffset, vectorDataLength);
int byteSize = dimension * Float.BYTES;
if (configuration.docsWithFieldOffset == -1) {
return new DenseOffHeapVectorValues(
Expand Down Expand Up @@ -129,7 +128,7 @@ public static class DenseOffHeapVectorValues extends OffHeapFloatVectorValues {
public DenseOffHeapVectorValues(
int dimension,
int size,
IndexInput slice,
RandomAccessInput slice,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
VectorSimilarityFunction similarityFunction) {
Expand All @@ -139,7 +138,12 @@ public DenseOffHeapVectorValues(
@Override
public DenseOffHeapVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(
dimension, size, slice.clone(), byteSize, flatVectorsScorer, similarityFunction);
dimension,
size,
(RandomAccessInput) slice.clone(),
byteSize,
flatVectorsScorer,
similarityFunction);
}

@Override
Expand Down Expand Up @@ -187,7 +191,7 @@ private static class SparseOffHeapVectorValues extends OffHeapFloatVectorValues
public SparseOffHeapVectorValues(
OrdToDocDISIReaderConfiguration configuration,
IndexInput dataIn,
IndexInput slice,
RandomAccessInput slice,
int dimension,
int byteSize,
FlatVectorsScorer flatVectorsScorer,
Expand Down Expand Up @@ -215,7 +219,7 @@ public SparseOffHeapVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(
configuration,
dataIn,
slice.clone(),
(RandomAccessInput) slice.clone(),
dimension,
byteSize,
flatVectorsScorer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
new OffHeapByteVectorValues.DenseOffHeapVectorValues(
fieldInfo.getVectorDimension(),
docsWithField.cardinality(),
finalVectorDataInput,
finalVectorDataInput.toRandomAccessInput(),
fieldInfo.getVectorDimension() * Byte.BYTES,
vectorsScorer,
fieldInfo.getVectorSimilarityFunction()));
Expand All @@ -313,7 +313,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
fieldInfo.getVectorDimension(),
docsWithField.cardinality(),
finalVectorDataInput,
finalVectorDataInput.toRandomAccessInput(),
fieldInfo.getVectorDimension() * Float.BYTES,
vectorsScorer,
fieldInfo.getVectorSimilarityFunction()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ private CompressedInt4DotProduct(
public float score(int vectorOrdinal) throws IOException {
// get compressed vector, in Lucene99, vector values are stored and have a single value for
// offset correction
values.getSlice().seek((long) vectorOrdinal * (values.getVectorByteLength() + Float.BYTES));
values.getSlice().readBytes(compressedVector, 0, compressedVector.length);
long pos = (long) vectorOrdinal * (values.getVectorByteLength() + Float.BYTES);
values.getSlice().readBytes(pos, compressedVector, 0, compressedVector.length);
float vectorOffset = values.getScoreCorrectionConstant(vectorOrdinal);
int dotProduct = VectorUtil.int4DotProductPacked(targetBytes, compressedVector);
// For the current implementation of scalar quantization, all dotproducts should be >= 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
compress,
fieldInfo.getVectorSimilarityFunction(),
vectorsScorer,
quantizationDataInput)));
quantizationDataInput.toRandomAccessInput())));
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(tempQuantizedVectorData, quantizationDataInput);
Expand Down
Loading