Improve testing of mismatched field numbers. (#13812)
This improves testing of mismatched field numbers by
 - improving `AssertingDocValuesProducer` to detect mismatched field numbers,
 - introducing a `MismatchedCodecReader` to actually test mismatched field
   numbers on `DocValuesProducer` (wrapping a `MismatchedLeafReader` in a
   `SlowCodecReaderWrapper` doesn't work, since `SlowCodecReaderWrapper`
   implicitly re-resolves the correct `FieldInfo` by name; see the sketch
   below this list),
 - introducing an explicit test for mismatched field numbers for doc values, points,
postings and knn vectors.
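
To illustrate the `SlowCodecReaderWrapper` point above, here is a hedged sketch
(paraphrasing, not quoting, Lucene's `SlowCodecReaderWrapper`): the doc values
producer it exposes ignores the incoming `FieldInfo`'s number and re-resolves the
field by name on the wrapped `LeafReader`, so a shuffled field number coming from
`MismatchedLeafReader` never reaches a real codec-level producer.

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;

// Sketch only: the class name is made up and only one method is shown.
abstract class NameResolvingDocValuesProducerSketch extends DocValuesProducer {
  private final LeafReader reader;

  NameResolvingDocValuesProducerSketch(LeafReader reader) {
    this.reader = reader;
  }

  @Override
  public SortedDocValues getSorted(FieldInfo field) throws IOException {
    // Lookup by name: field.number plays no role here, which is why this wrapping
    // cannot exercise mismatched field numbers.
    return reader.getSortedDocValues(field.name);
  }
}

The new `MismatchedCodecReader`, by contrast, exposes the shuffled `FieldInfo`s on
a real `CodecReader`, so merge code ends up passing them to the underlying codec
producers, which is what these tests need to exercise.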

These new tests uncovered a bug when merging sorted doc values: the merge would
call each per-segment doc values producer with the merged field info instead of
that segment's own (see the `mergeSortedField` hunk below and the sketch that
follows it).

Closes #13805
jpountz committed Sep 20, 2024
1 parent dfc454c commit 2d3c032
Showing 15 changed files with 491 additions and 29 deletions.
@@ -83,4 +83,9 @@ public void testEmptyByteVectorData() {
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}

@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}
@@ -82,4 +82,9 @@ public void testEmptyByteVectorData() {
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}

@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}
@@ -72,4 +72,9 @@ public void testEmptyByteVectorData() {
public void testMergingWithDifferentByteKnnFields() {
// unimplemented
}

@Override
public void testMismatchedFields() throws Exception {
// requires byte support
}
}
@@ -622,7 +622,7 @@ public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState)
if (docValuesProducer != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
values = docValuesProducer.getSorted(fieldInfo);
values = docValuesProducer.getSorted(readerFieldInfo);
}
}
if (values == null) {
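
For context, a minimal sketch of the per-segment lookup that the one-line change
above enforces (the helper class is hypothetical; the calls mirror the hunk): each
segment's producer must be asked with that segment's own `FieldInfo`, resolved by
name, because the same field name can map to a different field number in each
segment.

import java.io.IOException;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SortedDocValues;

// Hypothetical helper, not part of the patch.
final class PerSegmentSortedLookup {
  private PerSegmentSortedLookup() {}

  static SortedDocValues sortedValues(
      MergeState mergeState, int segment, FieldInfo mergedFieldInfo) throws IOException {
    DocValuesProducer producer = mergeState.docValuesProducers[segment];
    if (producer == null) {
      return null;
    }
    // Re-resolve the field on this segment's FieldInfos: same name, possibly a
    // different number than mergedFieldInfo.number.
    FieldInfo readerFieldInfo = mergeState.fieldInfos[segment].fieldInfo(mergedFieldInfo.name);
    if (readerFieldInfo == null || readerFieldInfo.getDocValuesType() != DocValuesType.SORTED) {
      return null;
    }
    // Correct: the segment's own FieldInfo, not the merged one.
    return producer.getSorted(readerFieldInfo);
  }
}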
@@ -33,9 +33,9 @@

/** Reads point values previously written with {@link Lucene90PointsWriter} */
public class Lucene90PointsReader extends PointsReader {
final IndexInput indexIn, dataIn;
final SegmentReadState readState;
final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
private final IndexInput indexIn, dataIn;
private final SegmentReadState readState;
private final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();

/** Sole constructor */
public Lucene90PointsReader(SegmentReadState readState) throws IOException {
@@ -253,7 +253,7 @@ public void merge(MergeState mergeState) throws IOException {
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
PointValues aPointValues = reader90.readers.get(readerFieldInfo.number);
PointValues aPointValues = reader90.getValues(readerFieldInfo.name);
if (aPointValues != null) {
pointValues.add(aPointValues);
docMaps.add(mergeState.docMaps[i]);
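
The points-side change above follows the same principle: rather than indexing into
the reader's now-private `readers` map with a caller-supplied field number, the
writer asks for the field by name and lets the reader resolve the number against
its own `SegmentReadState`. A rough sketch of that by-name lookup (an assumption
about `Lucene90PointsReader.getValues`, with a plain `Map` standing in for its
`IntObjectHashMap`):

import java.util.Map;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.PointValues;

// Sketch only: stand-in fields for the reader's readState.fieldInfos and readers.
final class ByNameLookupSketch {
  private final FieldInfos fieldInfos;
  private final Map<Integer, PointValues> readers; // keyed by this segment's field numbers

  ByNameLookupSketch(FieldInfos fieldInfos, Map<Integer, PointValues> readers) {
    this.fieldInfos = fieldInfos;
    this.readers = readers;
  }

  PointValues getValues(String fieldName) {
    FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
    if (fieldInfo == null) {
      throw new IllegalArgumentException("field \"" + fieldName + "\" is unrecognized");
    }
    return readers.get(fieldInfo.number);
  }
}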
@@ -25,6 +25,7 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -57,7 +58,8 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
assert state.fieldInfos.hasDocValues();
DocValuesProducer producer = in.fieldsProducer(state);
assert producer != null;
return new AssertingDocValuesProducer(producer, state.segmentInfo.maxDoc(), false);
return new AssertingDocValuesProducer(
producer, state.fieldInfos, state.segmentInfo.maxDoc(), false);
}

static class AssertingDocValuesConsumer extends DocValuesConsumer {
@@ -212,12 +214,15 @@ public void close() throws IOException {

static class AssertingDocValuesProducer extends DocValuesProducer {
private final DocValuesProducer in;
private final FieldInfos fieldInfos;
private final int maxDoc;
private final boolean merging;
private final Thread creationThread;

AssertingDocValuesProducer(DocValuesProducer in, int maxDoc, boolean merging) {
AssertingDocValuesProducer(
DocValuesProducer in, FieldInfos fieldInfos, int maxDoc, boolean merging) {
this.in = in;
this.fieldInfos = fieldInfos;
this.maxDoc = maxDoc;
this.merging = merging;
this.creationThread = Thread.currentThread();
@@ -227,6 +232,7 @@ static class AssertingDocValuesProducer extends DocValuesProducer {

@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@@ -238,6 +244,7 @@ public NumericDocValues getNumeric(FieldInfo field) throws IOException {

@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@@ -249,6 +256,7 @@ public BinaryDocValues getBinary(FieldInfo field) throws IOException {

@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@@ -260,6 +268,7 @@ public SortedDocValues getSorted(FieldInfo field) throws IOException {

@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@@ -271,6 +280,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
assert fieldInfos.fieldInfo(field.name).number == field.number;
if (merging) {
AssertingCodec.assertThread("DocValuesProducer", creationThread);
}
@@ -293,7 +303,7 @@ public void checkIntegrity() throws IOException {

@Override
public DocValuesProducer getMergeInstance() {
return new AssertingDocValuesProducer(in.getMergeInstance(), maxDoc, true);
return new AssertingDocValuesProducer(in.getMergeInstance(), fieldInfos, maxDoc, true);
}

@Override
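
With `fieldInfos` captured at construction and threaded through `getMergeInstance`,
every `get*` method above now asserts that the incoming `FieldInfo` carries the
field number that this producer's own segment assigned to that field name. A merge
that passes the merged `FieldInfo` down to a per-segment producer, as the pre-fix
`mergeSortedField` did, is exactly the kind of caller this assertion is meant to
catch.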
@@ -64,6 +64,7 @@
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@@ -3840,4 +3841,74 @@ private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}

public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("lucene")));
doc.add(new NumericDocValuesField("numeric", 0L));
doc.add(new SortedDocValuesField("sorted", new BytesRef("search")));
doc.add(new SortedNumericDocValuesField("sorted_numeric", 1L));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef("engine")));
w1.addDocument(doc);

Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();

DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();

LeafReader leafReader = getOnlyLeafReader(reader);

BinaryDocValues bdv = leafReader.getBinaryDocValues("binary");
assertNotNull(bdv);
assertEquals(0, bdv.nextDoc());
assertEquals(new BytesRef("lucene"), bdv.binaryValue());
assertEquals(1, bdv.nextDoc());
assertEquals(new BytesRef("lucene"), bdv.binaryValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, bdv.nextDoc());

NumericDocValues ndv = leafReader.getNumericDocValues("numeric");
assertNotNull(ndv);
assertEquals(0, ndv.nextDoc());
assertEquals(0, ndv.longValue());
assertEquals(1, ndv.nextDoc());
assertEquals(0, ndv.longValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, ndv.nextDoc());

SortedDocValues sdv = leafReader.getSortedDocValues("sorted");
assertNotNull(sdv);
assertEquals(0, sdv.nextDoc());
assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue()));
assertEquals(1, sdv.nextDoc());
assertEquals(new BytesRef("search"), sdv.lookupOrd(sdv.ordValue()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sdv.nextDoc());

SortedNumericDocValues sndv = leafReader.getSortedNumericDocValues("sorted_numeric");
assertNotNull(sndv);
assertEquals(0, sndv.nextDoc());
assertEquals(1, sndv.nextValue());
assertEquals(1, sndv.nextDoc());
assertEquals(1, sndv.nextValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, sndv.nextDoc());

SortedSetDocValues ssdv = leafReader.getSortedSetDocValues("sorted_set");
assertNotNull(ssdv);
assertEquals(0, ssdv.nextDoc());
assertEquals(new BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd()));
assertEquals(1, ssdv.nextDoc());
assertEquals(new BytesRef("engine"), ssdv.lookupOrd(ssdv.nextOrd()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, ssdv.nextDoc());

IOUtils.close(reader, w2, dir1, dir2);
}
}
@@ -61,6 +61,7 @@
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorEncoding;
@@ -1824,4 +1825,51 @@ public void testVectorValuesReportCorrectDocs() throws Exception {
}
}
}

public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new KnnFloatVectorField("float", new float[] {1f}));
doc.add(new KnnByteVectorField("byte", new byte[] {42}));
w1.addDocument(doc);

Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();

DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();

LeafReader leafReader = getOnlyLeafReader(reader);

ByteVectorValues byteVectors = leafReader.getByteVectorValues("byte");
assertNotNull(byteVectors);
assertEquals(0, byteVectors.nextDoc());
assertArrayEquals(new byte[] {42}, byteVectors.vectorValue());
assertEquals(1, byteVectors.nextDoc());
assertArrayEquals(new byte[] {42}, byteVectors.vectorValue());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, byteVectors.nextDoc());

FloatVectorValues floatVectors = leafReader.getFloatVectorValues("float");
assertNotNull(floatVectors);
assertEquals(0, floatVectors.nextDoc());
float[] vector = floatVectors.vectorValue();
assertEquals(1, vector.length);
assertEquals(1f, vector[0], 0f);
assertEquals(1, floatVectors.nextDoc());
vector = floatVectors.vectorValue();
assertEquals(1, vector.length);
assertEquals(1f, vector[0], 0f);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, floatVectors.nextDoc());

IOUtils.close(reader, w2, dir1, dir2);
}
}
@@ -30,6 +30,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CodecReader;
@@ -46,6 +47,7 @@
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.internal.tests.ConcurrentMergeSchedulerAccess;
import org.apache.lucene.internal.tests.TestSecrets;
@@ -1408,4 +1410,80 @@ public int getDocCount() {
}
};
}

public void testMismatchedFields() throws Exception {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
Document doc = new Document();
doc.add(new LongPoint("f", 1L));
doc.add(new LongPoint("g", 42L, 43L));
w1.addDocument(doc);

Directory dir2 = newDirectory();
IndexWriter w2 =
new IndexWriter(dir2, newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler()));
w2.addDocument(doc);
w2.commit();

DirectoryReader reader = DirectoryReader.open(w1);
w1.close();
w2.addIndexes(new MismatchedCodecReader((CodecReader) getOnlyLeafReader(reader), random()));
reader.close();
w2.forceMerge(1);
reader = DirectoryReader.open(w2);
w2.close();

LeafReader leafReader = getOnlyLeafReader(reader);
assertEquals(2, leafReader.maxDoc());

PointValues fPoints = leafReader.getPointValues("f");
assertEquals(2, fPoints.size());
fPoints.intersect(
new IntersectVisitor() {

int expectedDoc = 0;

@Override
public void visit(int docID, byte[] packedValue) throws IOException {
assertEquals(LongPoint.pack(1L), new BytesRef(packedValue));
assertEquals(expectedDoc++, docID);
}

@Override
public void visit(int docID) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});

PointValues gPoints = leafReader.getPointValues("g");
assertEquals(2, gPoints.size());
gPoints.intersect(
new IntersectVisitor() {

int expectedDoc = 0;

@Override
public void visit(int docID, byte[] packedValue) throws IOException {
assertEquals(LongPoint.pack(42L, 43L), new BytesRef(packedValue));
assertEquals(expectedDoc++, docID);
}

@Override
public void visit(int docID) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});

IOUtils.close(reader, w2, dir1, dir2);
}
}