Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump lucene codec to 99 #1383

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Upgrade urllib3 to 1.26.17 [#1278](https://github.com/opensearch-project/k-NN/pull/1278)
* Upgrade urllib3 to 1.26.18 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
* Upgrade guava to 32.1.3 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
* Bump lucene codec to 99 [#1383](https://github.com/opensearch-project/k-NN/pull/1383)
### Refactoring
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

package org.opensearch.knn.index.codec.KNN950Codec;

import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.util.KNNEngine;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.Builder;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNNCodecVersion;
import org.opensearch.knn.index.codec.KNNFormatFacade;

public class KNN990Codec extends FilterCodec {
naveentatikonda marked this conversation as resolved.
Show resolved Hide resolved
// Version descriptor this codec is bound to; the constructors read the codec name, default delegate,
// default per-field vectors format and the format-facade supplier from it.
private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_9_0;
// Facade created from the delegate in the constructor; backs docValuesFormat() and compoundFormat().
private final KNNFormatFacade knnFormatFacade;
// Format handed back by knnVectorsFormat(); assigned from the constructor argument.
private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;

/**
 * Creates the codec with the defaults registered for {@code V_9_9_0}: the version's default
 * delegate codec and its default per-field k-NN vectors format.
 */
public KNN990Codec() {
    this(
        VERSION.getDefaultCodecDelegate(),
        VERSION.getPerFieldKnnVectorsFormat()
    );
}

/**
 * Creates the codec around an explicit delegate and per-field vectors format. The codec name is
 * taken from {@code VERSION}; the doc-values/compound format facade is derived from the delegate.
 * This constructor also backs the Lombok-generated {@code builder()}.
 *
 * @param delegate codec that performs every operation this codec does not override
 * @param knnVectorsFormat per-field format returned by {@link #knnVectorsFormat()}
 */
@Builder
protected KNN990Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) {
    super(VERSION.getCodecName(), delegate);
    this.knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate);
    this.perFieldKnnVectorsFormat = knnVectorsFormat;
}

/**
 * Returns the doc-values format exposed by this codec's format facade.
 *
 * @return the facade's {@link DocValuesFormat}
 */
@Override
public DocValuesFormat docValuesFormat() {
    final DocValuesFormat facadeFormat = knnFormatFacade.docValuesFormat();
    return facadeFormat;
}

/**
 * Returns the compound format exposed by this codec's format facade.
 *
 * @return the facade's {@link CompoundFormat}
 */
@Override
public CompoundFormat compoundFormat() {
    final CompoundFormat facadeFormat = knnFormatFacade.compoundFormat();
    return facadeFormat;
}

/**
 * Returns the per-field k-NN vectors format supplied at construction time.
 *
 * <p>When the codec is created through the builder without calling {@code knnVectorsFormat(...)},
 * the field is left {@code null}. Instead of leaking that {@code null} to the caller, fall back to
 * the {@link FilterCodec} behaviour, which returns the delegate codec's vectors format.
 *
 * @return the configured per-field format, or the delegate's format when none was configured
 */
@Override
public KnnVectorsFormat knnVectorsFormat() {
    if (perFieldKnnVectorsFormat == null) {
        return super.knnVectorsFormat();
    }
    return perFieldKnnVectorsFormat;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.util.KNNEngine;

import java.util.Optional;

/**
* Class provides per field format implementation for Lucene Knn vector type
*/
public class KNN990PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat {

/**
 * Wires the base per-field format with Lucene 9.9 HNSW defaults: the default max connections,
 * the default beam width, a supplier for a default-configured {@code Lucene99HnswVectorsFormat},
 * and a factory that builds one from an explicit (max connections, beam width) pair.
 * How the base class chooses between the supplier and the factory is not visible here.
 *
 * @param mapperService optional mapper service forwarded unchanged to the base class
 */
public KNN990PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
super(
mapperService,
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
() -> new Lucene99HnswVectorsFormat(),

Check warning on line 25 in src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java#L25

Added line #L25 was not covered by tests
(maxConnm, beamWidth) -> new Lucene99HnswVectorsFormat(maxConnm, beamWidth)
);
}

/**
 * Returns the maximum vector dimension that {@link KNNEngine} reports for the Lucene engine.
 *
 * @param fieldName name of the field; not used, the limit is the same for every field
 * @return the value of {@code KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE)}
 */
@Override
public int getMaxDimensions(String fieldName) {
    return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec;
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
Expand All @@ -23,6 +24,8 @@
import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec;
import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec;
import org.opensearch.knn.index.codec.KNN990Codec.KNN990PerFieldKnnVectorsFormat;

import java.util.Optional;
import java.util.function.BiFunction;
Expand Down Expand Up @@ -92,9 +95,24 @@ public enum KNNCodecVersion {
.knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
.build(),
KNN950Codec::new
),

V_9_9_0(
"KNN990Codec",
new Lucene99Codec(),
new KNN990PerFieldKnnVectorsFormat(Optional.empty()),
(delegate) -> new KNNFormatFacade(
new KNN80DocValuesFormat(delegate.docValuesFormat()),
new KNN80CompoundFormat(delegate.compoundFormat())
),
(userCodec, mapperService) -> KNN990Codec.builder()
.delegate(userCodec)
.knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
.build(),
KNN990Codec::new
);

private static final KNNCodecVersion CURRENT = V_9_5_0;
private static final KNNCodecVersion CURRENT = V_9_9_0;

private final String codecName;
private final Codec defaultCodecDelegate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec
org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec
org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec
org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@

package org.opensearch.knn.index.codec.KNN910Codec;

import org.junit.Ignore;
import org.opensearch.knn.index.codec.KNNCodecTestCase;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

public class KNN910CodecTests extends KNNCodecTestCase {

@Ignore
naveentatikonda marked this conversation as resolved.
Show resolved Hide resolved
public void testMultiFieldsKnnIndex() throws Exception {
    // Exercise the codec-taking overload with a default-constructed KNN910Codec.
    // NOTE(review): the @Ignore on this test carries no reason string; record why it is disabled.
    final KNN910Codec codec = new KNN910Codec();
    testMultiFieldsKnnIndex(codec);
}

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@Ignore
public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException {
    // Exercise the codec-taking overload with a default-constructed KNN910Codec.
    final KNN910Codec codec = new KNN910Codec();
    testBuildFromModelTemplate(codec);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

package org.opensearch.knn.index.codec.KNN920Codec;

import org.junit.Ignore;
import org.opensearch.knn.index.codec.KNNCodecTestCase;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
Expand All @@ -13,10 +14,12 @@

public class KNN920CodecTests extends KNNCodecTestCase {

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@Ignore
public void testMultiFieldsKnnIndex() throws Exception {
    // Build the codec on the default delegate registered for V_9_2_0.
    final KNN920Codec codec = KNN920Codec.builder()
        .delegate(V_9_2_0.getDefaultCodecDelegate())
        .build();
    testMultiFieldsKnnIndex(codec);
}

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@Ignore
public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException {
    // Build the codec on the default delegate registered for V_9_2_0.
    final KNN920Codec codec = KNN920Codec.builder()
        .delegate(V_9_2_0.getDefaultCodecDelegate())
        .build();
    testBuildFromModelTemplate(codec);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.knn.index.codec.KNN940Codec;

import org.apache.lucene.codecs.Codec;
import org.junit.Ignore;
import org.opensearch.knn.index.codec.KNNCodecTestCase;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
Expand All @@ -14,10 +15,12 @@

public class KNN940CodecTests extends KNNCodecTestCase {

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@Ignore
public void testMultiFieldsKnnIndex() throws Exception {
    // Build the codec on the default delegate registered for V_9_4_0.
    final KNN940Codec codec = KNN940Codec.builder()
        .delegate(V_9_4_0.getDefaultCodecDelegate())
        .build();
    testMultiFieldsKnnIndex(codec);
}

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@Ignore
public void testBuildFromModelTemplate() throws InterruptedException, ExecutionException, IOException {
    // Build the codec on the default delegate registered for V_9_4_0.
    final KNN940Codec codec = KNN940Codec.builder()
        .delegate(V_9_4_0.getDefaultCodecDelegate())
        .build();
    testBuildFromModelTemplate(codec);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,21 @@

import lombok.SneakyThrows;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.junit.Ignore;
import org.opensearch.knn.index.codec.KNNCodecTestCase;

import java.util.Optional;
import java.util.function.Function;

import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0;

public class KNN950CodecTests extends KNNCodecTestCase {

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@SneakyThrows
@Ignore
public void testMultiFieldsKnnIndex() {
    // Build the codec on the default delegate registered for V_9_5_0.
    final KNN950Codec codec = KNN950Codec.builder()
        .delegate(V_9_5_0.getDefaultCodecDelegate())
        .build();
    testMultiFieldsKnnIndex(codec);
}

// NOTE(review): @Ignore carries no reason string; record why this test is disabled.
@SneakyThrows
@Ignore
public void testBuildFromModelTemplate() {
    // Build the codec on the default delegate registered for V_9_5_0.
    final KNN950Codec codec = KNN950Codec.builder()
        .delegate(V_9_5_0.getDefaultCodecDelegate())
        .build();
    testBuildFromModelTemplate(codec);
}
Expand All @@ -33,19 +31,4 @@ public void testCodecSetsCustomPerFieldKnnVectorsFormat() {
final Codec codec = new KNN950Codec();
assertTrue(codec.knnVectorsFormat() instanceof KNN950PerFieldKnnVectorsFormat);
}

// IMPORTANT: When this Codec is moved to a backwards Codec, this test needs to be removed, because it attempts to
// write with a read only codec, which will fail
@SneakyThrows
public void testKnnVectorIndex() {
Function<MapperService, PerFieldKnnVectorsFormat> perFieldKnnVectorsFormatProvider = (
mapperService) -> new KNN950PerFieldKnnVectorsFormat(Optional.of(mapperService));

Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN950Codec.builder()
.delegate(V_9_5_0.getDefaultCodecDelegate())
.knnVectorsFormat(knnVectorFormat)
.build();

testKnnVectorIndex(knnCodecProvider, perFieldKnnVectorsFormatProvider);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.SneakyThrows;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNNCodecTestCase;

import java.util.Optional;
import java.util.function.Function;

import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_9_0;

/**
 * Tests for {@link KNN990Codec}. Each test feeds a codec (or codec factory) into a scenario
 * overload that this class does not define itself and therefore inherits from its superclass.
 */
public class KNN990CodecTests extends KNNCodecTestCase {

    /** Runs the multi-field scenario with a codec built on the V_9_9_0 default delegate. */
    @SneakyThrows
    public void testMultiFieldsKnnIndex() {
        final KNN990Codec codec = KNN990Codec.builder()
            .delegate(V_9_9_0.getDefaultCodecDelegate())
            .build();
        testMultiFieldsKnnIndex(codec);
    }

    /** Runs the build-from-model-template scenario with a codec built on the V_9_9_0 default delegate. */
    @SneakyThrows
    public void testBuildFromModelTemplate() {
        final KNN990Codec codec = KNN990Codec.builder()
            .delegate(V_9_9_0.getDefaultCodecDelegate())
            .build();
        testBuildFromModelTemplate(codec);
    }

    /** Checks that the no-arg codec exposes the KNN990 per-field k-NN vectors format. */
    public void testCodecSetsCustomPerFieldKnnVectorsFormat() {
        final Codec defaultCodec = new KNN990Codec();
        final boolean usesCustomFormat = defaultCodec.knnVectorsFormat() instanceof KNN990PerFieldKnnVectorsFormat;
        assertTrue(usesCustomFormat);
    }

    // IMPORTANT: once this codec becomes a backwards codec this test has to be removed, because it
    // writes with the codec and a read-only codec will fail on write.
    @SneakyThrows
    public void testKnnVectorIndex() {
        // Creates the per-field format for whichever MapperService the scenario supplies.
        final Function<MapperService, PerFieldKnnVectorsFormat> formatProvider = (
            service) -> new KNN990PerFieldKnnVectorsFormat(Optional.of(service));

        // Wraps a given per-field format in a KNN990Codec on the V_9_9_0 default delegate.
        final Function<PerFieldKnnVectorsFormat, Codec> codecProvider = (format) -> KNN990Codec.builder()
            .delegate(V_9_9_0.getDefaultCodecDelegate())
            .knnVectorsFormat(format)
            .build();

        testKnnVectorIndex(codecProvider, formatProvider);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec;
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
import org.opensearch.knn.KNNTestCase;

import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ public static SegmentInfo newSegmentInfo(final Directory directory, final String
segmentName,
docsInSegment,
false,
false,
codec,
Collections.emptyMap(),
randomByteArrayOfLength(StringHelper.ID_LENGTH),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ public void testQueryScoreForFaissWithModel() throws IOException {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -270,6 +271,7 @@ public void testShardWithoutFiles() {
SEGMENT_NAME,
100,
false,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -313,6 +315,7 @@ public void testEmptyQueryResults() {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -369,6 +372,7 @@ public void testANNWithFilterQuery_whenDoingANN_thenSuccess() {
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down Expand Up @@ -617,6 +621,7 @@ private void testQueryScore(
SEGMENT_NAME,
100,
true,
false,
KNNCodecVersion.current().getDefaultCodecDelegate(),
Map.of(),
new byte[StringHelper.ID_LENGTH],
Expand Down
Loading