From 2ee8779246f0e96de68123aa5af2501c6a671c7f Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 12 May 2021 16:43:32 +0200
Subject: [PATCH 01/27] [SOLR-12697] add DocValuesFieldValueFeatureScorer to
 read docValues for ltr feature calculation

---
 .../solr/ltr/feature/FieldValueFeature.java  | 146 +++++++-
 .../solr/collection1/conf/schema.xml         |  11 +-
 .../apache/solr/ltr/TestLTROnSolrCloud.java  |  79 ++++-
 .../solr/ltr/TestLTRReRankingPipeline.java   | 334 ++++++++----------
 4 files changed, 359 insertions(+), 211 deletions(-)

diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index d12795d2663..129ecf03950 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -23,24 +23,35 @@
 import java.util.Set;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.BoolField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.NumberType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SolrIndexSearcher;
 /**
  * This feature returns the value of a field in the current document
  * Example configuration:
  * <pre>
{
-  "name":  "rawHits",
-  "class": "org.apache.solr.ltr.feature.FieldValueFeature",
-  "params": {
-      "field": "hits"
-  }
-}
+ * "name": "rawHits", + * "class": "org.apache.solr.ltr.feature.FieldValueFeature", + * "params": { + * "field": "hits", + * "defaultValue": -1 + * } + * } */ public class FieldValueFeature extends Feature { @@ -83,24 +94,42 @@ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, } public class FieldValueFeatureWeight extends FeatureWeight { + private final SchemaField schemaField; public FieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, Query originalQuery, Map efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } } + /** + * Return a FeatureScorer that uses docValues or storedFields if no docValues are present + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { + // always prefer docValues + if (schemaField != null && schemaField.hasDocValues()) { + return new DocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField); + } return new FieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } + /** + * A FeatureScorer that reads the stored value for a field + */ public class FieldValueFeatureScorer extends FeatureScorer { + LeafReaderContext context; - LeafReaderContext context = null; - - public FieldValueFeatureScorer(FeatureWeight weight, - LeafReaderContext context, DocIdSetIterator itr) { + public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) { super(weight, itr); this.context = context; } @@ -146,5 +175,102 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } + + /** + * A FeatureScorer that reads the docValues for a field + */ + public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { + final LeafReaderContext context; + final DocIdSetIterator docValues; + final FieldType schemaFieldType; + DocValuesType docValuesType = DocValuesType.NONE; + + public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, + final DocIdSetIterator itr, final SchemaField schemaField) { + super(weight, itr); + this.context = context; + schemaFieldType = schemaField.getType(); + + try { + FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + // if fieldInfo is null, just use NONE-Type. This causes no problems, because we won't call score() anyway + docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE; + switch (docValuesType) { + case NUMERIC: + docValues = DocValues.getNumeric(context.reader(), field); + break; + case SORTED: + docValues = DocValues.getSorted(context.reader(), field); + break; + case BINARY: + docValues = DocValues.getBinary(context.reader(), field); + break; + case SORTED_NUMERIC: + case SORTED_SET: + case NONE: + default: + docValues = null; + } + } catch (IOException e) { + throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType " + + docValuesType.name()); + } + } + + @Override + public float score() throws IOException { + if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) { + switch (docValuesType) { + case NUMERIC: + if (NumberType.FLOAT.equals(schemaFieldType.getNumberType())) { + // convert float value that was stored as long back to float + return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue()); + } else if (NumberType.DOUBLE.equals(schemaFieldType.getNumberType())) { + // handle double value conversion + return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue()); + } + // just take the long value + return ((NumericDocValues) docValues).longValue(); + case SORTED: + int ord = ((SortedDocValues) docValues).ordValue(); + // try to interpret bytesRef either as number string or as true / false token + return handleBytesRef(((SortedDocValues) docValues).lookupOrd(ord)); + case BINARY: + case SORTED_SET: + case SORTED_NUMERIC: + case NONE: + default: + throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field + + " is not supported!"); + } + } + return FieldValueFeature.this.getDefaultValue(); + } + + private float handleBytesRef(BytesRef bytesRef) { + String string = bytesRef.utf8ToString(); + if (string.length() == 1 + && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { + // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN + // (see BoolField) + if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { + return 1f; + } else { + return 0f; + } + } else { + try { + return Float.parseFloat(string); + } catch (NumberFormatException ex) { + throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float."); + } + } + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } } } diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 4699b0f83f4..4187ce9424f 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -18,13 +18,22 @@ - + + + + + + + + + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 21b71c3e5ec..93709cad43a 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -28,6 +28,7 @@ import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.ltr.feature.FieldValueFeature; import org.apache.solr.ltr.feature.OriginalScoreFeature; 
import org.apache.solr.ltr.feature.SolrFeature; import org.apache.solr.ltr.feature.ValueFeature; @@ -107,21 +108,21 @@ public void testSimpleQuery() throws Exception { final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score"); final String result0_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","64.0", "c3","2.0", "original","0.0"); + "powpularityS","64.0", "c3","2.0", "original","0.0", "dvIntFieldFeature","8.0","dvLongFieldFeature","8.0","dvFloatFieldFeature","0.8","dvDoubleFieldFeature","0.8","dvStrNumFieldFeature","8.0","dvStrBoolFieldFeature","1.0"); final String result1_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","49.0", "c3","2.0", "original","1.0"); + "powpularityS","49.0", "c3","2.0", "original","1.0", "dvIntFieldFeature","7.0","dvLongFieldFeature","7.0","dvFloatFieldFeature","0.7","dvDoubleFieldFeature","0.7","dvStrNumFieldFeature","7.0","dvStrBoolFieldFeature","0.0"); final String result2_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","36.0", "c3","2.0", "original","2.0"); + "powpularityS","36.0", "c3","2.0", "original","2.0", "dvIntFieldFeature","6.0","dvLongFieldFeature","6.0","dvFloatFieldFeature","0.6","dvDoubleFieldFeature","0.6","dvStrNumFieldFeature","6.0","dvStrBoolFieldFeature","1.0"); final String result3_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","25.0", "c3","2.0", "original","3.0"); + "powpularityS","25.0", "c3","2.0", "original","3.0", "dvIntFieldFeature","5.0","dvLongFieldFeature","5.0","dvFloatFieldFeature","0.5","dvDoubleFieldFeature","0.5","dvStrNumFieldFeature","5.0","dvStrBoolFieldFeature","0.0"); final String result4_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","16.0", "c3","2.0", "original","4.0"); + "powpularityS","16.0", "c3","2.0", "original","4.0", "dvIntFieldFeature","4.0","dvLongFieldFeature","4.0","dvFloatFieldFeature","0.4","dvDoubleFieldFeature","0.4","dvStrNumFieldFeature","4.0","dvStrBoolFieldFeature","1.0"); final String result5_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "9.0", "c3","2.0", "original","5.0"); + "powpularityS", "9.0", "c3","2.0", "original","5.0", "dvIntFieldFeature","3.0","dvLongFieldFeature","3.0","dvFloatFieldFeature","0.3","dvDoubleFieldFeature","0.3","dvStrNumFieldFeature","3.0","dvStrBoolFieldFeature","0.0"); final String result6_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "4.0", "c3","2.0", "original","6.0"); + "powpularityS", "4.0", "c3","2.0", "original","6.0", "dvIntFieldFeature","2.0","dvLongFieldFeature","2.0","dvFloatFieldFeature","0.2","dvDoubleFieldFeature","0.2","dvStrNumFieldFeature","2.0","dvStrBoolFieldFeature","1.0"); final String result7_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "1.0", "c3","2.0", "original","7.0"); + "powpularityS", "1.0", "c3","2.0", "original","7.0", "dvIntFieldFeature","-1.0","dvLongFieldFeature","-2.0","dvFloatFieldFeature","-3.0","dvDoubleFieldFeature","-4.0","dvStrNumFieldFeature","-5.0","dvStrBoolFieldFeature","0.0"); // Test feature vectors returned (without re-ranking) @@ -240,24 +241,40 @@ void indexDocument(String collection, String id, String title, String descriptio doc.setField("title", title); doc.setField("description", description); doc.setField("popularity", popularity); + if(popularity != 1) { + // check that empty values will be read as default + doc.setField("dvIntField", popularity); + doc.setField("dvLongField", popularity); + doc.setField("dvFloatField", 
((float) popularity) / 10); + doc.setField("dvDoubleField", ((double) popularity) / 10); + doc.setField("dvStrNumField", popularity); + doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F"); + } solrCluster.getSolrClient().add(collection, doc); } private void indexDocuments(final String collection) throws Exception { final int collectionSize = 8; - for (int docId = 1; docId <= collectionSize; docId++) { + // put documents in reversed order to check that advanceExact is working correctly + for (int docId = collectionSize; docId >= 1; docId--) { final int popularity = docId; indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity); + if(docId == collectionSize / 2) { + // commit in the middle in order to check that everything works fine for multi-segment case + solrCluster.getSolrClient().commit(collection); + } } - solrCluster.getSolrClient().commit(collection); + solrCluster.getSolrClient().commit(collection, true, true); } - private void loadModelsAndFeatures() throws Exception { final String featureStore = "test"; - final String[] featureNames = new String[] {"powpularityS","c3", "original"}; - final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1}}"; + final String[] featureNames = new String[]{"powpularityS", "c3", "original", "dvIntFieldFeature", + "dvLongFieldFeature", "dvFloatFieldFeature", "dvDoubleFieldFeature", "dvStrNumFieldFeature", "dvStrBoolFieldFeature"}; + final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," + + "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," + + "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}"; loadFeature( featureNames[0], @@ -277,6 +294,42 @@ private void loadModelsAndFeatures() throws Exception { featureStore, null ); + loadFeature( + featureNames[3], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvIntField\"}" + ); + loadFeature( + featureNames[4], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvLongField\"}" + ); + loadFeature( + featureNames[5], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvFloatField\"}" + ); + loadFeature( + featureNames[6], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvDoubleField\",\"defaultValue\":-4.0}" + ); + loadFeature( + featureNames[7], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvStrNumField\",\"defaultValue\":-5}" + ); + loadFeature( + featureNames[8], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvStrBoolField\"}" + ); loadModel( "powpularityS-model", diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index 85019445546..cfdfcd5f416 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -25,12 +25,8 @@ import java.util.List; import java.util.Map; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; 
@@ -41,8 +37,8 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.store.Directory; -import org.apache.solr.SolrTestCase; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.ltr.feature.Feature; import org.apache.solr.ltr.feature.FieldValueFeature; @@ -50,16 +46,24 @@ import org.apache.solr.ltr.model.TestLinearModel; import org.apache.solr.ltr.norm.IdentityNormalizer; import org.apache.solr.ltr.norm.Normalizer; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TestLTRReRankingPipeline extends SolrTestCase { +public class TestLTRReRankingPipeline extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath()); + @BeforeClass + public static void setup() throws Exception { + initCore("solrconfig-ltr.xml", "schema.xml"); + } + private IndexSearcher getSearcher(IndexReader r) { // 'yes' to maybe wrapping in general final boolean maybeWrap = true; @@ -109,199 +113,155 @@ public Explanation explain(LeafReaderContext context, int doc, } @Test - public void testRescorer() throws IOException { - final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random(), dir); - - Document doc = new Document(); - doc.add(newStringField("id", "0", Field.Store.YES)); - doc.add(newTextField("field", "wizard the the the the the oz", - Field.Store.NO)); - doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field - - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "1", Field.Store.YES)); - // 1 extra token, but wizard and oz are close; - doc.add(newTextField("field", "wizard oz the the the the the the", - Field.Store.NO)); - doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field - w.addDocument(doc); - - final IndexReader r = w.getReader(); - w.close(); - - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = getSearcher(r); - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(2, hits.totalHits.value); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); - hits = 
rescorer.rescore(searcher, hits, 2); - - // rerank using the field final-score - assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - r.close(); - dir.close(); - + public void testRescorer() throws Exception { + assertU(adoc("id", "0", "field", "wizard the the the the the oz", "final-score", "F")); + assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "final-score", "T")); + assertU(commit()); + + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = solrQueryRequest.getSearcher(); + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(2, hits.totalHits.value); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel); + ltrScoringQuery.setRequest(solrQueryRequest); + final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery); + hits = rescorer.rescore(searcher, hits, 2); + + // rerank using the field final-score + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + } } @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { - final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random(), dir); - - Document doc = new Document(); - doc.add(newStringField("id", "0", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 1.0f)); - w.addDocument(doc); - - doc = new Document(); - doc.add(newStringField("id", "1", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 2.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "2", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 3.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "3", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz the the the the ", - Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 4.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "4", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz the the the the the the", - Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 5.0f)); - w.addDocument(doc); - - final IndexReader 
r = w.getReader(); - w.close(); - - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = getSearcher(r); - - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(5, hits.totalHits.value); - - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); - - // rerank @ 0 should not change the order - hits = rescorer.rescore(searcher, hits, 0); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - // test rerank with different topN cuts - - for (int topN = 1; topN <= 5; topN++) { - log.info("rerank {} documents ", topN); - hits = searcher.search(bqBuilder.build(), 10); - - final ScoreDoc[] slice = new ScoreDoc[topN]; - System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); - hits = new TopDocs(hits.totalHits, slice); - hits = rescorer.rescore(searcher, hits, topN); - for (int i = topN - 1, j = 0; i >= 0; i--, j++) { - if (log.isInfoEnabled()) { - log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) - .get("id"), j); - } - - assertEquals(i, - Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); - assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "final-score-float", "1.0")); + assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "final-score-float", "2.0")); + assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "final-score-float", "3.0")); + assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "final-score-float", "4.0")); + assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "final-score-float", "5.0")); + assertU(commit()); + + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = solrQueryRequest.getSearcher(); + + // first run the standard query + TopDocs hits = 
searcher.search(bqBuilder.build(), 10); + assertEquals(5, hits.totalHits.value); + + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score-float"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score-float"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel); + scoringQuery.setRequest(solrQueryRequest); + final LTRRescorer rescorer = new LTRRescorer(scoringQuery); + + // rerank @ 0 should not change the order + hits = rescorer.rescore(searcher, hits, 0); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + // test rerank with different topN cuts + + for (int topN = 1; topN <= 5; topN++) { + log.info("rerank {} documents ", topN); + hits = searcher.search(bqBuilder.build(), 10); + + final ScoreDoc[] slice = new ScoreDoc[topN]; + System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); + hits = new TopDocs(hits.totalHits, slice); + hits = rescorer.rescore(searcher, hits, topN); + for (int i = topN - 1, j = 0; i >= 0; i--, j++) { + if (log.isInfoEnabled()) { + log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) + .get("id"), j); + } + + assertEquals(i, + Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); + assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + } } } - - r.close(); - dir.close(); - } @Test public void testDocParam() throws Exception { - final Map test = new HashMap(); - test.put("fake", 2); - List features = makeFieldValueFeatures(new int[] {0}, - "final-score"); - List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - List allFeatures = makeFieldValueFeatures(new int[] {0}, - "final-score"); - MockModel ltrScoringModel = new MockModel("test", - features, norms, "test", allFeatures, null); - LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); - LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + List features = makeFieldValueFeatures(new int[] {0}, + "final-score"); + List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + List allFeatures = makeFieldValueFeatures(new int[] {0}, + "final-score"); + MockModel 
ltrScoringModel = new MockModel("test", + features, norms, "test", allFeatures, null); + LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); + query.setRequest(solrQueryRequest); + LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } - features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); - norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9}, "final-score"); - ltrScoringModel = new MockModel("test", features, norms, - "test", allFeatures, null); - query = new LTRScoringQuery(ltrScoringModel); - wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); + norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9}, "final-score"); + ltrScoringModel = new MockModel("test", features, norms, + "test", allFeatures, null); + query = new LTRScoringQuery(ltrScoringModel); + query.setRequest(solrQueryRequest); + wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } } } - } From bdce029f3c95526a79e86a0d73389db42832bee9 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Wed, 12 May 2021 16:44:02 +0200 Subject: [PATCH 02/27] [SOLR-12697] formatting changes --- .../solr/ltr/feature/FieldValueFeature.java | 21 ++++++++----------- .../apache/solr/ltr/TestLTROnSolrCloud.java | 5 ----- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 129ecf03950..11470dc5a8a 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -68,8 +68,8 @@ public void setField(String field) { } @Override - public LinkedHashMap paramsToMap() { - final LinkedHashMap params = defaultParamsToMap(); + public LinkedHashMap paramsToMap() { + final LinkedHashMap params = defaultParamsToMap(); params.put("field", field); return params; } @@ -77,19 +77,17 @@ public LinkedHashMap paramsToMap() { @Override protected void validate() throws FeatureException { if (field == null || field.isEmpty()) { - throw new FeatureException(getClass().getSimpleName()+ - ": field must be provided"); + throw new FeatureException(getClass().getSimpleName() + ": field must be provided"); } } - public FieldValueFeature(String name, Map params) { + public FieldValueFeature(String name, Map params) { super(name, params); } 
@Override - public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, - SolrQueryRequest request, Query originalQuery, Map efi) - throws IOException { + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, + Query originalQuery, Map efi) throws IOException { return new FieldValueFeatureWeight(searcher, request, originalQuery, efi); } @@ -150,14 +148,13 @@ public float score() throws IOException { } else { final String string = indexableField.stringValue(); if (string.length() == 1) { - // boolean values in the index are encoded with the - // a single char contained in TRUE_TOKEN or FALSE_TOKEN + // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { - return 1; + return 1f; } if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { - return 0; + return 0f; } } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 93709cad43a..732b5fd8ff6 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -58,11 +58,8 @@ public void setUp() throws Exception { int numberOfNodes = numberOfShards * numberOfReplicas; setupSolrCluster(numberOfShards, numberOfReplicas, numberOfNodes); - - } - @Override public void tearDown() throws Exception { restTestHarness.close(); @@ -233,7 +230,6 @@ private void createCollection(String name, String config, int numShards, int num solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas); } - void indexDocument(String collection, String id, String title, String description, int popularity) throws Exception{ SolrInputDocument doc = new SolrInputDocument(); @@ -356,5 +352,4 @@ public static void after() throws Exception { } System.clearProperty("managed.schema.mutable"); } - } From e6601eeecf999207df01447e1f8a0c0ff20b03aa Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Wed, 12 May 2021 17:09:41 +0200 Subject: [PATCH 03/27] [SOLR-12697] only apply new scorer to fields that are not stored --- .../java/org/apache/solr/ltr/feature/FieldValueFeature.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 11470dc5a8a..6373f0b108b 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -112,8 +112,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher, */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { - // always prefer docValues - if (schemaField != null && schemaField.hasDocValues()) { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { return new DocValuesFieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField); } From d6e14779a5648f928efa63d36246973dec5cd09b Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Wed, 12 May 2021 17:17:09 +0200 Subject: [PATCH 04/27] [SOLR-12697] remove BINARY case because it is not supported --- .../src/java/org/apache/solr/ltr/feature/FieldValueFeature.java | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 6373f0b108b..f71e02b932b 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -199,8 +199,6 @@ public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafRe docValues = DocValues.getSorted(context.reader(), field); break; case BINARY: - docValues = DocValues.getBinary(context.reader(), field); - break; case SORTED_NUMERIC: case SORTED_SET: case NONE: From 5bc995c04a4489377c4beda1ec62d74685f050e5 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 13 May 2021 09:46:47 +0200 Subject: [PATCH 05/27] [SOLR-12697] only pass fieldType to constructor; determine numberType in constructor instead of once per doc --- .../apache/solr/ltr/feature/FieldValueFeature.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index f71e02b932b..f4df25885a6 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -114,7 +114,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher, public FeatureScorer scorer(LeafReaderContext context) throws IOException { if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { return new DocValuesFieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField); + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType()); } return new FieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); @@ -178,14 +178,15 @@ public float getMaxScore(int upTo) throws IOException { public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { final LeafReaderContext context; final DocIdSetIterator docValues; - final FieldType schemaFieldType; + final FieldType fieldType; + NumberType fieldNumberType; DocValuesType docValuesType = DocValuesType.NONE; public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, - final DocIdSetIterator itr, final SchemaField schemaField) { + final DocIdSetIterator itr, final FieldType fieldType) { super(weight, itr); this.context = context; - schemaFieldType = schemaField.getType(); + this.fieldType = fieldType; try { FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); @@ -194,6 +195,7 @@ public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafRe switch (docValuesType) { case NUMERIC: docValues = DocValues.getNumeric(context.reader(), field); + fieldNumberType = fieldType.getNumberType(); break; case SORTED: docValues = DocValues.getSorted(context.reader(), field); @@ -216,10 +218,10 @@ public float score() throws IOException { if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) { switch (docValuesType) { case NUMERIC: - if (NumberType.FLOAT.equals(schemaFieldType.getNumberType())) { + if (NumberType.FLOAT.equals(fieldNumberType)) { // convert float value that was stored as long back to float return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue()); - } else if 
(NumberType.DOUBLE.equals(schemaFieldType.getNumberType())) { + } else if (NumberType.DOUBLE.equals(fieldNumberType)) { // handle double value conversion return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue()); } From 4559415aec0e31db19ae999cc49031ca744bc14b Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 13 May 2021 09:51:38 +0200 Subject: [PATCH 06/27] [SOLR-12697] remove - from fieldnames; randomize indexing order for documents and commit during indexing; improve formatting of test --- .../solr/collection1/conf/schema.xml | 4 +- .../apache/solr/ltr/TestLTROnSolrCloud.java | 141 ++++++++++-------- .../solr/ltr/TestLTRReRankingPipeline.java | 32 ++-- 3 files changed, 100 insertions(+), 77 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 4187ce9424f..88e0b73f0f9 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -19,8 +19,8 @@ - - + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 732b5fd8ff6..28d6c5c8774 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -16,7 +16,10 @@ package org.apache.solr.ltr; import java.io.File; +import java.util.Collections; +import java.util.List; import java.util.SortedMap; +import java.util.stream.IntStream; import org.apache.commons.io.FileUtils; import org.apache.solr.client.solrj.SolrQuery; @@ -38,13 +41,15 @@ import org.junit.AfterClass; import org.junit.Test; +import static java.util.stream.Collectors.toList; + public class TestLTROnSolrCloud extends TestRerankBase { private MiniSolrCloudCluster solrCluster; String solrconfig = "solrconfig-ltr.xml"; String schema = "schema.xml"; - SortedMap extraServlets = null; + SortedMap extraServlets = null; @Override public void setUp() throws Exception { @@ -52,8 +57,8 @@ public void setUp() throws Exception { extraServlets = setupTestInit(solrconfig, schema, true); System.setProperty("enable.update.log", "true"); - int numberOfShards = random().nextInt(4)+1; - int numberOfReplicas = random().nextInt(2)+1; + int numberOfShards = random().nextInt(4) + 1; + int numberOfReplicas = random().nextInt(2) + 1; int numberOfNodes = numberOfShards * numberOfReplicas; @@ -84,7 +89,7 @@ public void testSimpleQuery() throws Exception { query.setParam("rows", "8"); QueryResponse queryResponse = - solrCluster.getSolrClient().query(COLLECTION,query); + solrCluster.getSolrClient().query(COLLECTION, query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("1", queryResponse.getResults().get(0).get("id").toString()); assertEquals("2", queryResponse.getResults().get(1).get("id").toString()); @@ -95,37 +100,52 @@ public void testSimpleQuery() throws Exception { assertEquals("7", queryResponse.getResults().get(6).get("id").toString()); assertEquals("8", queryResponse.getResults().get(7).get("id").toString()); - final Float original_result0_score = (Float)queryResponse.getResults().get(0).get("score"); - final Float original_result1_score = (Float)queryResponse.getResults().get(1).get("score"); - final Float original_result2_score = (Float)queryResponse.getResults().get(2).get("score"); - final Float original_result3_score = 
(Float)queryResponse.getResults().get(3).get("score"); - final Float original_result4_score = (Float)queryResponse.getResults().get(4).get("score"); - final Float original_result5_score = (Float)queryResponse.getResults().get(5).get("score"); - final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score"); - final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score"); - - final String result0_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","64.0", "c3","2.0", "original","0.0", "dvIntFieldFeature","8.0","dvLongFieldFeature","8.0","dvFloatFieldFeature","0.8","dvDoubleFieldFeature","0.8","dvStrNumFieldFeature","8.0","dvStrBoolFieldFeature","1.0"); - final String result1_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","49.0", "c3","2.0", "original","1.0", "dvIntFieldFeature","7.0","dvLongFieldFeature","7.0","dvFloatFieldFeature","0.7","dvDoubleFieldFeature","0.7","dvStrNumFieldFeature","7.0","dvStrBoolFieldFeature","0.0"); - final String result2_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","36.0", "c3","2.0", "original","2.0", "dvIntFieldFeature","6.0","dvLongFieldFeature","6.0","dvFloatFieldFeature","0.6","dvDoubleFieldFeature","0.6","dvStrNumFieldFeature","6.0","dvStrBoolFieldFeature","1.0"); - final String result3_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","25.0", "c3","2.0", "original","3.0", "dvIntFieldFeature","5.0","dvLongFieldFeature","5.0","dvFloatFieldFeature","0.5","dvDoubleFieldFeature","0.5","dvStrNumFieldFeature","5.0","dvStrBoolFieldFeature","0.0"); - final String result4_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","16.0", "c3","2.0", "original","4.0", "dvIntFieldFeature","4.0","dvLongFieldFeature","4.0","dvFloatFieldFeature","0.4","dvDoubleFieldFeature","0.4","dvStrNumFieldFeature","4.0","dvStrBoolFieldFeature","1.0"); - final String result5_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "9.0", "c3","2.0", "original","5.0", "dvIntFieldFeature","3.0","dvLongFieldFeature","3.0","dvFloatFieldFeature","0.3","dvDoubleFieldFeature","0.3","dvStrNumFieldFeature","3.0","dvStrBoolFieldFeature","0.0"); - final String result6_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "4.0", "c3","2.0", "original","6.0", "dvIntFieldFeature","2.0","dvLongFieldFeature","2.0","dvFloatFieldFeature","0.2","dvDoubleFieldFeature","0.2","dvStrNumFieldFeature","2.0","dvStrBoolFieldFeature","1.0"); - final String result7_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "1.0", "c3","2.0", "original","7.0", "dvIntFieldFeature","-1.0","dvLongFieldFeature","-2.0","dvFloatFieldFeature","-3.0","dvDoubleFieldFeature","-4.0","dvStrNumFieldFeature","-5.0","dvStrBoolFieldFeature","0.0"); + final Float original_result0_score = (Float) queryResponse.getResults().get(0).get("score"); + final Float original_result1_score = (Float) queryResponse.getResults().get(1).get("score"); + final Float original_result2_score = (Float) queryResponse.getResults().get(2).get("score"); + final Float original_result3_score = (Float) queryResponse.getResults().get(3).get("score"); + final Float original_result4_score = (Float) queryResponse.getResults().get(4).get("score"); + final Float original_result5_score = (Float) queryResponse.getResults().get(5).get("score"); + final Float original_result6_score = (Float) queryResponse.getResults().get(6).get("score"); + final Float original_result7_score = (Float) 
queryResponse.getResults().get(7).get("score"); + + final String result0_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0", + "dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8", + "dvStrNumFieldFeature", "8.0", "dvStrBoolFieldFeature", "1.0"); + final String result1_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0", + "dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7", + "dvStrNumFieldFeature", "7.0", "dvStrBoolFieldFeature", "0.0"); + final String result2_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0", + "dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6", + "dvStrNumFieldFeature", "6.0", "dvStrBoolFieldFeature", "1.0"); + final String result3_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0", + "dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5", + "dvStrNumFieldFeature", "5.0", "dvStrBoolFieldFeature", "0.0"); + final String result4_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0", + "dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4", + "dvStrNumFieldFeature", "4.0", "dvStrBoolFieldFeature", "1.0"); + final String result5_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0", + "dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3", + "dvStrNumFieldFeature", "3.0", "dvStrBoolFieldFeature", "0.0"); + final String result6_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0", + "dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2", + "dvStrNumFieldFeature", "2.0", "dvStrBoolFieldFeature", "1.0"); + final String result7_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0", + "dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0", + "dvStrNumFieldFeature", "-5.0", "dvStrBoolFieldFeature", "0.0"); // Test feature vectors returned (without re-ranking) query.setFields("*,score,features:[fv store=test]"); - queryResponse = - solrCluster.getSolrClient().query(COLLECTION,query); + queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("1", queryResponse.getResults().get(0).get("id").toString()); assertEquals("2", queryResponse.getResults().get(1).get("id").toString()); @@ -165,8 +185,7 @@ public void testSimpleQuery() throws Exception { // Test feature vectors returned (with re-ranking) query.setFields("*,score,features:[fv]"); query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); - queryResponse = - solrCluster.getSolrClient().query(COLLECTION,query); + queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("8", 
queryResponse.getResults().get(0).get("id").toString()); assertEquals(result0_features, @@ -206,7 +225,7 @@ private void setupSolrCluster(int numShards, int numReplicas, int numServers) th createCollection(COLLECTION, "conf1", numShards, numReplicas); indexDocuments(COLLECTION); for (JettySolrRunner solrRunner : solrCluster.getJettySolrRunners()) { - if (!solrRunner.getCoreContainer().getCores().isEmpty()){ + if (!solrRunner.getCoreContainer().getCores().isEmpty()) { String coreName = solrRunner.getCoreContainer().getCores().iterator().next().getName(); restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName); break; @@ -231,13 +250,13 @@ private void createCollection(String name, String config, int numShards, int num } void indexDocument(String collection, String id, String title, String description, int popularity) - throws Exception{ + throws Exception { SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", id); doc.setField("title", title); doc.setField("description", description); doc.setField("popularity", popularity); - if(popularity != 1) { + if (popularity != 1) { // check that empty values will be read as default doc.setField("dvIntField", popularity); doc.setField("dvLongField", popularity); @@ -249,17 +268,21 @@ void indexDocument(String collection, String id, String title, String descriptio solrCluster.getSolrClient().add(collection, doc); } - private void indexDocuments(final String collection) - throws Exception { + private void indexDocuments(final String collection) throws Exception { final int collectionSize = 8; - // put documents in reversed order to check that advanceExact is working correctly - for (int docId = collectionSize; docId >= 1; docId--) { + // put documents in random order to check that advanceExact is working correctly + List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList()); + Collections.shuffle(docIds); + + int docCounter = 1; + for (int docId : docIds) { final int popularity = docId; indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity); - if(docId == collectionSize / 2) { - // commit in the middle in order to check that everything works fine for multi-segment case + // maybe commit in the middle in order to check that everything works fine for multi-segment case + if (docCounter == collectionSize / 2 && random().nextBoolean()) { solrCluster.getSolrClient().commit(collection); } + docCounter++; } solrCluster.getSolrClient().commit(collection, true, true); } @@ -273,22 +296,22 @@ private void loadModelsAndFeatures() throws Exception { "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}"; loadFeature( - featureNames[0], - SolrFeature.class.getName(), - featureStore, - "{\"q\":\"{!func}pow(popularity,2)\"}" + featureNames[0], + SolrFeature.class.getName(), + featureStore, + "{\"q\":\"{!func}pow(popularity,2)\"}" ); loadFeature( - featureNames[1], - ValueFeature.class.getName(), - featureStore, - "{\"value\":2}" + featureNames[1], + ValueFeature.class.getName(), + featureStore, + "{\"value\":2}" ); loadFeature( - featureNames[2], - OriginalScoreFeature.class.getName(), - featureStore, - null + featureNames[2], + OriginalScoreFeature.class.getName(), + featureStore, + null ); loadFeature( featureNames[3], @@ -328,11 +351,11 @@ private void loadModelsAndFeatures() throws Exception { ); loadModel( - "powpularityS-model", - LinearModel.class.getName(), - featureNames, - featureStore, - 
jsonModelParams + "powpularityS-model", + LinearModel.class.getName(), + featureNames, + featureStore, + jsonModelParams ); reloadCollection(COLLECTION); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index cfdfcd5f416..1b1967d47ef 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -114,8 +114,8 @@ public Explanation explain(LeafReaderContext context, int doc, @Test public void testRescorer() throws Exception { - assertU(adoc("id", "0", "field", "wizard the the the the the oz", "final-score", "F")); - assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "final-score", "T")); + assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F")); + assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T")); assertU(commit()); try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { @@ -131,12 +131,12 @@ public void testRescorer() throws Exception { assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score"); + "finalScore"); final List norms = new ArrayList( Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore"); final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); @@ -145,7 +145,7 @@ public void testRescorer() throws Exception { final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery); hits = rescorer.rescore(searcher, hits, 2); - // rerank using the field final-score + // rerank using the field finalScore assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); } @@ -154,11 +154,11 @@ public void testRescorer() throws Exception { @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { - assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "final-score-float", "1.0")); - assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "final-score-float", "2.0")); - assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "final-score-float", "3.0")); - assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "final-score-float", "4.0")); - assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "final-score-float", "5.0")); + assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0")); + assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0")); + assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0")); + assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0")); + assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0")); assertU(commit()); try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { @@ -179,12 +179,12 @@ public void testDifferentTopN() throws IOException { assertEquals("4", 
searcher.doc(hits.scoreDocs[4].doc).get("id")); final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score-float"); + "finalScoreFloat"); final List norms = new ArrayList( Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score-float"); + 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat"); final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); @@ -229,12 +229,12 @@ public void testDifferentTopN() throws IOException { public void testDocParam() throws Exception { try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { List features = makeFieldValueFeatures(new int[] {0}, - "final-score"); + "finalScore"); List norms = new ArrayList( Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); List allFeatures = makeFieldValueFeatures(new int[] {0}, - "final-score"); + "finalScore"); MockModel ltrScoringModel = new MockModel("test", features, norms, "test", allFeatures, null); LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); @@ -246,12 +246,12 @@ public void testDocParam() throws Exception { assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); } - features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); + features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore"); norms = new ArrayList( Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9}, "final-score"); + 9}, "finalScore"); ltrScoringModel = new MockModel("test", features, norms, "test", allFeatures, null); query = new LTRScoringQuery(ltrScoringModel); From ec4cbfb4c6e57855d4a5a188bf071ec8b5d37a6c Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 13 May 2021 21:26:18 +0200 Subject: [PATCH 07/27] [SOLR-12697] determine docValuesType before creating DocValuesFieldValueFeatureScorer so only the supported types have to be handled later; extract number-conversion to separate method --- .../solr/ltr/feature/FieldValueFeature.java | 118 +++++++++--------- .../solr/collection1/conf/schema.xml | 2 +- .../apache/solr/ltr/TestLTROnSolrCloud.java | 2 +- 3 files changed, 63 insertions(+), 59 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index f4df25885a6..6e8b414a855 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -95,7 +95,7 @@ public class FieldValueFeatureWeight extends FeatureWeight { private final SchemaField schemaField; public FieldValueFeatureWeight(IndexSearcher searcher, - SolrQueryRequest request, Query originalQuery, Map efi) { + SolrQueryRequest request, Query originalQuery, Map efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); if (searcher instanceof SolrIndexSearcher) { schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); @@ -106,6 +106,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher, /** * Return a FeatureScorer that uses docValues or storedFields if no docValues are present + * * @param context the segment this FeatureScorer is working 
with * @return FeatureScorer for the current segment and field * @throws IOException as defined by abstract class Feature @@ -113,11 +114,21 @@ public FieldValueFeatureWeight(IndexSearcher searcher, @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { - return new DocValuesFieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType()); + + FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; + + if (DocValuesType.NUMERIC.equals(docValuesType) || DocValuesType.SORTED.equals(docValuesType)) { + return new DocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType(), docValuesType); + // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway + } else if (!DocValuesType.NONE.equals(docValuesType)) { + throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field + + " is not supported!"); + } } return new FieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } /** @@ -135,8 +146,7 @@ public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, public float score() throws IOException { try { - final Document document = context.reader().document(itr.docID(), - fieldAsSet); + final Document document = context.reader().document(itr.docID(), fieldAsSet); final IndexableField indexableField = document.getField(field); if (indexableField == null) { return getDefaultValue(); @@ -158,10 +168,7 @@ public float score() throws IOException { } } } catch (final IOException e) { - throw new FeatureException( - e.toString() + ": " + - "Unable to extract feature for " - + name, e); + throw new FeatureException(e.toString() + ": " + "Unable to extract feature for " + name, e); } return getDefaultValue(); } @@ -177,76 +184,73 @@ public float getMaxScore(int upTo) throws IOException { */ public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { final LeafReaderContext context; - final DocIdSetIterator docValues; final FieldType fieldType; + final DocValuesType docValuesType; + DocIdSetIterator docValues; NumberType fieldNumberType; - DocValuesType docValuesType = DocValuesType.NONE; public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, - final DocIdSetIterator itr, final FieldType fieldType) { + final DocIdSetIterator itr, final FieldType fieldType, + final DocValuesType docValuesType) { super(weight, itr); this.context = context; this.fieldType = fieldType; + this.docValuesType = docValuesType; try { - FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); - // if fieldInfo is null, just use NONE-Type. This causes no problems, because we won't call score() anyway - docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE; - switch (docValuesType) { - case NUMERIC: - docValues = DocValues.getNumeric(context.reader(), field); - fieldNumberType = fieldType.getNumberType(); - break; - case SORTED: - docValues = DocValues.getSorted(context.reader(), field); - break; - case BINARY: - case SORTED_NUMERIC: - case SORTED_SET: - case NONE: - default: - docValues = null; + if (DocValuesType.NUMERIC.equals(docValuesType)) { + docValues = DocValues.getNumeric(context.reader(), field); + fieldNumberType = fieldType.getNumberType(); + } else if (DocValuesType.SORTED.equals(docValuesType)) { + docValues = DocValues.getSorted(context.reader(), field); } } catch (IOException e) { throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType " - + docValuesType.name()); + + docValuesType.name()); } } @Override public float score() throws IOException { - if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) { - switch (docValuesType) { - case NUMERIC: - if (NumberType.FLOAT.equals(fieldNumberType)) { - // convert float value that was stored as long back to float - return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue()); - } else if (NumberType.DOUBLE.equals(fieldNumberType)) { - // handle double value conversion - return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue()); - } - // just take the long value - return ((NumericDocValues) docValues).longValue(); - case SORTED: - int ord = ((SortedDocValues) docValues).ordValue(); - // try to interpret bytesRef either as number string or as true / false token - return handleBytesRef(((SortedDocValues) docValues).lookupOrd(ord)); - case BINARY: - case SORTED_SET: - case SORTED_NUMERIC: - case NONE: - default: - throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field - + " is not supported!"); - } + if (DocValuesType.NUMERIC.equals(docValuesType) && + ((NumericDocValues) docValues).advanceExact(itr.docID())) { + return readNumericDocValues(); + } else if (DocValuesType.SORTED.equals(docValuesType) && + ((SortedDocValues) docValues).advanceExact(itr.docID())) { + int ord = ((SortedDocValues) docValues).ordValue(); + return readSortedDocValues(((SortedDocValues) docValues).lookupOrd(ord)); } return FieldValueFeature.this.getDefaultValue(); } - private float handleBytesRef(BytesRef bytesRef) { + /** + * Read the numeric value for a field and convert the different number types to float. 
+ * + * @return The numeric value that the docValues contain for the current document + * @throws IOException if docValues cannot be read + */ + private float readNumericDocValues() throws IOException { + if (NumberType.FLOAT.equals(fieldNumberType)) { + // convert float value that was stored as long back to float + return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue()); + } else if (NumberType.DOUBLE.equals(fieldNumberType)) { + // handle double value conversion + return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue()); + } + // just take the long value + return ((NumericDocValues) docValues).longValue(); + } + + /** + * Interprets the bytesRef either as true / false token or tries to read it as number string + * + * @param bytesRef the value of the field that should be used as score + * @return the input converted to a number + */ + private float readSortedDocValues(BytesRef bytesRef) { String string = bytesRef.utf8ToString(); if (string.length() == 1 - && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { + && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 88e0b73f0f9..8ec89e39285 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -30,7 +30,7 @@ - + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 28d6c5c8774..a9f6d36f235 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -272,7 +272,7 @@ private void indexDocuments(final String collection) throws Exception { final int collectionSize = 8; // put documents in random order to check that advanceExact is working correctly List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList()); - Collections.shuffle(docIds); + Collections.shuffle(docIds, random()); int docCounter = 1; for (int docId : docIds) { From e6f20f1ce79ca547518c8759b09cbcf710587402 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 14 May 2021 13:50:23 +0100 Subject: [PATCH 08/27] split dual-purpose DocValuesFieldValueFeatureScorer into two * NumericDocValuesFieldValueFeatureScorer * SortedDocValuesFieldValueFeatureScorer --- .../solr/ltr/feature/FieldValueFeature.java | 116 ++++++++++++------ 1 file changed, 80 insertions(+), 36 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 6e8b414a855..9b9c6561489 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.BoolField; -import org.apache.solr.schema.FieldType; import org.apache.solr.schema.NumberType; import 
org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; @@ -118,9 +117,12 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException { FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; - if (DocValuesType.NUMERIC.equals(docValuesType) || DocValuesType.SORTED.equals(docValuesType)) { - return new DocValuesFieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType(), docValuesType); + if (DocValuesType.NUMERIC.equals(docValuesType)) { + return new NumericDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType()); + } else if (DocValuesType.SORTED.equals(docValuesType)) { + return new SortedDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway } else if (!DocValuesType.NONE.equals(docValuesType)) { throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field @@ -180,45 +182,28 @@ public float getMaxScore(int upTo) throws IOException { } /** - * A FeatureScorer that reads the docValues for a field + * A FeatureScorer that reads the numeric docValues for a field */ - public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { - final LeafReaderContext context; - final FieldType fieldType; - final DocValuesType docValuesType; - DocIdSetIterator docValues; - NumberType fieldNumberType; + public class NumericDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { + NumericDocValues docValues; + NumberType numberType; - public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, - final DocIdSetIterator itr, final FieldType fieldType, - final DocValuesType docValuesType) { + public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, + final DocIdSetIterator itr, final NumberType numberType) { super(weight, itr); - this.context = context; - this.fieldType = fieldType; - this.docValuesType = docValuesType; + this.numberType = numberType; try { - if (DocValuesType.NUMERIC.equals(docValuesType)) { - docValues = DocValues.getNumeric(context.reader(), field); - fieldNumberType = fieldType.getNumberType(); - } else if (DocValuesType.SORTED.equals(docValuesType)) { - docValues = DocValues.getSorted(context.reader(), field); - } + docValues = DocValues.getNumeric(context.reader(), field); } catch (IOException e) { - throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType " - + docValuesType.name()); + throw new IllegalArgumentException("Could not read numeric docValues for field " + field); } } @Override public float score() throws IOException { - if (DocValuesType.NUMERIC.equals(docValuesType) && - ((NumericDocValues) docValues).advanceExact(itr.docID())) { + if (docValues.advanceExact(itr.docID())) { return readNumericDocValues(); - } else if (DocValuesType.SORTED.equals(docValuesType) && - ((SortedDocValues) docValues).advanceExact(itr.docID())) { - int ord = ((SortedDocValues) docValues).ordValue(); - return readSortedDocValues(((SortedDocValues) docValues).lookupOrd(ord)); } return 
FieldValueFeature.this.getDefaultValue(); } @@ -230,15 +215,73 @@ public float score() throws IOException { * @throws IOException if docValues cannot be read */ private float readNumericDocValues() throws IOException { - if (NumberType.FLOAT.equals(fieldNumberType)) { + if (NumberType.FLOAT.equals(numberType)) { // convert float value that was stored as long back to float - return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue()); - } else if (NumberType.DOUBLE.equals(fieldNumberType)) { + return Float.intBitsToFloat((int) docValues.longValue()); + } else if (NumberType.DOUBLE.equals(numberType)) { // handle double value conversion - return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue()); + return (float) Double.longBitsToDouble(docValues.longValue()); } // just take the long value - return ((NumericDocValues) docValues).longValue(); + return docValues.longValue(); + } + + /** + * Interprets the bytesRef either as true / false token or tries to read it as number string + * + * @param bytesRef the value of the field that should be used as score + * @return the input converted to a number + */ + private float readSortedDocValues(BytesRef bytesRef) { + String string = bytesRef.utf8ToString(); + if (string.length() == 1 + && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { + // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN + // (see BoolField) + if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { + return 1f; + } else { + return 0f; + } + } else { + try { + return Float.parseFloat(string); + } catch (NumberFormatException ex) { + throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float."); + } + } + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } + /** + * A FeatureScorer that reads the sorted docValues for a field + */ + public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { + SortedDocValues docValues; + NumberType fieldNumberType; + + public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, + final DocIdSetIterator itr) { + super(weight, itr); + + try { + docValues = DocValues.getSorted(context.reader(), field); + } catch (IOException e) { + throw new IllegalArgumentException("Could not read sorted docValues for field " + field); + } + } + + @Override + public float score() throws IOException { + if (docValues.advanceExact(itr.docID())) { + int ord = docValues.ordValue(); + return readSortedDocValues(docValues.lookupOrd(ord)); + } + return FieldValueFeature.this.getDefaultValue(); } /** @@ -272,5 +315,6 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } + } } From f16ce3d1e5330784d9804756d0dc10999a0e25de Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 14 May 2021 19:40:53 +0100 Subject: [PATCH 09/27] add TestFieldValueFeature test coverage (with caveat) caveat: TestFieldValueFeature.testIfADocumentDoesntHaveAFieldDefaultValueIsReturned fails --- .../solr/collection1/conf/schema.xml | 9 ++ .../ltr/feature/TestFieldValueFeature.java | 150 ++++++++++-------- 2 files changed, 91 insertions(+), 68 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 8ec89e39285..bf50149fcdd 100644 
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -26,6 +26,10 @@ + + + + @@ -47,6 +51,11 @@ + + + + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 108044b5cbd..9791fb7fcd6 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -30,6 +30,12 @@ public class TestFieldValueFeature extends TestRerankBase { private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f; + private static final String FIELD_NAMES[] = { + "popularity", + "dvIntPopularity", "dvLongPopularity", + "dvFloatPopularity", "dvDoublePopularity" + }; + @Before public void before() throws Exception { setuptest(false); @@ -56,11 +62,13 @@ public void before() throws Exception { assertU(commit()); - loadFeature("popularity", FieldValueFeature.class.getName(), - "{\"field\":\"popularity\"}"); + for (String field : FIELD_NAMES) { + loadFeature(field, FieldValueFeature.class.getName(), + "{\"field\":\""+field+"\"}"); - loadModel("popularity-model", LinearModel.class.getName(), - new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}"); + loadModel(field + "-model", LinearModel.class.getName(), + new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}"); + } } @After @@ -70,86 +78,92 @@ public void after() throws Exception { @Test public void testRanking() throws Exception { - - final SolrQuery query = new SolrQuery(); - query.setQuery("title:w1"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - // Normal term match - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); - - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); - - query.setQuery("*:*"); - query.remove("rows"); - query.add("rows", "8"); - query.remove("rq"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=8}"); - - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + for (String field : FIELD_NAMES) { + + final SolrQuery query = new SolrQuery(); + query.setQuery("title:w1"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + // Normal term match + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); + assertJQ("/query" + query.toQueryString(), 
"/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); + + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); + + query.setQuery("*:*"); + query.remove("rows"); + query.add("rows", "8"); + query.remove("rq"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}"); + + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + } } @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); - + for (String field : FIELD_NAMES) { + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + + } } @Test public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception { + for (String field : FIELD_NAMES) { - final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"; + final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field; - loadFeature("popularity42", FieldValueFeature.class.getName(), fstore, - "{\"field\":\"popularity\",\"defaultValue\":\"42.0\"}"); + loadFeature(field+"42", FieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\",\"defaultValue\":\"42.0\"}"); - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); - loadModel("popularity-model42", LinearModel.class.getName(), - new String[] {"popularity42"}, fstore, "{\"weights\":{\"popularity42\":1.0}}"); + loadModel(field+"-model42", LinearModel.class.getName(), + new String[] 
{field+"42"}, fstore, "{\"weights\":{\""+field+"42\":1.0}}"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model42 reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity42","42.0")+"'}"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model42 reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field+"42","42.0")+"'}"); + } } @Test From e5954eb204f06eaec80523a775cbd0b5e1a679ca Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Sun, 16 May 2021 11:05:26 +0200 Subject: [PATCH 10/27] [SOLR-12697] remove method to read sorted values from Scorer for numeric docValues --- .../solr/ltr/feature/FieldValueFeature.java | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 9b9c6561489..448ab21409a 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -226,43 +226,17 @@ private float readNumericDocValues() throws IOException { return docValues.longValue(); } - /** - * Interprets the bytesRef either as true / false token or tries to read it as number string - * - * @param bytesRef the value of the field that should be used as score - * @return the input converted to a number - */ - private float readSortedDocValues(BytesRef bytesRef) { - String string = bytesRef.utf8ToString(); - if (string.length() == 1 - && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { - // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN - // (see BoolField) - if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { - return 1f; - } else { - return 0f; - } - } else { - try { - return Float.parseFloat(string); - } catch (NumberFormatException ex) { - throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float."); - } - } - } - @Override public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } + /** * A FeatureScorer that reads the sorted docValues for a field */ public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { SortedDocValues docValues; - NumberType fieldNumberType; public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr) { From da6a63568f055e41218cf83335bb47cef2724380 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Mon, 17 May 2021 22:36:50 +0200 Subject: [PATCH 11/27] [SOLR-12697] add fallback feature scorer that always returns the default value; only use one model in tests 
because of extractAllFeatures==true --- .../solr/ltr/feature/FieldValueFeature.java | 31 ++- .../solr/collection1/conf/schema.xml | 1 + .../ltr/feature/TestFieldValueFeature.java | 181 ++++++++++++------ 3 files changed, 146 insertions(+), 67 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 448ab21409a..c49ddecf141 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -123,11 +123,12 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException { } else if (DocValuesType.SORTED.equals(docValuesType)) { return new SortedDocValuesFieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); - // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway - } else if (!DocValuesType.NONE.equals(docValuesType)) { - throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field - + " is not supported!"); + } else if (DocValuesType.NONE.equals(docValuesType)) { + // Using a fallback feature scorer because this segment has no documents with a doc value for the current field + return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } + throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field + + " is not supported!"); } return new FieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); @@ -290,5 +291,27 @@ public float getMaxScore(int upTo) throws IOException { } } + /** + * A FeatureScorer that always returns the default value. + * + * It is used as a fallback for cases when a segment does not have any documents that contain doc values for a field. + * By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but + * in a less performant way because it would first try to read the stored fields for the doc (which aren't present). 
+ */ + public class DefaultValueFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { + public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) { + super(weight, itr); + } + + @Override + public float score() throws IOException { + return FieldValueFeature.this.getDefaultValue(); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } } } diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index bf50149fcdd..c0170398914 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -38,6 +38,7 @@ + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 9791fb7fcd6..2796caad2a8 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -16,21 +16,30 @@ */ package org.apache.solr.ltr.feature; -import java.util.LinkedHashMap; - +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.ltr.FeatureLoggerTestUtils; import org.apache.solr.ltr.TestRerankBase; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer; import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; import org.junit.After; import org.junit.Before; import org.junit.Test; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + public class TestFieldValueFeature extends TestRerankBase { private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f; - private static final String FIELD_NAMES[] = { + private static final String[] FIELD_NAMES = { "popularity", "dvIntPopularity", "dvLongPopularity", "dvFloatPopularity", "dvDoublePopularity" @@ -65,10 +74,10 @@ public void before() throws Exception { for (String field : FIELD_NAMES) { loadFeature(field, FieldValueFeature.class.getName(), "{\"field\":\""+field+"\"}"); - - loadModel(field + "-model", LinearModel.class.getName(), - new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}"); } + loadModel("model", LinearModel.class.getName(), FIELD_NAMES, + "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + + "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0}}"); } @After @@ -78,68 +87,63 @@ public void after() throws Exception { @Test public void testRanking() throws Exception { - for (String field : FIELD_NAMES) { - - final SolrQuery query = new SolrQuery(); - query.setQuery("title:w1"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - // Normal term match - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); - assertJQ("/query" + query.toQueryString(), 
"/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); - - query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); - - query.setQuery("*:*"); - query.remove("rows"); - query.add("rows", "8"); - query.remove("rq"); - query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}"); - - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); - } + SolrQuery query = new SolrQuery(); + query.setQuery("title:w1"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + // Normal term match + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); + + query.add("rq", "{!ltr model=model reRankDocs=4}"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); + + query.setQuery("*:*"); + query.remove("rows"); + query.add("rows", "8"); + query.remove("rq"); + query.add("rq", "{!ltr model=model reRankDocs=8}"); + + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); } @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { - for (String field : FIELD_NAMES) { - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - } - } + query = new SolrQuery(); + 
query.setQuery("id:42"); + query.add("rq", "{!ltr model=model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + + "dvFloatPopularity=0.0,dvDoublePopularity=0.0'}"); + } @Test public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception { for (String field : FIELD_NAMES) { - final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field; loadFeature(field+"42", FieldValueFeature.class.getName(), fstore, @@ -162,15 +166,15 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field+"42","42.0")+"'}"); - } } @Test - public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exception { + public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception { + // this tests the case that we create a feature for a non-existent field // using a different fstore to avoid a clash with the other tests - final String fstore = "testThatIfaFieldDoesNotExistDefaultValueIsReturned"; - loadFeature("not-existing-field", FieldValueFeature.class.getName(), fstore, + final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned"; + loadFeature("not-existing-field", ObservingFieldValueFeature.class.getName(), fstore, "{\"field\":\"cowabunga\"}"); loadModel("not-existing-field-model", LinearModel.class.getName(), @@ -182,8 +186,30 @@ public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exceptio query.add("fl", "[fv]"); assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils + .toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); + } + @Test + public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception { + // this tests the case that no document contains docValues for the provided existing field + final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned"; + loadFeature("dvTestField", ObservingFieldValueFeature.class.getName(), fstore, + "{\"field\":\"dvTestField\"}"); + + loadModel("dvTestField-model", LinearModel.class.getName(), + new String[] {"dvTestField"}, fstore, "{\"weights\":{\"dvTestField\":1.0}}"); + + final SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils + .toFeatureVector("dvTestField",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); } @Test @@ -202,7 +228,6 @@ public void testBooleanValue() throws Exception { 
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}"); - query = new SolrQuery(); query.setQuery("id:5"); query.add("rq", "{!ltr model=trendy-model reRankDocs=4}"); @@ -217,7 +242,6 @@ public void testBooleanValue() throws Exception { query.add("fl", "[fv]"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}"); - } @Test @@ -227,4 +251,35 @@ public void testParamsToMap() throws Exception { doTestParamsToMap(FieldValueFeature.class.getName(), params); } + /** + * This class is used to track which specific FieldValueFeature is used so that we can test, whether the + * fallback mechanism works correctly. + */ + public static class ObservingFieldValueFeature extends FieldValueFeature { + static String usedScorerClass; + + public ObservingFieldValueFeature(String name, Map params) { + super(name, params); + } + + @Override + public Feature.FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, + Query originalQuery, Map efi) throws IOException { + return new ObservingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class ObservingFieldValueFeatureWeight extends FieldValueFeatureWeight { + public ObservingFieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, + Query originalQuery, Map efi) { + super(searcher, request, originalQuery, efi); + } + + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + FeatureScorer scorer = super.scorer(context); + usedScorerClass = scorer.getClass().getName(); + return scorer; + } + } + } } From b1056278e9b2ce99a2c368524e727cc0b82dbfb0 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Wed, 19 May 2021 09:02:10 +0200 Subject: [PATCH 12/27] [SOLR-12697] test that exception is thrown for unsupported dv type, test that right scorer classes are used, add more fields to test --- .../solr/collection1/conf/schema.xml | 4 + .../ltr/feature/TestFieldValueFeature.java | 158 ++++++++++++++---- 2 files changed, 131 insertions(+), 31 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index c0170398914..0081e1b61dd 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -30,6 +30,8 @@ + + @@ -39,6 +41,8 @@ + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 2796caad2a8..82c8bf5bacb 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -25,6 +25,8 @@ import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight; import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer; import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.SortedDocValuesFieldValueFeatureScorer; import org.apache.solr.ltr.model.LinearModel; import 
org.apache.solr.request.SolrQueryRequest; import org.junit.After; @@ -39,45 +41,55 @@ public class TestFieldValueFeature extends TestRerankBase { private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f; - private static final String[] FIELD_NAMES = { - "popularity", - "dvIntPopularity", "dvLongPopularity", - "dvFloatPopularity", "dvDoublePopularity" + private static final String[] FIELDS = { + "popularity", + "dvIntPopularity", "dvLongPopularity", + "dvFloatPopularity", "dvDoublePopularity", + "dvStringPopularity", "dvBoolPopularity" }; @Before public void before() throws Exception { setuptest(false); - assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", - "1","isTrendy","true")); - assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description", - "w2 2asd asdd didid", "popularity", "2")); - assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity", - "3","isTrendy","true")); - assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity", - "4","isTrendy","false")); - assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity", - "5","isTrendy","true")); - assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2", - "popularity", "6","isTrendy","false")); - assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description", - "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true")); - assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description", - "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false")); - - // a document without the popularity field + assertU(adoc("id", "1", "popularity", "1", "title", "w1", + "dvStringPopularity", "1", "dvBoolPopularity", "F", + "description", "w1", "isTrendy", "true")); + assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid", + "dvStringPopularity", "2", "dvBoolPopularity", "T", + "description", "w2 2asd asdd didid")); + assertU(adoc("id", "3", "popularity", "3", "title", "w3", + "dvStringPopularity", "3", "dvBoolPopularity", "F", + "description", "w3", "isTrendy", "true")); + assertU(adoc("id", "4", "popularity", "4", "title", "w4", + "dvStringPopularity", "4", "dvBoolPopularity", "T", + "description", "w4", "isTrendy", "false")); + assertU(adoc("id", "5", "popularity", "5", "title", "w5", + "dvStringPopularity", "5", "dvBoolPopularity", "F", + "description", "w5", "isTrendy", "true")); + assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2", + "dvStringPopularity", "6", "dvBoolPopularity", "T", + "description", "w1 w2", "isTrendy", "false")); + assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5", + "dvStringPopularity", "7", "dvBoolPopularity", "F", + "description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true")); + assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8", + "dvStringPopularity", "8", "dvBoolPopularity", "T", + "description", "w1 w1 w1 w2 w2", "isTrendy", "false")); + + // a document without the popularity and the dv fields assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity")); assertU(commit()); - for (String field : FIELD_NAMES) { + for (String field : FIELDS) { loadFeature(field, FieldValueFeature.class.getName(), - "{\"field\":\""+field+"\"}"); + "{\"field\":\"" + field + "\"}"); } - loadModel("model", LinearModel.class.getName(), FIELD_NAMES, - "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + - "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0}}"); + loadModel("model", LinearModel.class.getName(), FIELDS, + 
"{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + + "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," + + "\"dvStringPopularity\":1.0,\"dvBoolPopularity\":1.0}}"); } @After @@ -119,7 +131,6 @@ public void testRanking() throws Exception { assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); } - @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { SolrQuery query = new SolrQuery(); @@ -137,13 +148,14 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + - "dvFloatPopularity=0.0,dvDoublePopularity=0.0'}"); + "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + + "dvFloatPopularity=0.0,dvDoublePopularity=0.0," + + "dvStringPopularity=0.0,dvBoolPopularity=0.0'}"); } @Test public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception { - for (String field : FIELD_NAMES) { + for (String field : FIELDS) { final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field; loadFeature(field+"42", FieldValueFeature.class.getName(), fstore, @@ -169,6 +181,35 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E } } + @Test + public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception { + final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"}; + + double fieldCounter = -1.0; + for (String field : fieldsWithDefaultValues) { + final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field; + + assertU(adoc("id", "21")); + assertU(commit()); + + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:21"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, String.valueOf(fieldCounter))+"'}"); + + fieldCounter--; + } + } + @Test public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception { // this tests the case that we create a feature for a non-existent field @@ -205,6 +246,7 @@ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws E query.setQuery("id:42"); query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}"); query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils @@ -244,6 +286,60 @@ public void testBooleanValue() throws Exception { "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}"); } + @Test + public void testThatExceptionIsThrownForUnsupportedType() throws Exception { + final String fstore = "test_store"; + + assertU(adoc("id", "21", "title", "multivalued not supported", "dvUnsupportedField", "wow value")); + assertU(commit()); + + 
loadFeature("dvUnsupportedField", FieldValueFeature.class.getName(), fstore, + "{\"field\":\"dvUnsupportedField\"}"); + + loadModel("dvUnsupportedField-model", LinearModel.class.getName(), + new String[] {"dvUnsupportedField"}, fstore, "{\"weights\":{\"dvUnsupportedField\":1.0}}"); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:21"); + query.add("rq", "{!ltr model=dvUnsupportedField-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), + "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvUnsupportedField is not supported!'"); + } + + @Test + public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception { + final String[][] fieldsWithDifferentTypes = { + new String[]{"dvIntPopularity", NumericDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"dvStringPopularity", SortedDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"noDocValuesField", FieldValueFeatureScorer.class.getName()} + }; + + for (String[] fieldAndScorerClass : fieldsWithDifferentTypes) { + String field = fieldAndScorerClass[0]; + final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes"+field; + + assertU(adoc("id", "21", field, "1")); + assertU(commit()); + + loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:21"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"); + assertEquals(fieldAndScorerClass[1], ObservingFieldValueFeature.usedScorerClass); + } + } + @Test public void testParamsToMap() throws Exception { final LinkedHashMap params = new LinkedHashMap(); From e07c4328a419b392cdc60daa9396419c470acf3b Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Wed, 19 May 2021 20:57:20 +0200 Subject: [PATCH 13/27] [SOLR-12697] add tests for parsing different sortedDocValues, add entry to CHANGES.txt --- solr/CHANGES.txt | 2 ++ .../ltr/feature/TestFieldValueFeature.java | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0054194c483..9384f314b78 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -322,6 +322,8 @@ New Features * SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy) +* SOLR-12697: Add pure DocValues support to FieldValueFeature (Tom Gilke, Christine Poerschke) + Improvements --------------------- * SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState". 
(David Smiley) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 82c8bf5bacb..c485dc9fe46 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -347,6 +347,40 @@ public void testParamsToMap() throws Exception { doTestParamsToMap(FieldValueFeature.class.getName(), params); } + @Test + public void testThatStringValuesAreCorrectlyParsed() throws Exception { + final String[][] inputsAndTests = { + new String[]{"T", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "1.0")+"'}"}, + new String[]{"F", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "0.0")+"'}"}, + new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "-7324.427")+"'}"}, + new String[]{"532", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "532.0")+"'}"}, + new String[]{"notanumber", "/error/msg/=='org.apache.solr.ltr.feature.FeatureException: " + + "Cannot parse value notanumber of field dvStrNumField to float.'"} + }; + + final String fstore = "testThatStringValuesAreCorrectlyParsed"; + loadFeature("dvStrNumField", FieldValueFeature.class.getName(), fstore, + "{\"field\":\"" + "dvStrNumField" + "\"}"); + loadModel("dvStrNumField-model", LinearModel.class.getName(), + new String[]{"dvStrNumField"}, fstore, "{\"weights\":{\"" + "dvStrNumField" + "\":1.0}}"); + + for (String[] inputAndTest : inputsAndTests) { + assertU(adoc("id", "21", "dvStrNumField", inputAndTest[0])); + assertU(commit()); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:21"); + query.add("rq", "{!ltr model=" + "dvStrNumField" + "-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + } + } + /** * This class is used to track which specific FieldValueFeature is used so that we can test, whether the * fallback mechanism works correctly. From 443a3962858684e9d69511b7c97ccdba5001d81c Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 09:45:56 +0100 Subject: [PATCH 14/27] solr/CHANGES.txt edit --- solr/CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9384f314b78..36ba9459e05 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -322,7 +322,8 @@ New Features * SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy) -* SOLR-12697: Add pure DocValues support to FieldValueFeature (Tom Gilke, Christine Poerschke) +* SOLR-12697: In contrib/ltr FieldValueFeature support "stored=false docValues=true" a.k.a. pure DocValues fields. 
+ (Stanislav Livotov, Erick Erickson, Tobias Kässmann, Tom Gilke, Christine Poerschke) Improvements --------------------- From 2dbd94e74f7ff1ed0dd23b2d2bff53cfa2cfd569 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 09:47:13 +0100 Subject: [PATCH 15/27] in TestFieldValueFeature reduce potential test interaction --- .../org/apache/solr/ltr/feature/TestFieldValueFeature.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index c485dc9fe46..69d239c8ea8 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -22,7 +22,6 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.ltr.FeatureLoggerTestUtils; import org.apache.solr.ltr.TestRerankBase; -import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight; import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer; import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer; import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer; @@ -225,6 +224,7 @@ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws E query.setQuery("id:42"); query.add("rq", "{!ltr model=not-existing-field-model reRankDocs=4}"); query.add("fl", "[fv]"); + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils @@ -247,6 +247,7 @@ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws E query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}"); query.add("fl", "[fv]"); + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils @@ -333,6 +334,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); query.add("fl", "[fv]"); + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"); From 9b771546736a5dbc3091c058c4585a557cad5841 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 09:47:51 +0100 Subject: [PATCH 16/27] in FieldValueFeature clarify 'searcher instanceof SolrIndexSearcher' use --- .../src/java/org/apache/solr/ltr/feature/FieldValueFeature.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index c49ddecf141..165a073d1a2 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ 
b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -98,7 +98,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher, super(FieldValueFeature.this, searcher, request, originalQuery, efi); if (searcher instanceof SolrIndexSearcher) { schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); - } else { + } else { // some tests pass a null or a non-SolrIndexSearcher searcher schemaField = null; } } From c1f3a8ee35eb385a3660ef586647653e17f49923 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 18:14:33 +0100 Subject: [PATCH 17/27] TestFieldValueFeature: replace dvBoolPopularity with dvIsTrendy (former is more numeric, latter is more boolean and copyField from isTrend simplies the document( add)s --- .../solr/collection1/conf/schema.xml | 8 ++++-- .../ltr/feature/TestFieldValueFeature.java | 26 ++++++++++--------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 0081e1b61dd..005eacf0121 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -18,7 +18,7 @@ - + @@ -30,10 +30,12 @@ - + + + @@ -61,6 +63,8 @@ + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 69d239c8ea8..ad58986da31 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -44,7 +44,9 @@ public class TestFieldValueFeature extends TestRerankBase { "popularity", "dvIntPopularity", "dvLongPopularity", "dvFloatPopularity", "dvDoublePopularity", - "dvStringPopularity", "dvBoolPopularity" + "dvStringPopularity", + "isTrendy", + "dvIsTrendy" }; @Before @@ -52,32 +54,32 @@ public void before() throws Exception { setuptest(false); assertU(adoc("id", "1", "popularity", "1", "title", "w1", - "dvStringPopularity", "1", "dvBoolPopularity", "F", + "dvStringPopularity", "1", "description", "w1", "isTrendy", "true")); assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid", - "dvStringPopularity", "2", "dvBoolPopularity", "T", + "dvStringPopularity", "2", "description", "w2 2asd asdd didid")); assertU(adoc("id", "3", "popularity", "3", "title", "w3", - "dvStringPopularity", "3", "dvBoolPopularity", "F", + "dvStringPopularity", "3", "description", "w3", "isTrendy", "true")); assertU(adoc("id", "4", "popularity", "4", "title", "w4", - "dvStringPopularity", "4", "dvBoolPopularity", "T", + "dvStringPopularity", "4", "description", "w4", "isTrendy", "false")); assertU(adoc("id", "5", "popularity", "5", "title", "w5", - "dvStringPopularity", "5", "dvBoolPopularity", "F", + "dvStringPopularity", "5", "description", "w5", "isTrendy", "true")); assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2", - "dvStringPopularity", "6", "dvBoolPopularity", "T", + "dvStringPopularity", "6", "description", "w1 w2", "isTrendy", "false")); assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5", - "dvStringPopularity", "7", "dvBoolPopularity", "F", + "dvStringPopularity", "7", "description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true")); assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8", - "dvStringPopularity", "8", "dvBoolPopularity", 
"T", + "dvStringPopularity", "8", "description", "w1 w1 w1 w2 w2", "isTrendy", "false")); // a document without the popularity and the dv fields - assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity")); + assertU(adoc("id", "42", "title", "NO popularity or isTrendy", "description", "NO popularity or isTrendy")); assertU(commit()); @@ -88,7 +90,7 @@ public void before() throws Exception { loadModel("model", LinearModel.class.getName(), FIELDS, "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," + - "\"dvStringPopularity\":1.0,\"dvBoolPopularity\":1.0}}"); + "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}"); } @After @@ -149,7 +151,7 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + "dvFloatPopularity=0.0,dvDoublePopularity=0.0," + - "dvStringPopularity=0.0,dvBoolPopularity=0.0'}"); + "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}"); } @Test From 3c38e911483b051f5037138cd6a9cc0cc96a9ae5 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 18:24:15 +0100 Subject: [PATCH 18/27] out-scope TestLTRReRankingPipeline changes --- .../solr/collection1/conf/schema.xml | 4 +- .../solr/ltr/TestLTRReRankingPipeline.java | 334 ++++++++++-------- 2 files changed, 188 insertions(+), 150 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 005eacf0121..c033973d96d 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -19,9 +19,6 @@ - - - @@ -42,6 +39,7 @@ + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index 1b1967d47ef..85019445546 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -25,8 +25,12 @@ import java.util.List; import java.util.Map; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -37,8 +41,8 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.lucene.store.Directory; +import org.apache.solr.SolrTestCase; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.ltr.feature.Feature; import org.apache.solr.ltr.feature.FieldValueFeature; @@ -46,24 +50,16 @@ import org.apache.solr.ltr.model.TestLinearModel; import org.apache.solr.ltr.norm.IdentityNormalizer; import org.apache.solr.ltr.norm.Normalizer; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryRequest; -import org.junit.BeforeClass; import org.junit.Test; 
import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TestLTRReRankingPipeline extends SolrTestCaseJ4 { +public class TestLTRReRankingPipeline extends SolrTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath()); - @BeforeClass - public static void setup() throws Exception { - initCore("solrconfig-ltr.xml", "schema.xml"); - } - private IndexSearcher getSearcher(IndexReader r) { // 'yes' to maybe wrapping in general final boolean maybeWrap = true; @@ -113,155 +109,199 @@ public Explanation explain(LeafReaderContext context, int doc, } @Test - public void testRescorer() throws Exception { - assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F")); - assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T")); - assertU(commit()); - - try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = solrQueryRequest.getSearcher(); - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(2, hits.totalHits.value); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "finalScore"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel); - ltrScoringQuery.setRequest(solrQueryRequest); - final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery); - hits = rescorer.rescore(searcher, hits, 2); - - // rerank using the field finalScore - assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); - } + public void testRescorer() throws IOException { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard the the the the the oz", + Field.Store.NO)); + doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field + + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // 1 extra token, but wizard and oz are close; + doc.add(newTextField("field", "wizard oz the the the the the the", + Field.Store.NO)); + doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field + w.addDocument(doc); + + final IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new 
Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = getSearcher(r); + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(2, hits.totalHits.value); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); + hits = rescorer.rescore(searcher, hits, 2); + + // rerank using the field final-score + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + r.close(); + dir.close(); + } @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { - assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0")); - assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0")); - assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0")); - assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0")); - assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0")); - assertU(commit()); - - try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = solrQueryRequest.getSearcher(); - - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(5, hits.totalHits.value); - - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "finalScoreFloat"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel); - scoringQuery.setRequest(solrQueryRequest); - final LTRRescorer rescorer = new LTRRescorer(scoringQuery); - - // rerank @ 0 should not change the order - hits = 
rescorer.rescore(searcher, hits, 0); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - // test rerank with different topN cuts - - for (int topN = 1; topN <= 5; topN++) { - log.info("rerank {} documents ", topN); - hits = searcher.search(bqBuilder.build(), 10); - - final ScoreDoc[] slice = new ScoreDoc[topN]; - System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); - hits = new TopDocs(hits.totalHits, slice); - hits = rescorer.rescore(searcher, hits, topN); - for (int i = topN - 1, j = 0; i >= 0; i--, j++) { - if (log.isInfoEnabled()) { - log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) - .get("id"), j); - } - - assertEquals(i, - Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); - assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); - + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 1.0f)); + w.addDocument(doc); + + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 2.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "2", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 3.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "3", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz the the the the ", + Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 4.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "4", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz the the the the the the", + Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 5.0f)); + w.addDocument(doc); + + final IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = getSearcher(r); + + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(5, hits.totalHits.value); + + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + final 
LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); + + // rerank @ 0 should not change the order + hits = rescorer.rescore(searcher, hits, 0); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + // test rerank with different topN cuts + + for (int topN = 1; topN <= 5; topN++) { + log.info("rerank {} documents ", topN); + hits = searcher.search(bqBuilder.build(), 10); + + final ScoreDoc[] slice = new ScoreDoc[topN]; + System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); + hits = new TopDocs(hits.totalHits, slice); + hits = rescorer.rescore(searcher, hits, topN); + for (int i = topN - 1, j = 0; i >= 0; i--, j++) { + if (log.isInfoEnabled()) { + log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) + .get("id"), j); } + + assertEquals(i, + Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); + assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + } } + + r.close(); + dir.close(); + } @Test public void testDocParam() throws Exception { - try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - List features = makeFieldValueFeatures(new int[] {0}, - "finalScore"); - List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - List allFeatures = makeFieldValueFeatures(new int[] {0}, - "finalScore"); - MockModel ltrScoringModel = new MockModel("test", - features, norms, "test", allFeatures, null); - LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); - query.setRequest(solrQueryRequest); - LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + final Map test = new HashMap(); + test.put("fake", 2); + List features = makeFieldValueFeatures(new int[] {0}, + "final-score"); + List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + List allFeatures = makeFieldValueFeatures(new int[] {0}, + "final-score"); + MockModel ltrScoringModel = new MockModel("test", + features, norms, "test", allFeatures, null); + LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); + LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } - features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore"); - norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9}, "finalScore"); - ltrScoringModel = 
new MockModel("test", features, norms, - "test", allFeatures, null); - query = new LTRScoringQuery(ltrScoringModel); - query.setRequest(solrQueryRequest); - wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); + norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9}, "final-score"); + ltrScoringModel = new MockModel("test", features, norms, + "test", allFeatures, null); + query = new LTRScoringQuery(ltrScoringModel); + wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); } } + } From 53cd2fb7e50f566fff531e9692d836f170ee5565 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 18:31:49 +0100 Subject: [PATCH 19/27] FieldValueFeature: mention stored=true or docValues=true in javadocs --- .../solr/ltr/feature/FieldValueFeature.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 165a073d1a2..8a1d7cd0a83 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -41,16 +41,16 @@ import org.apache.solr.search.SolrIndexSearcher; /** - * This feature returns the value of a field in the current document + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. * Example configuration: *
{
- * "name":  "rawHits",
- * "class": "org.apache.solr.ltr.feature.FieldValueFeature",
- * "params": {
- * "field": "hits",
- * "defaultValue": -1
- * }
- * }
+ "name": "rawHits", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "params": { + "field": "hits" + } +} */ public class FieldValueFeature extends Feature { From e854f503b64b4862c45d576913d0ebba94503703 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 18:55:30 +0100 Subject: [PATCH 20/27] FieldValueFeature polishes: * undo distracting reformatting (hopefully one-off and next time 'spotless' gradle plugin will be available for contrib/ltr) * use private and final where possible * make new scorers final (but not existing scorer for back compat reasons) since no obvious need to extend --- .../solr/ltr/feature/FieldValueFeature.java | 59 +++++++++++-------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 8a1d7cd0a83..71c0eaa7d3e 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -67,8 +67,8 @@ public void setField(String field) { } @Override - public LinkedHashMap paramsToMap() { - final LinkedHashMap params = defaultParamsToMap(); + public LinkedHashMap paramsToMap() { + final LinkedHashMap params = defaultParamsToMap(); params.put("field", field); return params; } @@ -76,17 +76,19 @@ public LinkedHashMap paramsToMap() { @Override protected void validate() throws FeatureException { if (field == null || field.isEmpty()) { - throw new FeatureException(getClass().getSimpleName() + ": field must be provided"); + throw new FeatureException(getClass().getSimpleName()+ + ": field must be provided"); } } - public FieldValueFeature(String name, Map params) { + public FieldValueFeature(String name, Map params) { super(name, params); } @Override - public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, - Query originalQuery, Map efi) throws IOException { + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map efi) + throws IOException { return new FieldValueFeatureWeight(searcher, request, originalQuery, efi); } @@ -94,7 +96,7 @@ public class FieldValueFeatureWeight extends FeatureWeight { private final SchemaField schemaField; public FieldValueFeatureWeight(IndexSearcher searcher, - SolrQueryRequest request, Query originalQuery, Map efi) { + SolrQueryRequest request, Query originalQuery, Map efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); if (searcher instanceof SolrIndexSearcher) { schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); @@ -114,8 +116,8 @@ public FieldValueFeatureWeight(IndexSearcher searcher, public FeatureScorer scorer(LeafReaderContext context) throws IOException { if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { - FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); - DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; + final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + final DocValuesType docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE; if (DocValuesType.NUMERIC.equals(docValuesType)) { return new NumericDocValuesFieldValueFeatureScorer(this, context, @@ -131,16 +133,18 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException { + " is not supported!"); } return new FieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } /** * A FeatureScorer that reads the stored value for a field */ public class FieldValueFeatureScorer extends FeatureScorer { - LeafReaderContext context; - public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) { + LeafReaderContext context = null; + + public FieldValueFeatureScorer(FeatureWeight weight, + LeafReaderContext context, DocIdSetIterator itr) { super(weight, itr); this.context = context; } @@ -149,7 +153,8 @@ public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, public float score() throws IOException { try { - final Document document = context.reader().document(itr.docID(), fieldAsSet); + final Document document = context.reader().document(itr.docID(), + fieldAsSet); final IndexableField indexableField = document.getField(field); if (indexableField == null) { return getDefaultValue(); @@ -160,18 +165,22 @@ public float score() throws IOException { } else { final String string = indexableField.stringValue(); if (string.length() == 1) { - // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN + // boolean values in the index are encoded with the + // a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { - return 1f; + return 1; } if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { - return 0f; + return 0; } } } } catch (final IOException e) { - throw new FeatureException(e.toString() + ": " + "Unable to extract feature for " + name, e); + throw new FeatureException( + e.toString() + ": " + + "Unable to extract feature for " + + name, e); } return getDefaultValue(); } @@ -185,20 +194,22 @@ public float getMaxScore(int upTo) throws IOException { /** * A FeatureScorer that reads the numeric docValues for a field */ - public class NumericDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { - NumericDocValues docValues; - NumberType numberType; + public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer { + private final NumericDocValues docValues; + private final NumberType numberType; public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr, final NumberType numberType) { super(weight, itr); this.numberType = numberType; + NumericDocValues docValues; try { docValues = DocValues.getNumeric(context.reader(), field); } catch (IOException e) { throw new IllegalArgumentException("Could not read numeric docValues for field " + field); } + this.docValues = docValues; } @Override @@ -236,18 +247,20 @@ public float getMaxScore(int upTo) throws IOException { /** * A FeatureScorer that reads the sorted docValues for a field */ - public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { - SortedDocValues docValues; + public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer { + private final SortedDocValues docValues; public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, 
final LeafReaderContext context, final DocIdSetIterator itr) { super(weight, itr); + SortedDocValues docValues; try { docValues = DocValues.getSorted(context.reader(), field); } catch (IOException e) { throw new IllegalArgumentException("Could not read sorted docValues for field " + field); } + this.docValues = docValues; } @Override @@ -298,7 +311,7 @@ public float getMaxScore(int upTo) throws IOException { * By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but * in a less performant way because it would first try to read the stored fields for the doc (which aren't present). */ - public class DefaultValueFieldValueFeatureScorer extends FeatureWeight.FeatureScorer { + public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer { public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) { super(weight, itr); } From b9d3cd0c9f5c424ec5df56c59d85a06b88d65941 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 20 May 2021 20:01:23 +0200 Subject: [PATCH 21/27] [SOLR-12697] add javadoc to explain which type of FieldValueFeatureScorer is used for different types of fields --- .../apache/solr/ltr/feature/FieldValueFeature.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index 165a073d1a2..9ae9c97595d 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -51,6 +51,17 @@ * "defaultValue": -1 * } * } + * + *

+ * There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use.
+ * The chosen scorer depends on the field attributes.
+ *
+ * FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or docValues=false
+ *
+ * NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
+ * SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
+ *
+ * DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on segments
+ * where no document has a value set in the field of this feature
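+ *
+ * The feature configuration itself is unchanged for docValues fields: which of the above scorers
+ * is used depends only on the schema attributes of the referenced field, not on the feature params.
+ * As a sketch (the "dvRawHits" and "dvHits" names below are hypothetical, not part of this patch),
+ * a feature over a stored="false" docValues="true" field is declared like any other FieldValueFeature:
+ * {
+ *   "name":  "dvRawHits",
+ *   "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ *   "params": {
+ *       "field": "dvHits"
+ *   }
+ * }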
*/ public class FieldValueFeature extends Feature { From abb363210b76b627d89a402ae19a975b4cdb2b18 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 22:59:17 +0100 Subject: [PATCH 22/27] Revert "out-scope TestLTRReRankingPipeline changes" This reverts commit 3c38e911483b051f5037138cd6a9cc0cc96a9ae5. --- .../solr/collection1/conf/schema.xml | 4 +- .../solr/ltr/TestLTRReRankingPipeline.java | 334 ++++++++---------- 2 files changed, 150 insertions(+), 188 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index c033973d96d..005eacf0121 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -19,6 +19,9 @@ + + + @@ -39,7 +42,6 @@ - diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index 85019445546..1b1967d47ef 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -25,12 +25,8 @@ import java.util.List; import java.util.Map; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -41,8 +37,8 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.store.Directory; -import org.apache.solr.SolrTestCase; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.ltr.feature.Feature; import org.apache.solr.ltr.feature.FieldValueFeature; @@ -50,16 +46,24 @@ import org.apache.solr.ltr.model.TestLinearModel; import org.apache.solr.ltr.norm.IdentityNormalizer; import org.apache.solr.ltr.norm.Normalizer; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TestLTRReRankingPipeline extends SolrTestCase { +public class TestLTRReRankingPipeline extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath()); + @BeforeClass + public static void setup() throws Exception { + initCore("solrconfig-ltr.xml", "schema.xml"); + } + private IndexSearcher getSearcher(IndexReader r) { // 'yes' to maybe wrapping in general final boolean maybeWrap = true; @@ -109,199 +113,155 @@ public Explanation explain(LeafReaderContext context, int doc, } @Test - public void testRescorer() throws IOException { - final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random(), dir); - - Document doc = new Document(); - doc.add(newStringField("id", "0", Field.Store.YES)); - doc.add(newTextField("field", "wizard the the the the the oz", - 
Field.Store.NO)); - doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field - - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "1", Field.Store.YES)); - // 1 extra token, but wizard and oz are close; - doc.add(newTextField("field", "wizard oz the the the the the the", - Field.Store.NO)); - doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field - w.addDocument(doc); - - final IndexReader r = w.getReader(); - w.close(); - - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = getSearcher(r); - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(2, hits.totalHits.value); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); - hits = rescorer.rescore(searcher, hits, 2); - - // rerank using the field final-score - assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - r.close(); - dir.close(); - + public void testRescorer() throws Exception { + assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F")); + assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T")); + assertU(commit()); + + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = solrQueryRequest.getSearcher(); + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(2, hits.totalHits.value); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "finalScore"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel); + ltrScoringQuery.setRequest(solrQueryRequest); + final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery); + 
hits = rescorer.rescore(searcher, hits, 2); + + // rerank using the field finalScore + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + } } @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { - final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random(), dir); - - Document doc = new Document(); - doc.add(newStringField("id", "0", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 1.0f)); - w.addDocument(doc); - - doc = new Document(); - doc.add(newStringField("id", "1", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 2.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "2", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 3.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "3", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz oz the the the the ", - Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 4.0f)); - w.addDocument(doc); - doc = new Document(); - doc.add(newStringField("id", "4", Field.Store.YES)); - doc.add(newTextField("field", "wizard oz the the the the the the", - Field.Store.NO)); - doc.add(new FloatDocValuesField("final-score", 5.0f)); - w.addDocument(doc); - - final IndexReader r = w.getReader(); - w.close(); - - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = getSearcher(r); - - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(5, hits.totalHits.value); - - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "final-score"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); - - // rerank @ 0 should not change the order - hits = rescorer.rescore(searcher, hits, 0); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - // test rerank with 
different topN cuts - - for (int topN = 1; topN <= 5; topN++) { - log.info("rerank {} documents ", topN); - hits = searcher.search(bqBuilder.build(), 10); - - final ScoreDoc[] slice = new ScoreDoc[topN]; - System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); - hits = new TopDocs(hits.totalHits, slice); - hits = rescorer.rescore(searcher, hits, topN); - for (int i = topN - 1, j = 0; i >= 0; i--, j++) { - if (log.isInfoEnabled()) { - log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) - .get("id"), j); - } - - assertEquals(i, - Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); - assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0")); + assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0")); + assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0")); + assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0")); + assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0")); + assertU(commit()); + + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = solrQueryRequest.getSearcher(); + + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(5, hits.totalHits.value); + + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "finalScoreFloat"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel); + scoringQuery.setRequest(solrQueryRequest); + final LTRRescorer rescorer = new LTRRescorer(scoringQuery); + + // rerank @ 0 should not change the order + hits = rescorer.rescore(searcher, hits, 0); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + // test rerank with different topN cuts + + for (int topN = 1; topN <= 5; topN++) { + log.info("rerank {} documents ", topN); + hits = searcher.search(bqBuilder.build(), 10); + + final ScoreDoc[] slice = new ScoreDoc[topN]; + System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); + hits = new TopDocs(hits.totalHits, slice); + hits = rescorer.rescore(searcher, hits, 
topN); + for (int i = topN - 1, j = 0; i >= 0; i--, j++) { + if (log.isInfoEnabled()) { + log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) + .get("id"), j); + } + + assertEquals(i, + Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); + assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + } } } - - r.close(); - dir.close(); - } @Test public void testDocParam() throws Exception { - final Map test = new HashMap(); - test.put("fake", 2); - List features = makeFieldValueFeatures(new int[] {0}, - "final-score"); - List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - List allFeatures = makeFieldValueFeatures(new int[] {0}, - "final-score"); - MockModel ltrScoringModel = new MockModel("test", - features, norms, "test", allFeatures, null); - LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); - LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { + List features = makeFieldValueFeatures(new int[] {0}, + "finalScore"); + List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + List allFeatures = makeFieldValueFeatures(new int[] {0}, + "finalScore"); + MockModel ltrScoringModel = new MockModel("test", + features, norms, "test", allFeatures, null); + LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); + query.setRequest(solrQueryRequest); + LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } - features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); - norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9}, "final-score"); - ltrScoringModel = new MockModel("test", features, norms, - "test", allFeatures, null); - query = new LTRScoringQuery(ltrScoringModel); - wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore"); + norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9}, "finalScore"); + ltrScoringModel = new MockModel("test", features, norms, + "test", allFeatures, null); + query = new LTRScoringQuery(ltrScoringModel); + query.setRequest(solrQueryRequest); + wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat 
: modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } } } - } From a789b12d6919c63a19717c8fe8b4a81276aadfc2 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Thu, 20 May 2021 23:02:24 +0100 Subject: [PATCH 23/27] fix for SOLR-11134 --- .../org/apache/solr/ltr/TestLTRReRankingPipeline.java | 9 +++++---- .../test/org/apache/solr/ltr/model/TestLinearModel.java | 8 ++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index 1b1967d47ef..c4fdec25b03 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -114,6 +114,7 @@ public Explanation explain(LeafReaderContext context, int doc, @Test public void testRescorer() throws Exception { + assertU(delQ("*:*")); assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F")); assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T")); assertU(commit()); @@ -151,9 +152,9 @@ public void testRescorer() throws Exception { } } - @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { + assertU(delQ("*:*")); assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0")); assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0")); assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0")); @@ -185,8 +186,9 @@ public void testDifferentTopN() throws IOException { Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat"); + final Double featureWeight = 0.1; final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features, featureWeight)); LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel); scoringQuery.setRequest(solrQueryRequest); @@ -215,10 +217,9 @@ public void testDifferentTopN() throws IOException { log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) .get("id"), j); } - assertEquals(i, Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); - assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + assertEquals((i + 1) * features.size()*featureWeight, hits.scoreDocs[j].score, 0.00001); } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java index df03896859f..f528af3e0ec 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java @@ -47,10 +47,14 @@ public static LTRScoringModel createLinearModel(String name, List featu } public static Map makeFeatureWeights(List features) { + return makeFeatureWeights(features, 0.1); + } + + public static Map makeFeatureWeights(List features, Number weight) { final Map nameParams = new HashMap(); - final HashMap modelWeights = new HashMap(); + final HashMap modelWeights = new HashMap(); for (final 
Feature feat : features) { - modelWeights.put(feat.getName(), 0.1); + modelWeights.put(feat.getName(), weight); } nameParams.put("weights", modelWeights); return nameParams; From c42be5487d8e59e144ba0689338e7844efa656ed Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Fri, 21 May 2021 18:22:56 +0200 Subject: [PATCH 24/27] [SOLR-12697] out-scope TestLTRReRankingPipeline --- .../solr/collection1/conf/schema.xml | 3 - .../solr/ltr/TestLTRReRankingPipeline.java | 337 ++++++++++-------- 2 files changed, 188 insertions(+), 152 deletions(-) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 005eacf0121..6c386ef23d9 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -19,9 +19,6 @@ - - - diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java index c4fdec25b03..85019445546 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java @@ -25,8 +25,12 @@ import java.util.List; import java.util.Map; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -37,8 +41,8 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.lucene.store.Directory; +import org.apache.solr.SolrTestCase; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.ltr.feature.Feature; import org.apache.solr.ltr.feature.FieldValueFeature; @@ -46,24 +50,16 @@ import org.apache.solr.ltr.model.TestLinearModel; import org.apache.solr.ltr.norm.IdentityNormalizer; import org.apache.solr.ltr.norm.Normalizer; -import org.apache.solr.request.LocalSolrQueryRequest; -import org.apache.solr.request.SolrQueryRequest; -import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TestLTRReRankingPipeline extends SolrTestCaseJ4 { +public class TestLTRReRankingPipeline extends SolrTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath()); - @BeforeClass - public static void setup() throws Exception { - initCore("solrconfig-ltr.xml", "schema.xml"); - } - private IndexSearcher getSearcher(IndexReader r) { // 'yes' to maybe wrapping in general final boolean maybeWrap = true; @@ -113,156 +109,199 @@ public Explanation explain(LeafReaderContext context, int doc, } @Test - public void testRescorer() throws Exception { - assertU(delQ("*:*")); - assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F")); - assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T")); - assertU(commit()); - - try 
(SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = solrQueryRequest.getSearcher(); - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(2, hits.totalHits.value); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "finalScore"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore"); - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); - - LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel); - ltrScoringQuery.setRequest(solrQueryRequest); - final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery); - hits = rescorer.rescore(searcher, hits, 2); - - // rerank using the field finalScore - assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); - } + public void testRescorer() throws IOException { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard the the the the the oz", + Field.Store.NO)); + doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field + + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + // 1 extra token, but wizard and oz are close; + doc.add(newTextField("field", "wizard oz the the the the the the", + Field.Store.NO)); + doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field + w.addDocument(doc); + + final IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = getSearcher(r); + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(2, hits.totalHits.value); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + final LTRRescorer rescorer = new LTRRescorer(new 
LTRScoringQuery(ltrScoringModel)); + hits = rescorer.rescore(searcher, hits, 2); + + // rerank using the field final-score + assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id")); + + r.close(); + dir.close(); + } + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134") @Test public void testDifferentTopN() throws IOException { - assertU(delQ("*:*")); - assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0")); - assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0")); - assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0")); - assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0")); - assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0")); - assertU(commit()); - - try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - // Do ordinary BooleanQuery: - final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); - bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); - bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); - final IndexSearcher searcher = solrQueryRequest.getSearcher(); - - // first run the standard query - TopDocs hits = searcher.search(bqBuilder.build(), 10); - assertEquals(5, hits.totalHits.value); - - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, - "finalScoreFloat"); - final List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, - 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat"); - final Double featureWeight = 0.1; - final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", - features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features, featureWeight)); - - LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel); - scoringQuery.setRequest(solrQueryRequest); - final LTRRescorer rescorer = new LTRRescorer(scoringQuery); - - // rerank @ 0 should not change the order - hits = rescorer.rescore(searcher, hits, 0); - assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); - assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); - assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); - assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); - assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); - - // test rerank with different topN cuts - - for (int topN = 1; topN <= 5; topN++) { - log.info("rerank {} documents ", topN); - hits = searcher.search(bqBuilder.build(), 10); - - final ScoreDoc[] slice = new ScoreDoc[topN]; - System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); - hits = new TopDocs(hits.totalHits, slice); - hits = rescorer.rescore(searcher, hits, topN); - for (int i = topN - 1, j = 0; i >= 0; i--, j++) { - if (log.isInfoEnabled()) { - log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) - .get("id"), j); 
- } - assertEquals(i, - Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); - assertEquals((i + 1) * features.size()*featureWeight, hits.scoreDocs[j].score, 0.00001); - + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + Document doc = new Document(); + doc.add(newStringField("id", "0", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 1.0f)); + w.addDocument(doc); + + doc = new Document(); + doc.add(newStringField("id", "1", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 2.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "2", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 3.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "3", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz oz the the the the ", + Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 4.0f)); + w.addDocument(doc); + doc = new Document(); + doc.add(newStringField("id", "4", Field.Store.YES)); + doc.add(newTextField("field", "wizard oz the the the the the the", + Field.Store.NO)); + doc.add(new FloatDocValuesField("final-score", 5.0f)); + w.addDocument(doc); + + final IndexReader r = w.getReader(); + w.close(); + + // Do ordinary BooleanQuery: + final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD); + final IndexSearcher searcher = getSearcher(r); + + // first run the standard query + TopDocs hits = searcher.search(bqBuilder.build(), 10); + assertEquals(5, hits.totalHits.value); + + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + final List features = makeFieldValueFeatures(new int[] {0, 1, 2}, + "final-score"); + final List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + final List allFeatures = makeFieldValueFeatures(new int[] {0, 1, + 2, 3, 4, 5, 6, 7, 8, 9}, "final-score"); + final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", + features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features)); + + final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel)); + + // rerank @ 0 should not change the order + hits = rescorer.rescore(searcher, hits, 0); + assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id")); + assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id")); + assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id")); + assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id")); + assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id")); + + // test rerank with different topN cuts + + for (int topN = 1; topN <= 5; topN++) { + log.info("rerank {} documents ", topN); + hits = searcher.search(bqBuilder.build(), 10); + + final ScoreDoc[] slice 
= new ScoreDoc[topN]; + System.arraycopy(hits.scoreDocs, 0, slice, 0, topN); + hits = new TopDocs(hits.totalHits, slice); + hits = rescorer.rescore(searcher, hits, topN); + for (int i = topN - 1, j = 0; i >= 0; i--, j++) { + if (log.isInfoEnabled()) { + log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc) + .get("id"), j); } + + assertEquals(i, + Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id"))); + assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001); + } } + + r.close(); + dir.close(); + } @Test public void testDocParam() throws Exception { - try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) { - List features = makeFieldValueFeatures(new int[] {0}, - "finalScore"); - List norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - List allFeatures = makeFieldValueFeatures(new int[] {0}, - "finalScore"); - MockModel ltrScoringModel = new MockModel("test", - features, norms, "test", allFeatures, null); - LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); - query.setRequest(solrQueryRequest); - LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + final Map test = new HashMap(); + test.put("fake", 2); + List features = makeFieldValueFeatures(new int[] {0}, + "final-score"); + List norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + List allFeatures = makeFieldValueFeatures(new int[] {0}, + "final-score"); + MockModel ltrScoringModel = new MockModel("test", + features, norms, "test", allFeatures, null); + LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel); + LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); + LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); + } - features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore"); - norms = - new ArrayList( - Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); - allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, - 9}, "finalScore"); - ltrScoringModel = new MockModel("test", features, norms, - "test", allFeatures, null); - query = new LTRScoringQuery(ltrScoringModel); - query.setRequest(solrQueryRequest); - wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f); - modelScr = wgt.scorer(null); - modelScr.getDocInfo().setOriginalDocScore(1f); - for (final Scorable.ChildScorable feat : modelScr.getChildren()) { - assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); - } + features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score"); + norms = + new ArrayList( + Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE)); + allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, + 9}, "final-score"); + ltrScoringModel = new MockModel("test", features, norms, + "test", allFeatures, null); + query = new LTRScoringQuery(ltrScoringModel); + wgt = 
query.createWeight(null, ScoreMode.COMPLETE, 1f); + modelScr = wgt.scorer(null); + modelScr.getDocInfo().setOriginalDocScore(1f); + for (final Scorable.ChildScorable feat : modelScr.getChildren()) { + assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore()); } } + } From 83bc1ee5fd292e8842e8c99a14ffbc1227bac4a8 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Mon, 24 May 2021 12:28:05 +0100 Subject: [PATCH 25/27] apologies, multiple TestFieldValueFeature polishes in one commit, approximately: * minor style tweaks e.g. new SolrQuery(id:21) instead of new SolrQuery(); setQuery(id:21) * in testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned for clarity remove assumption w.r.t. field default values being -1/-2/-3 sequential * in testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned replace generic dvTestField name with dvDoubleField and also dvStrBoolField to also cover a non-numeric field * in testThatExceptionIsThrownForUnsupportedType replace generic dvUnsupportedField with dvStringPopularities for the popularities (plural) naming to help signal unsupportedness * in testThatCorrectFieldValueFeatureIsUsedForDocValueTypes replace noDocValuesField with noDvFloatField and noDvStrNumField to cover both numeric and non-numeric * in testThatStringValuesAreCorrectlyParsed also cover non-docValues field _and_ ensure behavioural consistency between dv and non-dv fields [*** this required implementation adjustment for the dv field ***] --- .../solr/ltr/feature/FieldValueFeature.java | 20 +-- .../solr/collection1/conf/schema.xml | 7 +- .../apache/solr/ltr/TestLTROnSolrCloud.java | 16 +- .../ltr/feature/TestFieldValueFeature.java | 169 ++++++++++-------- 4 files changed, 112 insertions(+), 100 deletions(-) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index f57e6c52f51..563a01d14df 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -141,7 +141,7 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException { return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field - + " is not supported!"); + + " is not supported"); } return new FieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); @@ -291,22 +291,18 @@ public float score() throws IOException { */ private float readSortedDocValues(BytesRef bytesRef) { String string = bytesRef.utf8ToString(); - if (string.length() == 1 - && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) { - // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN + if (string.length() == 1) { + // boolean values in the index are encoded with the + // a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { - return 1f; - } else { - return 0f; + return 1; } - } else { - try { - return Float.parseFloat(string); - } catch (NumberFormatException ex) { - throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float."); + if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { + 
return 0; } } + return FieldValueFeature.this.getDefaultValue(); } @Override diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index 6c386ef23d9..b27542060f4 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -28,6 +28,7 @@ + @@ -39,9 +40,9 @@ - - - + + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index a9f6d36f235..910c0061af7 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -112,31 +112,31 @@ public void testSimpleQuery() throws Exception { final String result0_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0", "dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8", - "dvStrNumFieldFeature", "8.0", "dvStrBoolFieldFeature", "1.0"); + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); final String result1_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0", "dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7", - "dvStrNumFieldFeature", "7.0", "dvStrBoolFieldFeature", "0.0"); + "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0"); final String result2_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0", "dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6", - "dvStrNumFieldFeature", "6.0", "dvStrBoolFieldFeature", "1.0"); + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); final String result3_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0", "dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5", - "dvStrNumFieldFeature", "5.0", "dvStrBoolFieldFeature", "0.0"); + "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0"); final String result4_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0", "dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4", - "dvStrNumFieldFeature", "4.0", "dvStrBoolFieldFeature", "1.0"); + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); final String result5_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0", "dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3", - "dvStrNumFieldFeature", "3.0", "dvStrBoolFieldFeature", "0.0"); + "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0"); final String result6_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0", "dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2", - "dvStrNumFieldFeature", "2.0", "dvStrBoolFieldFeature", "1.0"); + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); final String result7_features = 
FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0", "dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0", @@ -262,7 +262,7 @@ void indexDocument(String collection, String id, String title, String descriptio doc.setField("dvLongField", popularity); doc.setField("dvFloatField", ((float) popularity) / 10); doc.setField("dvDoubleField", ((double) popularity) / 10); - doc.setField("dvStrNumField", popularity); + doc.setField("dvStrNumField", popularity % 2 == 0 ? "F" : "T"); doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F"); } solrCluster.getSolrClient().add(collection, doc); diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index ad58986da31..e9bc943fcf3 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -16,6 +16,10 @@ */ package org.apache.solr.ltr.feature; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -32,10 +36,6 @@ import org.junit.Before; import org.junit.Test; -import java.io.IOException; -import java.util.LinkedHashMap; -import java.util.Map; - public class TestFieldValueFeature extends TestRerankBase { private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f; @@ -87,10 +87,10 @@ public void before() throws Exception { loadFeature(field, FieldValueFeature.class.getName(), "{\"field\":\"" + field + "\"}"); } - loadModel("model", LinearModel.class.getName(), FIELDS, - "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + - "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," + - "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}"); + loadModel("model", LinearModel.class.getName(), FIELDS, + "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + + "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," + + "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}"); } @After @@ -100,7 +100,8 @@ public void after() throws Exception { @Test public void testRanking() throws Exception { - SolrQuery query = new SolrQuery(); + + final SolrQuery query = new SolrQuery(); query.setQuery("title:w1"); query.add("fl", "*, score"); query.add("rows", "4"); @@ -132,6 +133,7 @@ public void testRanking() throws Exception { assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); } + @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { SolrQuery query = new SolrQuery(); @@ -147,6 +149,10 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep query.add("rq", "{!ltr model=model reRankDocs=4}"); query.add("fl", "[fv]"); + // "0.0" in the assertJQ below is more readable than + // Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL) but first make sure it's equivalent + assertEquals("0.0", Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL)); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + @@ -154,6 +160,7 @@ public void 
testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}"); } + @Test public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception { for (String field : FIELDS) { @@ -185,9 +192,12 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E @Test public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception { final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"}; + final String[] defaultValues = {"-1.0", "-2.0", "-3.0"}; + + for (int idx = 0; idx < fieldsWithDefaultValues.length; ++idx) { + final String field = fieldsWithDefaultValues[idx]; + final String defaultValue = defaultValues[idx]; - double fieldCounter = -1.0; - for (String field : fieldsWithDefaultValues) { final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field; assertU(adoc("id", "21")); @@ -198,16 +208,13 @@ public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() loadModel(field+"-model", LinearModel.class.getName(), new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); - SolrQuery query = new SolrQuery(); - query.setQuery("id:21"); + final SolrQuery query = new SolrQuery("id:21"); query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); query.add("fl", "[fv]"); assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, String.valueOf(fieldCounter))+"'}"); - - fieldCounter--; + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, defaultValue)+"'}"); } } @@ -229,32 +236,35 @@ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws E ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils - .toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); } @Test public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception { + final String[] fieldsWithoutDefaultValues = {"dvDoubleField", "dvStrBoolField"}; // this tests the case that no document contains docValues for the provided existing field - final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned"; - loadFeature("dvTestField", ObservingFieldValueFeature.class.getName(), fstore, - "{\"field\":\"dvTestField\"}"); - loadModel("dvTestField-model", LinearModel.class.getName(), - new String[] {"dvTestField"}, fstore, "{\"weights\":{\"dvTestField\":1.0}}"); + for (String field : fieldsWithoutDefaultValues) { + final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned"+field; - final SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}"); - query.add("fl", "[fv]"); + loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); - 
ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils - .toFeatureVector("dvTestField",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); - assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\""+field+"\":1.0}}"); + + final SolrQuery query = new SolrQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils + .toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); + } } @Test @@ -273,6 +283,7 @@ public void testBooleanValue() throws Exception { assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}"); + query = new SolrQuery(); query.setQuery("id:5"); query.add("rq", "{!ltr model=trendy-model reRankDocs=4}"); @@ -293,37 +304,38 @@ public void testBooleanValue() throws Exception { public void testThatExceptionIsThrownForUnsupportedType() throws Exception { final String fstore = "test_store"; - assertU(adoc("id", "21", "title", "multivalued not supported", "dvUnsupportedField", "wow value")); + assertU(adoc("id", "21", "title", "multivalued not supported", "dvStringPopularities", "wow value")); assertU(commit()); - loadFeature("dvUnsupportedField", FieldValueFeature.class.getName(), fstore, - "{\"field\":\"dvUnsupportedField\"}"); + loadFeature("dvStringPopularities", FieldValueFeature.class.getName(), fstore, + "{\"field\":\"dvStringPopularities\"}"); - loadModel("dvUnsupportedField-model", LinearModel.class.getName(), - new String[] {"dvUnsupportedField"}, fstore, "{\"weights\":{\"dvUnsupportedField\":1.0}}"); + loadModel("dvStringPopularities-model", LinearModel.class.getName(), + new String[] {"dvStringPopularities"}, fstore, "{\"weights\":{\"dvStringPopularities\":1.0}}"); - SolrQuery query = new SolrQuery(); - query.setQuery("id:21"); - query.add("rq", "{!ltr model=dvUnsupportedField-model reRankDocs=4}"); + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=dvStringPopularities-model reRankDocs=4}"); query.add("fl", "[fv]"); assertJQ("/query" + query.toQueryString(), - "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvUnsupportedField is not supported!'"); + "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'"); } @Test public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception { final String[][] fieldsWithDifferentTypes = { - new String[]{"dvIntPopularity", NumericDocValuesFieldValueFeatureScorer.class.getName()}, - new String[]{"dvStringPopularity", SortedDocValuesFieldValueFeatureScorer.class.getName()}, - new String[]{"noDocValuesField", FieldValueFeatureScorer.class.getName()} + new String[]{"dvIntPopularity", "1", 
NumericDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()}, + new String[]{"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()} }; for (String[] fieldAndScorerClass : fieldsWithDifferentTypes) { - String field = fieldAndScorerClass[0]; + final String field = fieldAndScorerClass[0]; + final String fieldValue = fieldAndScorerClass[1]; final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes"+field; - assertU(adoc("id", "21", field, "1")); + assertU(adoc("id", "21", field, fieldValue)); assertU(commit()); loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore, @@ -331,8 +343,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce loadModel(field+"-model", LinearModel.class.getName(), new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); - SolrQuery query = new SolrQuery(); - query.setQuery("id:21"); + final SolrQuery query = new SolrQuery("id:21"); query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); query.add("fl", "[fv]"); @@ -340,7 +351,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"); - assertEquals(fieldAndScorerClass[1], ObservingFieldValueFeature.usedScorerClass); + assertEquals(fieldAndScorerClass[2], ObservingFieldValueFeature.usedScorerClass); } } @@ -353,35 +364,39 @@ public void testParamsToMap() throws Exception { @Test public void testThatStringValuesAreCorrectlyParsed() throws Exception { - final String[][] inputsAndTests = { - new String[]{"T", "/response/docs/[0]/=={'[fv]':'" + - FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "1.0")+"'}"}, - new String[]{"F", "/response/docs/[0]/=={'[fv]':'" + - FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "0.0")+"'}"}, - new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" + - FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "-7324.427")+"'}"}, - new String[]{"532", "/response/docs/[0]/=={'[fv]':'" + - FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "532.0")+"'}"}, - new String[]{"notanumber", "/error/msg/=='org.apache.solr.ltr.feature.FeatureException: " + - "Cannot parse value notanumber of field dvStrNumField to float.'"} - }; - - final String fstore = "testThatStringValuesAreCorrectlyParsed"; - loadFeature("dvStrNumField", FieldValueFeature.class.getName(), fstore, - "{\"field\":\"" + "dvStrNumField" + "\"}"); - loadModel("dvStrNumField-model", LinearModel.class.getName(), - new String[]{"dvStrNumField"}, fstore, "{\"weights\":{\"" + "dvStrNumField" + "\":1.0}}"); + for (String field : new String[] {"dvStrNumField" , "noDvStrNumField"}) { + final String[][] inputsAndTests = { + new String[]{"T", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"}, + new String[]{"F", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "0.0")+"'}"}, + new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{"532", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, 
Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{Float.toString(Float.NaN), "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{"notanumber", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"} + }; + + final String fstore = "testThatStringValuesAreCorrectlyParsed"+field; + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\"" + field + "\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[]{field}, fstore, + "{\"weights\":{\""+field+"\":1.0}}"); - for (String[] inputAndTest : inputsAndTests) { - assertU(adoc("id", "21", "dvStrNumField", inputAndTest[0])); - assertU(commit()); + for (String[] inputAndTest : inputsAndTests) { + assertU(adoc("id", "21", field, inputAndTest[0])); + assertU(commit()); - SolrQuery query = new SolrQuery(); - query.setQuery("id:21"); - query.add("rq", "{!ltr model=" + "dvStrNumField" + "-model reRankDocs=4}"); - query.add("fl", "[fv]"); + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}"); + query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + } } } @@ -389,7 +404,7 @@ public void testThatStringValuesAreCorrectlyParsed() throws Exception { * This class is used to track which specific FieldValueFeature is used so that we can test, whether the * fallback mechanism works correctly. */ - public static class ObservingFieldValueFeature extends FieldValueFeature { + final public static class ObservingFieldValueFeature extends FieldValueFeature { static String usedScorerClass; public ObservingFieldValueFeature(String name, Map params) { From 385d8b2a39fc892a0dc1f6836c43cd298374b9ec Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Tue, 25 May 2021 13:21:48 +0100 Subject: [PATCH 26/27] add TestFieldValueFeature.testThatDateValuesAreCorrectlyParsed() --- .../solr/collection1/conf/schema.xml | 2 ++ .../ltr/feature/TestFieldValueFeature.java | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index b27542060f4..cc85353ee8a 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -40,9 +40,11 @@ + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index e9bc943fcf3..15a007bb584 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -400,6 +400,38 @@ public void testThatStringValuesAreCorrectlyParsed() throws Exception { } } + @Test + public void testThatDateValuesAreCorrectlyParsed() throws Exception { + for (String field : new String[] {"dvDateField", "noDvDateField"}) { + final String[][] inputsAndTests = { + new String[]{"1970-01-01T00:00:00.000Z", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "0.0")+"'}"}, + new String[]{"1970-01-01T00:00:00.001Z", "/response/docs/[0]/=={'[fv]':'" + + 
FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"}, + new String[]{"1970-01-01T00:00:01.234Z", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "1234.0")+"'}"} + }; + + final String fstore = "testThatDateValuesAreCorrectlyParsed"+field; + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\"" + field + "\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[]{field}, fstore, + "{\"weights\":{\""+field+"\":1.0}}"); + + for (String[] inputAndTest : inputsAndTests) { + assertU(adoc("id", "21", field, inputAndTest[0])); + assertU(commit()); + + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + } + } + } + /** * This class is used to track which specific FieldValueFeature is used so that we can test, whether the * fallback mechanism works correctly. From ad489d0d35f842cf449bcd970830732a0fceeba5 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Wed, 26 May 2021 17:31:39 +0100 Subject: [PATCH 27/27] small TestLTROnSolrCloud polish: * undo distracting reformatting (hopefully one-off and next time 'spotless' gradle plugin will be available for contrib/ltr) --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 910c0061af7..e6fc0c852a4 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -49,7 +49,7 @@ public class TestLTROnSolrCloud extends TestRerankBase { String solrconfig = "solrconfig-ltr.xml"; String schema = "schema.xml"; - SortedMap extraServlets = null; + SortedMap extraServlets = null; @Override public void setUp() throws Exception { @@ -57,14 +57,17 @@ public void setUp() throws Exception { extraServlets = setupTestInit(solrconfig, schema, true); System.setProperty("enable.update.log", "true"); - int numberOfShards = random().nextInt(4) + 1; - int numberOfReplicas = random().nextInt(2) + 1; + int numberOfShards = random().nextInt(4)+1; + int numberOfReplicas = random().nextInt(2)+1; int numberOfNodes = numberOfShards * numberOfReplicas; setupSolrCluster(numberOfShards, numberOfReplicas, numberOfNodes); + + } + @Override public void tearDown() throws Exception { restTestHarness.close(); @@ -89,7 +92,7 @@ public void testSimpleQuery() throws Exception { query.setParam("rows", "8"); QueryResponse queryResponse = - solrCluster.getSolrClient().query(COLLECTION, query); + solrCluster.getSolrClient().query(COLLECTION,query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("1", queryResponse.getResults().get(0).get("id").toString()); assertEquals("2", queryResponse.getResults().get(1).get("id").toString()); @@ -100,14 +103,14 @@ public void testSimpleQuery() throws Exception { assertEquals("7", queryResponse.getResults().get(6).get("id").toString()); assertEquals("8", queryResponse.getResults().get(7).get("id").toString()); - final Float original_result0_score = (Float) queryResponse.getResults().get(0).get("score"); - final Float original_result1_score = (Float) queryResponse.getResults().get(1).get("score"); - final Float original_result2_score = (Float) 
queryResponse.getResults().get(2).get("score"); - final Float original_result3_score = (Float) queryResponse.getResults().get(3).get("score"); - final Float original_result4_score = (Float) queryResponse.getResults().get(4).get("score"); - final Float original_result5_score = (Float) queryResponse.getResults().get(5).get("score"); - final Float original_result6_score = (Float) queryResponse.getResults().get(6).get("score"); - final Float original_result7_score = (Float) queryResponse.getResults().get(7).get("score"); + final Float original_result0_score = (Float)queryResponse.getResults().get(0).get("score"); + final Float original_result1_score = (Float)queryResponse.getResults().get(1).get("score"); + final Float original_result2_score = (Float)queryResponse.getResults().get(2).get("score"); + final Float original_result3_score = (Float)queryResponse.getResults().get(3).get("score"); + final Float original_result4_score = (Float)queryResponse.getResults().get(4).get("score"); + final Float original_result5_score = (Float)queryResponse.getResults().get(5).get("score"); + final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score"); + final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score"); final String result0_features = FeatureLoggerTestUtils.toFeatureVector( "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0", @@ -145,7 +148,8 @@ public void testSimpleQuery() throws Exception { // Test feature vectors returned (without re-ranking) query.setFields("*,score,features:[fv store=test]"); - queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); + queryResponse = + solrCluster.getSolrClient().query(COLLECTION,query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("1", queryResponse.getResults().get(0).get("id").toString()); assertEquals("2", queryResponse.getResults().get(1).get("id").toString()); @@ -185,7 +189,8 @@ public void testSimpleQuery() throws Exception { // Test feature vectors returned (with re-ranking) query.setFields("*,score,features:[fv]"); query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); - queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); + queryResponse = + solrCluster.getSolrClient().query(COLLECTION,query); assertEquals(8, queryResponse.getResults().getNumFound()); assertEquals("8", queryResponse.getResults().get(0).get("id").toString()); assertEquals(result0_features, @@ -225,7 +230,7 @@ private void setupSolrCluster(int numShards, int numReplicas, int numServers) th createCollection(COLLECTION, "conf1", numShards, numReplicas); indexDocuments(COLLECTION); for (JettySolrRunner solrRunner : solrCluster.getJettySolrRunners()) { - if (!solrRunner.getCoreContainer().getCores().isEmpty()) { + if (!solrRunner.getCoreContainer().getCores().isEmpty()){ String coreName = solrRunner.getCoreContainer().getCores().iterator().next().getName(); restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName); break; @@ -249,8 +254,9 @@ private void createCollection(String name, String config, int numShards, int num solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas); } + void indexDocument(String collection, String id, String title, String description, int popularity) - throws Exception { + throws Exception{ SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", id); doc.setField("title", title); @@ -268,7 +274,8 @@ void indexDocument(String 
collection, String id, String title, String descriptio solrCluster.getSolrClient().add(collection, doc); } - private void indexDocuments(final String collection) throws Exception { + private void indexDocuments(final String collection) + throws Exception { final int collectionSize = 8; // put documents in random order to check that advanceExact is working correctly List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList()); @@ -375,4 +382,5 @@ public static void after() throws Exception { } System.clearProperty("managed.schema.mutable"); } + }
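
The string-value handling that the TestFieldValueFeature assertions earlier in this series encode (testThatStringValuesAreCorrectlyParsed, for both the docValues field dvStrNumField and the stored field noDvStrNumField) can be summarised with the following sketch. This is only an illustration of the behaviour the tests assert, not code from the patches; the helper name stringToFeatureValue is hypothetical.

    // Hedged sketch of the mapping asserted by testThatStringValuesAreCorrectlyParsed:
    // single-char boolean tokens become 1/0, anything else falls back to the
    // feature's default value, for docValues and stored fields alike.
    static float stringToFeatureValue(String indexedValue, float defaultValue) {
      if ("T".equals(indexedValue)) { // BoolField.TRUE_TOKEN
        return 1f;
      }
      if ("F".equals(indexedValue)) { // BoolField.FALSE_TOKEN
        return 0f;
      }
      // numeric-looking strings such as "532" or "-7324.427" are not parsed;
      // per the test expectations they resolve to the default value, keeping
      // dv and non-dv fields behaviourally consistent
      return defaultValue;
    }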