From 2ee8779246f0e96de68123aa5af2501c6a671c7f Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 12 May 2021 16:43:32 +0200
Subject: [PATCH 01/27] [SOLR-12697] add DocValuesFieldValueFeatureScorer to
 read docValues for LTR feature calculation
---
.../solr/ltr/feature/FieldValueFeature.java | 146 +++++++-
.../solr/collection1/conf/schema.xml | 11 +-
.../apache/solr/ltr/TestLTROnSolrCloud.java | 79 ++++-
.../solr/ltr/TestLTRReRankingPipeline.java | 334 ++++++++----------
4 files changed, 359 insertions(+), 211 deletions(-)
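
Note: the change boils down to reading per-document feature values straight from Lucene docValues instead of loading stored fields. A minimal sketch of that lookup with plain Lucene APIs follows; the field name "dvIntField" and the -1 fallback are illustrative only, and advanceExact is used here, which the later patches in this series also rely on.

    import java.io.IOException;
    import org.apache.lucene.index.DocValues;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.NumericDocValues;

    class DocValueReadSketch {
      // Read the numeric docValue of one document, or fall back to a default
      // when the document has no value; this is the per-hit lookup the new
      // DocValuesFieldValueFeatureScorer performs.
      static long readOrDefault(LeafReaderContext context, int docId) throws IOException {
        NumericDocValues values = DocValues.getNumeric(context.reader(), "dvIntField");
        // advanceExact positions the iterator on docId and reports whether a value exists
        return values.advanceExact(docId) ? values.longValue() : -1L;
      }
    }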
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index d12795d2663..129ecf03950 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -23,24 +23,35 @@
import java.util.Set;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.NumberType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SolrIndexSearcher;
/**
* This feature returns the value of a field in the current document
* Example configuration:
*
{
- "name": "rawHits",
- "class": "org.apache.solr.ltr.feature.FieldValueFeature",
- "params": {
- "field": "hits"
- }
-}
+ * "name": "rawHits",
+ * "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ * "params": {
+ * "field": "hits",
+ * "defaultValue": -1
+ * }
+ * }
*/
public class FieldValueFeature extends Feature {
@@ -83,24 +94,42 @@ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
}
public class FieldValueFeatureWeight extends FeatureWeight {
+ private final SchemaField schemaField;
public FieldValueFeatureWeight(IndexSearcher searcher,
SolrQueryRequest request, Query originalQuery, Map efi) {
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
+ if (searcher instanceof SolrIndexSearcher) {
+ schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
+ } else {
+ schemaField = null;
+ }
}
+ /**
+ * Return a FeatureScorer that reads docValues, or falls back to stored fields if no docValues are present
+ * @param context the segment this FeatureScorer is working with
+ * @return FeatureScorer for the current segment and field
+ * @throws IOException as defined by abstract class Feature
+ */
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+ // always prefer docValues
+ if (schemaField != null && schemaField.hasDocValues()) {
+ return new DocValuesFieldValueFeatureScorer(this, context,
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField);
+ }
return new FieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
+ /**
+ * A FeatureScorer that reads the stored value for a field
+ */
public class FieldValueFeatureScorer extends FeatureScorer {
- LeafReaderContext context = null;
+ LeafReaderContext context;
-
- public FieldValueFeatureScorer(FeatureWeight weight,
- LeafReaderContext context, DocIdSetIterator itr) {
+ public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) {
super(weight, itr);
this.context = context;
}
@@ -146,5 +175,102 @@ public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
+
+ /**
+ * A FeatureScorer that reads the docValues for a field
+ */
+ public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
+ final LeafReaderContext context;
+ final DocIdSetIterator docValues;
+ final FieldType schemaFieldType;
+ DocValuesType docValuesType = DocValuesType.NONE;
+
+ public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
+ final DocIdSetIterator itr, final SchemaField schemaField) {
+ super(weight, itr);
+ this.context = context;
+ schemaFieldType = schemaField.getType();
+
+ try {
+ FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
+ // if fieldInfo is null, the field does not exist in this segment, so fall back to NONE and let score() return the default value
+ docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
+ switch (docValuesType) {
+ case NUMERIC:
+ docValues = DocValues.getNumeric(context.reader(), field);
+ break;
+ case SORTED:
+ docValues = DocValues.getSorted(context.reader(), field);
+ break;
+ case BINARY:
+ docValues = DocValues.getBinary(context.reader(), field);
+ break;
+ case SORTED_NUMERIC:
+ case SORTED_SET:
+ case NONE:
+ default:
+ docValues = null;
+ }
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType "
+ + docValuesType.name(), e);
+ }
+ }
+
+ @Override
+ public float score() throws IOException {
+ if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) {
+ switch (docValuesType) {
+ case NUMERIC:
+ if (NumberType.FLOAT.equals(schemaFieldType.getNumberType())) {
+ // convert float value that was stored as long back to float
+ return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue());
+ } else if (NumberType.DOUBLE.equals(schemaFieldType.getNumberType())) {
+ // convert double value that was stored as long back to double, then narrow to float
+ return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue());
+ }
+ // just take the long value
+ return ((NumericDocValues) docValues).longValue();
+ case SORTED:
+ int ord = ((SortedDocValues) docValues).ordValue();
+ // try to interpret bytesRef either as number string or as true / false token
+ return handleBytesRef(((SortedDocValues) docValues).lookupOrd(ord));
+ case BINARY:
+ case SORTED_SET:
+ case SORTED_NUMERIC:
+ case NONE:
+ default:
+ throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
+ + " is not supported!");
+ }
+ }
+ return FieldValueFeature.this.getDefaultValue();
+ }
+
+ private float handleBytesRef(BytesRef bytesRef) {
+ String string = bytesRef.utf8ToString();
+ if (string.length() == 1
+ && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
+ // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
+ // (see BoolField)
+ if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
+ return 1f;
+ } else {
+ return 0f;
+ }
+ } else {
+ try {
+ return Float.parseFloat(string);
+ } catch (NumberFormatException ex) {
+ throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float.");
+ }
+ }
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return Float.POSITIVE_INFINITY;
+ }
+ }
}
}
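
The NUMERIC branch above depends on how Solr encodes float and double docValues as longs. A small sketch of that round trip, assuming the values were written via Float.floatToIntBits / Double.doubleToLongBits (values illustrative only):

    class NumericDocValueRoundTrip {
      public static void main(String[] args) {
        long floatBits = Float.floatToIntBits(0.8f);                        // form stored in NumericDocValues
        float decodedFloat = Float.intBitsToFloat((int) floatBits);         // what the scorer returns
        long doubleBits = Double.doubleToLongBits(0.8d);
        float decodedDouble = (float) Double.longBitsToDouble(doubleBits);  // narrowed to the float score
        System.out.println(decodedFloat + " " + decodedDouble);             // 0.8 0.8
      }
    }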
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 4699b0f83f4..4187ce9424f 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -18,13 +18,22 @@
-
+
+
+
+
+
+
+
+
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 21b71c3e5ec..93709cad43a 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -28,6 +28,7 @@
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.ltr.feature.FieldValueFeature;
import org.apache.solr.ltr.feature.OriginalScoreFeature;
import org.apache.solr.ltr.feature.SolrFeature;
import org.apache.solr.ltr.feature.ValueFeature;
@@ -107,21 +108,21 @@ public void testSimpleQuery() throws Exception {
final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score");
final String result0_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","64.0", "c3","2.0", "original","0.0");
+ "powpularityS","64.0", "c3","2.0", "original","0.0", "dvIntFieldFeature","8.0","dvLongFieldFeature","8.0","dvFloatFieldFeature","0.8","dvDoubleFieldFeature","0.8","dvStrNumFieldFeature","8.0","dvStrBoolFieldFeature","1.0");
final String result1_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","49.0", "c3","2.0", "original","1.0");
+ "powpularityS","49.0", "c3","2.0", "original","1.0", "dvIntFieldFeature","7.0","dvLongFieldFeature","7.0","dvFloatFieldFeature","0.7","dvDoubleFieldFeature","0.7","dvStrNumFieldFeature","7.0","dvStrBoolFieldFeature","0.0");
final String result2_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","36.0", "c3","2.0", "original","2.0");
+ "powpularityS","36.0", "c3","2.0", "original","2.0", "dvIntFieldFeature","6.0","dvLongFieldFeature","6.0","dvFloatFieldFeature","0.6","dvDoubleFieldFeature","0.6","dvStrNumFieldFeature","6.0","dvStrBoolFieldFeature","1.0");
final String result3_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","25.0", "c3","2.0", "original","3.0");
+ "powpularityS","25.0", "c3","2.0", "original","3.0", "dvIntFieldFeature","5.0","dvLongFieldFeature","5.0","dvFloatFieldFeature","0.5","dvDoubleFieldFeature","0.5","dvStrNumFieldFeature","5.0","dvStrBoolFieldFeature","0.0");
final String result4_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","16.0", "c3","2.0", "original","4.0");
+ "powpularityS","16.0", "c3","2.0", "original","4.0", "dvIntFieldFeature","4.0","dvLongFieldFeature","4.0","dvFloatFieldFeature","0.4","dvDoubleFieldFeature","0.4","dvStrNumFieldFeature","4.0","dvStrBoolFieldFeature","1.0");
final String result5_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "9.0", "c3","2.0", "original","5.0");
+ "powpularityS", "9.0", "c3","2.0", "original","5.0", "dvIntFieldFeature","3.0","dvLongFieldFeature","3.0","dvFloatFieldFeature","0.3","dvDoubleFieldFeature","0.3","dvStrNumFieldFeature","3.0","dvStrBoolFieldFeature","0.0");
final String result6_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "4.0", "c3","2.0", "original","6.0");
+ "powpularityS", "4.0", "c3","2.0", "original","6.0", "dvIntFieldFeature","2.0","dvLongFieldFeature","2.0","dvFloatFieldFeature","0.2","dvDoubleFieldFeature","0.2","dvStrNumFieldFeature","2.0","dvStrBoolFieldFeature","1.0");
final String result7_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "1.0", "c3","2.0", "original","7.0");
+ "powpularityS", "1.0", "c3","2.0", "original","7.0", "dvIntFieldFeature","-1.0","dvLongFieldFeature","-2.0","dvFloatFieldFeature","-3.0","dvDoubleFieldFeature","-4.0","dvStrNumFieldFeature","-5.0","dvStrBoolFieldFeature","0.0");
// Test feature vectors returned (without re-ranking)
@@ -240,24 +241,40 @@ void indexDocument(String collection, String id, String title, String descriptio
doc.setField("title", title);
doc.setField("description", description);
doc.setField("popularity", popularity);
+ if(popularity != 1) {
+ // check that missing values are read as the default value
+ doc.setField("dvIntField", popularity);
+ doc.setField("dvLongField", popularity);
+ doc.setField("dvFloatField", ((float) popularity) / 10);
+ doc.setField("dvDoubleField", ((double) popularity) / 10);
+ doc.setField("dvStrNumField", popularity);
+ doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F");
+ }
solrCluster.getSolrClient().add(collection, doc);
}
private void indexDocuments(final String collection)
throws Exception {
final int collectionSize = 8;
- for (int docId = 1; docId <= collectionSize; docId++) {
+ // put documents in reversed order to check that advanceExact is working correctly
+ for (int docId = collectionSize; docId >= 1; docId--) {
final int popularity = docId;
indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity);
+ if(docId == collectionSize / 2) {
+ // commit in the middle in order to check that everything works fine for multi-segment case
+ solrCluster.getSolrClient().commit(collection);
+ }
}
- solrCluster.getSolrClient().commit(collection);
+ solrCluster.getSolrClient().commit(collection, true, true);
}
-
private void loadModelsAndFeatures() throws Exception {
final String featureStore = "test";
- final String[] featureNames = new String[] {"powpularityS","c3", "original"};
- final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1}}";
+ final String[] featureNames = new String[]{"powpularityS", "c3", "original", "dvIntFieldFeature",
+ "dvLongFieldFeature", "dvFloatFieldFeature", "dvDoubleFieldFeature", "dvStrNumFieldFeature", "dvStrBoolFieldFeature"};
+ final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," +
+ "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," +
+ "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}";
loadFeature(
featureNames[0],
@@ -277,6 +294,42 @@ private void loadModelsAndFeatures() throws Exception {
featureStore,
null
);
+ loadFeature(
+ featureNames[3],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvIntField\"}"
+ );
+ loadFeature(
+ featureNames[4],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvLongField\"}"
+ );
+ loadFeature(
+ featureNames[5],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvFloatField\"}"
+ );
+ loadFeature(
+ featureNames[6],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvDoubleField\",\"defaultValue\":-4.0}"
+ );
+ loadFeature(
+ featureNames[7],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvStrNumField\",\"defaultValue\":-5}"
+ );
+ loadFeature(
+ featureNames[8],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvStrBoolField\"}"
+ );
loadModel(
"powpularityS-model",
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index 85019445546..cfdfcd5f416 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -25,12 +25,8 @@
import java.util.List;
import java.util.Map;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -41,8 +37,8 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.solr.SolrTestCase;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.ltr.feature.Feature;
import org.apache.solr.ltr.feature.FieldValueFeature;
@@ -50,16 +46,24 @@
import org.apache.solr.ltr.model.TestLinearModel;
import org.apache.solr.ltr.norm.IdentityNormalizer;
import org.apache.solr.ltr.norm.Normalizer;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestLTRReRankingPipeline extends SolrTestCase {
+public class TestLTRReRankingPipeline extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath());
+ @BeforeClass
+ public static void setup() throws Exception {
+ initCore("solrconfig-ltr.xml", "schema.xml");
+ }
+
private IndexSearcher getSearcher(IndexReader r) {
// 'yes' to maybe wrapping in general
final boolean maybeWrap = true;
@@ -109,199 +113,155 @@ public Explanation explain(LeafReaderContext context, int doc,
}
@Test
- public void testRescorer() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- Document doc = new Document();
- doc.add(newStringField("id", "0", Field.Store.YES));
- doc.add(newTextField("field", "wizard the the the the the oz",
- Field.Store.NO));
- doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field
-
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "1", Field.Store.YES));
- // 1 extra token, but wizard and oz are close;
- doc.add(newTextField("field", "wizard oz the the the the the the",
- Field.Store.NO));
- doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field
- w.addDocument(doc);
-
- final IndexReader r = w.getReader();
- w.close();
-
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = getSearcher(r);
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(2, hits.totalHits.value);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
- hits = rescorer.rescore(searcher, hits, 2);
-
- // rerank using the field final-score
- assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- r.close();
- dir.close();
-
+ public void testRescorer() throws Exception {
+ assertU(adoc("id", "0", "field", "wizard the the the the the oz", "final-score", "F"));
+ assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "final-score", "T"));
+ assertU(commit());
+
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = solrQueryRequest.getSearcher();
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(2, hits.totalHits.value);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel);
+ ltrScoringQuery.setRequest(solrQueryRequest);
+ final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery);
+ hits = rescorer.rescore(searcher, hits, 2);
+
+ // rerank using the field final-score
+ assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ }
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- Document doc = new Document();
- doc.add(newStringField("id", "0", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 1.0f));
- w.addDocument(doc);
-
- doc = new Document();
- doc.add(newStringField("id", "1", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 2.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "2", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 3.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "3", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz the the the the ",
- Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 4.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "4", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz the the the the the the",
- Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 5.0f));
- w.addDocument(doc);
-
- final IndexReader r = w.getReader();
- w.close();
-
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = getSearcher(r);
-
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(5, hits.totalHits.value);
-
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
-
- // rerank @ 0 should not change the order
- hits = rescorer.rescore(searcher, hits, 0);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- // test rerank with different topN cuts
-
- for (int topN = 1; topN <= 5; topN++) {
- log.info("rerank {} documents ", topN);
- hits = searcher.search(bqBuilder.build(), 10);
-
- final ScoreDoc[] slice = new ScoreDoc[topN];
- System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
- hits = new TopDocs(hits.totalHits, slice);
- hits = rescorer.rescore(searcher, hits, topN);
- for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
- if (log.isInfoEnabled()) {
- log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
- .get("id"), j);
- }
-
- assertEquals(i,
- Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
- assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+ assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "final-score-float", "1.0"));
+ assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "final-score-float", "2.0"));
+ assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "final-score-float", "3.0"));
+ assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "final-score-float", "4.0"));
+ assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "final-score-float", "5.0"));
+ assertU(commit());
+
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = solrQueryRequest.getSearcher();
+
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(5, hits.totalHits.value);
+
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score-float");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score-float");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel);
+ scoringQuery.setRequest(solrQueryRequest);
+ final LTRRescorer rescorer = new LTRRescorer(scoringQuery);
+
+ // rerank @ 0 should not change the order
+ hits = rescorer.rescore(searcher, hits, 0);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ // test rerank with different topN cuts
+
+ for (int topN = 1; topN <= 5; topN++) {
+ log.info("rerank {} documents ", topN);
+ hits = searcher.search(bqBuilder.build(), 10);
+
+ final ScoreDoc[] slice = new ScoreDoc[topN];
+ System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
+ hits = new TopDocs(hits.totalHits, slice);
+ hits = rescorer.rescore(searcher, hits, topN);
+ for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
+ if (log.isInfoEnabled()) {
+ log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
+ .get("id"), j);
+ }
+
+ assertEquals(i,
+ Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
+ assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+ }
}
}
-
- r.close();
- dir.close();
-
}
@Test
public void testDocParam() throws Exception {
- final Map test = new HashMap();
- test.put("fake", 2);
- List features = makeFieldValueFeatures(new int[] {0},
- "final-score");
- List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- List allFeatures = makeFieldValueFeatures(new int[] {0},
- "final-score");
- MockModel ltrScoringModel = new MockModel("test",
- features, norms, "test", allFeatures, null);
- LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
- LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+ List features = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ List allFeatures = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ MockModel ltrScoringModel = new MockModel("test",
+ features, norms, "test", allFeatures, null);
+ LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
+ query.setRequest(solrQueryRequest);
+ LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
- features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
- norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
- 9}, "final-score");
- ltrScoringModel = new MockModel("test", features, norms,
- "test", allFeatures, null);
- query = new LTRScoringQuery(ltrScoringModel);
- wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
+ norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
+ 9}, "final-score");
+ ltrScoringModel = new MockModel("test", features, norms,
+ "test", allFeatures, null);
+ query = new LTRScoringQuery(ltrScoringModel);
+ query.setRequest(solrQueryRequest);
+ wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
}
}
-
}
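
The test rework above follows the usual SolrTestCaseJ4 pattern: index through the embedded core and open a LocalSolrQueryRequest so the weight sees a SolrIndexSearcher and can resolve the SchemaField. A condensed sketch of that pattern; document contents and assertions are illustrative only.

    import org.apache.solr.SolrTestCaseJ4;
    import org.apache.solr.common.params.ModifiableSolrParams;
    import org.apache.solr.request.LocalSolrQueryRequest;
    import org.apache.solr.request.SolrQueryRequest;
    import org.junit.BeforeClass;
    import org.junit.Test;

    public class EmbeddedCoreSketchTest extends SolrTestCaseJ4 {
      @BeforeClass
      public static void beforeClass() throws Exception {
        initCore("solrconfig-ltr.xml", "schema.xml"); // same configs the patch uses
      }

      @Test
      public void sketch() throws Exception {
        assertU(adoc("id", "42", "field", "wizard of oz")); // index through the embedded core
        assertU(commit());
        try (SolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
          // getSearcher() yields a SolrIndexSearcher, so FieldValueFeatureWeight
          // can look up the SchemaField and pick the docValues-based scorer
          assertNotNull(req.getSearcher());
        }
      }
    }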
From bdce029f3c95526a79e86a0d73389db42832bee9 Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 12 May 2021 16:44:02 +0200
Subject: [PATCH 02/27] [SOLR-12697] formatting changes
---
.../solr/ltr/feature/FieldValueFeature.java | 21 ++++++++-----------
.../apache/solr/ltr/TestLTROnSolrCloud.java | 5 -----
2 files changed, 9 insertions(+), 17 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 129ecf03950..11470dc5a8a 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -68,8 +68,8 @@ public void setField(String field) {
}
@Override
- public LinkedHashMap paramsToMap() {
- final LinkedHashMap params = defaultParamsToMap();
+ public LinkedHashMap paramsToMap() {
+ final LinkedHashMap params = defaultParamsToMap();
params.put("field", field);
return params;
}
@@ -77,19 +77,17 @@ public LinkedHashMap paramsToMap() {
@Override
protected void validate() throws FeatureException {
if (field == null || field.isEmpty()) {
- throw new FeatureException(getClass().getSimpleName()+
- ": field must be provided");
+ throw new FeatureException(getClass().getSimpleName() + ": field must be provided");
}
}
- public FieldValueFeature(String name, Map params) {
+ public FieldValueFeature(String name, Map params) {
super(name, params);
}
@Override
- public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
- SolrQueryRequest request, Query originalQuery, Map efi)
- throws IOException {
+ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request,
+ Query originalQuery, Map efi) throws IOException {
return new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
}
@@ -150,14 +148,13 @@ public float score() throws IOException {
} else {
final String string = indexableField.stringValue();
if (string.length() == 1) {
- // boolean values in the index are encoded with the
- // a single char contained in TRUE_TOKEN or FALSE_TOKEN
+ // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
- return 1;
+ return 1f;
}
if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
- return 0;
+ return 0f;
}
}
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 93709cad43a..732b5fd8ff6 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -58,11 +58,8 @@ public void setUp() throws Exception {
int numberOfNodes = numberOfShards * numberOfReplicas;
setupSolrCluster(numberOfShards, numberOfReplicas, numberOfNodes);
-
-
}
-
@Override
public void tearDown() throws Exception {
restTestHarness.close();
@@ -233,7 +230,6 @@ private void createCollection(String name, String config, int numShards, int num
solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas);
}
-
void indexDocument(String collection, String id, String title, String description, int popularity)
throws Exception{
SolrInputDocument doc = new SolrInputDocument();
@@ -356,5 +352,4 @@ public static void after() throws Exception {
}
System.clearProperty("managed.schema.mutable");
}
-
}
From e6601eeecf999207df01447e1f8a0c0ff20b03aa Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 12 May 2021 17:09:41 +0200
Subject: [PATCH 03/27] [SOLR-12697] only apply new scorer to fields that are
not stored
---
.../java/org/apache/solr/ltr/feature/FieldValueFeature.java | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 11470dc5a8a..6373f0b108b 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -112,8 +112,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
*/
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
- // always prefer docValues
- if (schemaField != null && schemaField.hasDocValues()) {
+ if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
return new DocValuesFieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField);
}
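
Restated for clarity, as a sketch rather than Solr API: fields that are both stored and docValues keep the previous stored-field behaviour, and only docValues-only fields take the new path.

    import org.apache.solr.schema.SchemaField;

    final class ScorerChoiceSketch {
      // Mirrors the revised condition: prefer docValues only when the field is not stored.
      static boolean useDocValuesScorer(SchemaField field) {
        return field != null && !field.stored() && field.hasDocValues();
      }
    }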
From d6e14779a5648f928efa63d36246973dec5cd09b Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 12 May 2021 17:17:09 +0200
Subject: [PATCH 04/27] [SOLR-12697] remove BINARY case because it is not
supported
---
.../src/java/org/apache/solr/ltr/feature/FieldValueFeature.java | 2 --
1 file changed, 2 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 6373f0b108b..f71e02b932b 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -199,8 +199,6 @@ public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafRe
docValues = DocValues.getSorted(context.reader(), field);
break;
case BINARY:
- docValues = DocValues.getBinary(context.reader(), field);
- break;
case SORTED_NUMERIC:
case SORTED_SET:
case NONE:
From 5bc995c04a4489377c4beda1ec62d74685f050e5 Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Thu, 13 May 2021 09:46:47 +0200
Subject: [PATCH 05/27] [SOLR-12697] only pass fieldType to constructor;
determine numberType in constructor instead of once per doc
---
.../apache/solr/ltr/feature/FieldValueFeature.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index f71e02b932b..f4df25885a6 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -114,7 +114,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
return new DocValuesFieldValueFeatureScorer(this, context,
- DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField);
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType());
}
return new FieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
@@ -178,14 +178,15 @@ public float getMaxScore(int upTo) throws IOException {
public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
final LeafReaderContext context;
final DocIdSetIterator docValues;
- final FieldType schemaFieldType;
+ final FieldType fieldType;
+ NumberType fieldNumberType;
DocValuesType docValuesType = DocValuesType.NONE;
public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
- final DocIdSetIterator itr, final SchemaField schemaField) {
+ final DocIdSetIterator itr, final FieldType fieldType) {
super(weight, itr);
this.context = context;
- schemaFieldType = schemaField.getType();
+ this.fieldType = fieldType;
try {
FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
@@ -194,6 +195,7 @@ public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafRe
switch (docValuesType) {
case NUMERIC:
docValues = DocValues.getNumeric(context.reader(), field);
+ fieldNumberType = fieldType.getNumberType();
break;
case SORTED:
docValues = DocValues.getSorted(context.reader(), field);
@@ -216,10 +218,10 @@ public float score() throws IOException {
if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) {
switch (docValuesType) {
case NUMERIC:
- if (NumberType.FLOAT.equals(schemaFieldType.getNumberType())) {
+ if (NumberType.FLOAT.equals(fieldNumberType)) {
// convert float value that was stored as long back to float
return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue());
- } else if (NumberType.DOUBLE.equals(schemaFieldType.getNumberType())) {
+ } else if (NumberType.DOUBLE.equals(fieldNumberType)) {
// convert double value that was stored as long back to double, then narrow to float
return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue());
}
From 4559415aec0e31db19ae999cc49031ca744bc14b Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Thu, 13 May 2021 09:51:38 +0200
Subject: [PATCH 06/27] [SOLR-12697] remove - from field names; randomize
indexing order for documents and commit during indexing; improve formatting
of test
---
.../solr/collection1/conf/schema.xml | 4 +-
.../apache/solr/ltr/TestLTROnSolrCloud.java | 141 ++++++++++--------
.../solr/ltr/TestLTRReRankingPipeline.java | 32 ++--
3 files changed, 100 insertions(+), 77 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 4187ce9424f..88e0b73f0f9 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -19,8 +19,8 @@
-
-
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 732b5fd8ff6..28d6c5c8774 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -16,7 +16,10 @@
package org.apache.solr.ltr;
import java.io.File;
+import java.util.Collections;
+import java.util.List;
import java.util.SortedMap;
+import java.util.stream.IntStream;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrQuery;
@@ -38,13 +41,15 @@
import org.junit.AfterClass;
import org.junit.Test;
+import static java.util.stream.Collectors.toList;
+
public class TestLTROnSolrCloud extends TestRerankBase {
private MiniSolrCloudCluster solrCluster;
String solrconfig = "solrconfig-ltr.xml";
String schema = "schema.xml";
- SortedMap extraServlets = null;
+ SortedMap extraServlets = null;
@Override
public void setUp() throws Exception {
@@ -52,8 +57,8 @@ public void setUp() throws Exception {
extraServlets = setupTestInit(solrconfig, schema, true);
System.setProperty("enable.update.log", "true");
- int numberOfShards = random().nextInt(4)+1;
- int numberOfReplicas = random().nextInt(2)+1;
+ int numberOfShards = random().nextInt(4) + 1;
+ int numberOfReplicas = random().nextInt(2) + 1;
int numberOfNodes = numberOfShards * numberOfReplicas;
@@ -84,7 +89,7 @@ public void testSimpleQuery() throws Exception {
query.setParam("rows", "8");
QueryResponse queryResponse =
- solrCluster.getSolrClient().query(COLLECTION,query);
+ solrCluster.getSolrClient().query(COLLECTION, query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("1", queryResponse.getResults().get(0).get("id").toString());
assertEquals("2", queryResponse.getResults().get(1).get("id").toString());
@@ -95,37 +100,52 @@ public void testSimpleQuery() throws Exception {
assertEquals("7", queryResponse.getResults().get(6).get("id").toString());
assertEquals("8", queryResponse.getResults().get(7).get("id").toString());
- final Float original_result0_score = (Float)queryResponse.getResults().get(0).get("score");
- final Float original_result1_score = (Float)queryResponse.getResults().get(1).get("score");
- final Float original_result2_score = (Float)queryResponse.getResults().get(2).get("score");
- final Float original_result3_score = (Float)queryResponse.getResults().get(3).get("score");
- final Float original_result4_score = (Float)queryResponse.getResults().get(4).get("score");
- final Float original_result5_score = (Float)queryResponse.getResults().get(5).get("score");
- final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score");
- final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score");
-
- final String result0_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","64.0", "c3","2.0", "original","0.0", "dvIntFieldFeature","8.0","dvLongFieldFeature","8.0","dvFloatFieldFeature","0.8","dvDoubleFieldFeature","0.8","dvStrNumFieldFeature","8.0","dvStrBoolFieldFeature","1.0");
- final String result1_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","49.0", "c3","2.0", "original","1.0", "dvIntFieldFeature","7.0","dvLongFieldFeature","7.0","dvFloatFieldFeature","0.7","dvDoubleFieldFeature","0.7","dvStrNumFieldFeature","7.0","dvStrBoolFieldFeature","0.0");
- final String result2_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","36.0", "c3","2.0", "original","2.0", "dvIntFieldFeature","6.0","dvLongFieldFeature","6.0","dvFloatFieldFeature","0.6","dvDoubleFieldFeature","0.6","dvStrNumFieldFeature","6.0","dvStrBoolFieldFeature","1.0");
- final String result3_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","25.0", "c3","2.0", "original","3.0", "dvIntFieldFeature","5.0","dvLongFieldFeature","5.0","dvFloatFieldFeature","0.5","dvDoubleFieldFeature","0.5","dvStrNumFieldFeature","5.0","dvStrBoolFieldFeature","0.0");
- final String result4_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","16.0", "c3","2.0", "original","4.0", "dvIntFieldFeature","4.0","dvLongFieldFeature","4.0","dvFloatFieldFeature","0.4","dvDoubleFieldFeature","0.4","dvStrNumFieldFeature","4.0","dvStrBoolFieldFeature","1.0");
- final String result5_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "9.0", "c3","2.0", "original","5.0", "dvIntFieldFeature","3.0","dvLongFieldFeature","3.0","dvFloatFieldFeature","0.3","dvDoubleFieldFeature","0.3","dvStrNumFieldFeature","3.0","dvStrBoolFieldFeature","0.0");
- final String result6_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "4.0", "c3","2.0", "original","6.0", "dvIntFieldFeature","2.0","dvLongFieldFeature","2.0","dvFloatFieldFeature","0.2","dvDoubleFieldFeature","0.2","dvStrNumFieldFeature","2.0","dvStrBoolFieldFeature","1.0");
- final String result7_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "1.0", "c3","2.0", "original","7.0", "dvIntFieldFeature","-1.0","dvLongFieldFeature","-2.0","dvFloatFieldFeature","-3.0","dvDoubleFieldFeature","-4.0","dvStrNumFieldFeature","-5.0","dvStrBoolFieldFeature","0.0");
+ final Float original_result0_score = (Float) queryResponse.getResults().get(0).get("score");
+ final Float original_result1_score = (Float) queryResponse.getResults().get(1).get("score");
+ final Float original_result2_score = (Float) queryResponse.getResults().get(2).get("score");
+ final Float original_result3_score = (Float) queryResponse.getResults().get(3).get("score");
+ final Float original_result4_score = (Float) queryResponse.getResults().get(4).get("score");
+ final Float original_result5_score = (Float) queryResponse.getResults().get(5).get("score");
+ final Float original_result6_score = (Float) queryResponse.getResults().get(6).get("score");
+ final Float original_result7_score = (Float) queryResponse.getResults().get(7).get("score");
+
+ final String result0_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0",
+ "dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8",
+ "dvStrNumFieldFeature", "8.0", "dvStrBoolFieldFeature", "1.0");
+ final String result1_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0",
+ "dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7",
+ "dvStrNumFieldFeature", "7.0", "dvStrBoolFieldFeature", "0.0");
+ final String result2_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0",
+ "dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6",
+ "dvStrNumFieldFeature", "6.0", "dvStrBoolFieldFeature", "1.0");
+ final String result3_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0",
+ "dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5",
+ "dvStrNumFieldFeature", "5.0", "dvStrBoolFieldFeature", "0.0");
+ final String result4_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0",
+ "dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4",
+ "dvStrNumFieldFeature", "4.0", "dvStrBoolFieldFeature", "1.0");
+ final String result5_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0",
+ "dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3",
+ "dvStrNumFieldFeature", "3.0", "dvStrBoolFieldFeature", "0.0");
+ final String result6_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0",
+ "dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2",
+ "dvStrNumFieldFeature", "2.0", "dvStrBoolFieldFeature", "1.0");
+ final String result7_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0",
+ "dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0",
+ "dvStrNumFieldFeature", "-5.0", "dvStrBoolFieldFeature", "0.0");
// Test feature vectors returned (without re-ranking)
query.setFields("*,score,features:[fv store=test]");
- queryResponse =
- solrCluster.getSolrClient().query(COLLECTION,query);
+ queryResponse = solrCluster.getSolrClient().query(COLLECTION, query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("1", queryResponse.getResults().get(0).get("id").toString());
assertEquals("2", queryResponse.getResults().get(1).get("id").toString());
@@ -165,8 +185,7 @@ public void testSimpleQuery() throws Exception {
// Test feature vectors returned (with re-ranking)
query.setFields("*,score,features:[fv]");
query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}");
- queryResponse =
- solrCluster.getSolrClient().query(COLLECTION,query);
+ queryResponse = solrCluster.getSolrClient().query(COLLECTION, query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("8", queryResponse.getResults().get(0).get("id").toString());
assertEquals(result0_features,
@@ -206,7 +225,7 @@ private void setupSolrCluster(int numShards, int numReplicas, int numServers) th
createCollection(COLLECTION, "conf1", numShards, numReplicas);
indexDocuments(COLLECTION);
for (JettySolrRunner solrRunner : solrCluster.getJettySolrRunners()) {
- if (!solrRunner.getCoreContainer().getCores().isEmpty()){
+ if (!solrRunner.getCoreContainer().getCores().isEmpty()) {
String coreName = solrRunner.getCoreContainer().getCores().iterator().next().getName();
restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName);
break;
@@ -231,13 +250,13 @@ private void createCollection(String name, String config, int numShards, int num
}
void indexDocument(String collection, String id, String title, String description, int popularity)
- throws Exception{
+ throws Exception {
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", id);
doc.setField("title", title);
doc.setField("description", description);
doc.setField("popularity", popularity);
- if(popularity != 1) {
+ if (popularity != 1) {
// check that empty values will be read as default
doc.setField("dvIntField", popularity);
doc.setField("dvLongField", popularity);
@@ -249,17 +268,21 @@ void indexDocument(String collection, String id, String title, String descriptio
solrCluster.getSolrClient().add(collection, doc);
}
- private void indexDocuments(final String collection)
- throws Exception {
+ private void indexDocuments(final String collection) throws Exception {
final int collectionSize = 8;
- // put documents in reversed order to check that advanceExact is working correctly
- for (int docId = collectionSize; docId >= 1; docId--) {
+ // put documents in random order to check that advanceExact is working correctly
+ List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList());
+ Collections.shuffle(docIds);
+
+ int docCounter = 1;
+ for (int docId : docIds) {
final int popularity = docId;
indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity);
- if(docId == collectionSize / 2) {
- // commit in the middle in order to check that everything works fine for multi-segment case
+ // maybe commit in the middle in order to check that everything works fine for the multi-segment case
+ if (docCounter == collectionSize / 2 && random().nextBoolean()) {
solrCluster.getSolrClient().commit(collection);
}
+ docCounter++;
}
solrCluster.getSolrClient().commit(collection, true, true);
}
@@ -273,22 +296,22 @@ private void loadModelsAndFeatures() throws Exception {
"\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}";
loadFeature(
- featureNames[0],
- SolrFeature.class.getName(),
- featureStore,
- "{\"q\":\"{!func}pow(popularity,2)\"}"
+ featureNames[0],
+ SolrFeature.class.getName(),
+ featureStore,
+ "{\"q\":\"{!func}pow(popularity,2)\"}"
);
loadFeature(
- featureNames[1],
- ValueFeature.class.getName(),
- featureStore,
- "{\"value\":2}"
+ featureNames[1],
+ ValueFeature.class.getName(),
+ featureStore,
+ "{\"value\":2}"
);
loadFeature(
- featureNames[2],
- OriginalScoreFeature.class.getName(),
- featureStore,
- null
+ featureNames[2],
+ OriginalScoreFeature.class.getName(),
+ featureStore,
+ null
);
loadFeature(
featureNames[3],
@@ -328,11 +351,11 @@ private void loadModelsAndFeatures() throws Exception {
);
loadModel(
- "powpularityS-model",
- LinearModel.class.getName(),
- featureNames,
- featureStore,
- jsonModelParams
+ "powpularityS-model",
+ LinearModel.class.getName(),
+ featureNames,
+ featureStore,
+ jsonModelParams
);
reloadCollection(COLLECTION);
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index cfdfcd5f416..1b1967d47ef 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -114,8 +114,8 @@ public Explanation explain(LeafReaderContext context, int doc,
@Test
public void testRescorer() throws Exception {
- assertU(adoc("id", "0", "field", "wizard the the the the the oz", "final-score", "F"));
- assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "final-score", "T"));
+ assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F"));
+ assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T"));
assertU(commit());
try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
@@ -131,12 +131,12 @@ public void testRescorer() throws Exception {
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score");
+ "finalScore");
final List norms =
new ArrayList(
Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore");
final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
@@ -145,7 +145,7 @@ public void testRescorer() throws Exception {
final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery);
hits = rescorer.rescore(searcher, hits, 2);
- // rerank using the field final-score
+ // rerank using the field finalScore
assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
}
@@ -154,11 +154,11 @@ public void testRescorer() throws Exception {
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
- assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "final-score-float", "1.0"));
- assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "final-score-float", "2.0"));
- assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "final-score-float", "3.0"));
- assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "final-score-float", "4.0"));
- assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "final-score-float", "5.0"));
+ assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0"));
+ assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0"));
+ assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0"));
+ assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0"));
+ assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0"));
assertU(commit());
try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
@@ -179,12 +179,12 @@ public void testDifferentTopN() throws IOException {
assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score-float");
+ "finalScoreFloat");
final List norms =
new ArrayList(
Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score-float");
+ 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat");
final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
@@ -229,12 +229,12 @@ public void testDifferentTopN() throws IOException {
public void testDocParam() throws Exception {
try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
List features = makeFieldValueFeatures(new int[] {0},
- "final-score");
+ "finalScore");
List norms =
new ArrayList(
Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
List allFeatures = makeFieldValueFeatures(new int[] {0},
- "final-score");
+ "finalScore");
MockModel ltrScoringModel = new MockModel("test",
features, norms, "test", allFeatures, null);
LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
@@ -246,12 +246,12 @@ public void testDocParam() throws Exception {
assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
}
- features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
+ features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore");
norms =
new ArrayList(
Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
- 9}, "final-score");
+ 9}, "finalScore");
ltrScoringModel = new MockModel("test", features, norms,
"test", allFeatures, null);
query = new LTRScoringQuery(ltrScoringModel);
From ec4cbfb4c6e57855d4a5a188bf071ec8b5d37a6c Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Thu, 13 May 2021 21:26:18 +0200
Subject: [PATCH 07/27] [SOLR-12697] determine docValuesType before creating
DocValuesFieldValueFeatureScorer so only the supported types have to be
handled later; extract number-conversion to separate method
---
.../solr/ltr/feature/FieldValueFeature.java | 118 +++++++++---------
.../solr/collection1/conf/schema.xml | 2 +-
.../apache/solr/ltr/TestLTROnSolrCloud.java | 2 +-
3 files changed, 63 insertions(+), 59 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index f4df25885a6..6e8b414a855 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -95,7 +95,7 @@ public class FieldValueFeatureWeight extends FeatureWeight {
private final SchemaField schemaField;
public FieldValueFeatureWeight(IndexSearcher searcher,
- SolrQueryRequest request, Query originalQuery, Map efi) {
+ SolrQueryRequest request, Query originalQuery, Map efi) {
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
if (searcher instanceof SolrIndexSearcher) {
schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
@@ -106,6 +106,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
/**
* Return a FeatureScorer that uses docValues or storedFields if no docValues are present
+ *
* @param context the segment this FeatureScorer is working with
* @return FeatureScorer for the current segment and field
* @throws IOException as defined by abstract class Feature
@@ -113,11 +114,21 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
- return new DocValuesFieldValueFeatureScorer(this, context,
- DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType());
+
+ FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
+ DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
+
+ if (DocValuesType.NUMERIC.equals(docValuesType) || DocValuesType.SORTED.equals(docValuesType)) {
+ return new DocValuesFieldValueFeatureScorer(this, context,
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType(), docValuesType);
+ // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway
+ } else if (!DocValuesType.NONE.equals(docValuesType)) {
+ throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
+ + " is not supported!");
+ }
}
return new FieldValueFeatureScorer(this, context,
- DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
/**
@@ -135,8 +146,7 @@ public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context,
public float score() throws IOException {
try {
- final Document document = context.reader().document(itr.docID(),
- fieldAsSet);
+ final Document document = context.reader().document(itr.docID(), fieldAsSet);
final IndexableField indexableField = document.getField(field);
if (indexableField == null) {
return getDefaultValue();
@@ -158,10 +168,7 @@ public float score() throws IOException {
}
}
} catch (final IOException e) {
- throw new FeatureException(
- e.toString() + ": " +
- "Unable to extract feature for "
- + name, e);
+ throw new FeatureException(e.toString() + ": " + "Unable to extract feature for " + name, e);
}
return getDefaultValue();
}
@@ -177,76 +184,73 @@ public float getMaxScore(int upTo) throws IOException {
*/
public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
final LeafReaderContext context;
- final DocIdSetIterator docValues;
final FieldType fieldType;
+ final DocValuesType docValuesType;
+ DocIdSetIterator docValues;
NumberType fieldNumberType;
- DocValuesType docValuesType = DocValuesType.NONE;
public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
- final DocIdSetIterator itr, final FieldType fieldType) {
+ final DocIdSetIterator itr, final FieldType fieldType,
+ final DocValuesType docValuesType) {
super(weight, itr);
this.context = context;
this.fieldType = fieldType;
+ this.docValuesType = docValuesType;
try {
- FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
- // if fieldInfo is null, just use NONE-Type. This causes no problems, because we won't call score() anyway
- docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
- switch (docValuesType) {
- case NUMERIC:
- docValues = DocValues.getNumeric(context.reader(), field);
- fieldNumberType = fieldType.getNumberType();
- break;
- case SORTED:
- docValues = DocValues.getSorted(context.reader(), field);
- break;
- case BINARY:
- case SORTED_NUMERIC:
- case SORTED_SET:
- case NONE:
- default:
- docValues = null;
+ if (DocValuesType.NUMERIC.equals(docValuesType)) {
+ docValues = DocValues.getNumeric(context.reader(), field);
+ fieldNumberType = fieldType.getNumberType();
+ } else if (DocValuesType.SORTED.equals(docValuesType)) {
+ docValues = DocValues.getSorted(context.reader(), field);
}
} catch (IOException e) {
throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType "
- + docValuesType.name());
+ + docValuesType.name());
}
}
@Override
public float score() throws IOException {
- if (docValues != null && docValues.advance(itr.docID()) < DocIdSetIterator.NO_MORE_DOCS) {
- switch (docValuesType) {
- case NUMERIC:
- if (NumberType.FLOAT.equals(fieldNumberType)) {
- // convert float value that was stored as long back to float
- return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue());
- } else if (NumberType.DOUBLE.equals(fieldNumberType)) {
- // handle double value conversion
- return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue());
- }
- // just take the long value
- return ((NumericDocValues) docValues).longValue();
- case SORTED:
- int ord = ((SortedDocValues) docValues).ordValue();
- // try to interpret bytesRef either as number string or as true / false token
- return handleBytesRef(((SortedDocValues) docValues).lookupOrd(ord));
- case BINARY:
- case SORTED_SET:
- case SORTED_NUMERIC:
- case NONE:
- default:
- throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
- + " is not supported!");
- }
+ if (DocValuesType.NUMERIC.equals(docValuesType) &&
+ ((NumericDocValues) docValues).advanceExact(itr.docID())) {
+ return readNumericDocValues();
+ } else if (DocValuesType.SORTED.equals(docValuesType) &&
+ ((SortedDocValues) docValues).advanceExact(itr.docID())) {
+ int ord = ((SortedDocValues) docValues).ordValue();
+ return readSortedDocValues(((SortedDocValues) docValues).lookupOrd(ord));
}
return FieldValueFeature.this.getDefaultValue();
}
- private float handleBytesRef(BytesRef bytesRef) {
+ /**
+ * Read the numeric value for a field and convert the different number types to float.
+ *
+ * @return The numeric value that the docValues contain for the current document
+ * @throws IOException if docValues cannot be read
+ */
+ private float readNumericDocValues() throws IOException {
+ if (NumberType.FLOAT.equals(fieldNumberType)) {
+ // convert float value that was stored as long back to float
+ return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue());
+ } else if (NumberType.DOUBLE.equals(fieldNumberType)) {
+ // handle double value conversion
+ return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue());
+ }
+ // just take the long value
+ return ((NumericDocValues) docValues).longValue();
+ }
+
+ /**
+     * Interprets the bytesRef either as a true/false token or tries to parse it as a number string
+ *
+ * @param bytesRef the value of the field that should be used as score
+ * @return the input converted to a number
+ */
+ private float readSortedDocValues(BytesRef bytesRef) {
String string = bytesRef.utf8ToString();
if (string.length() == 1
- && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
+ && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
// boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
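For reference, the NUMERIC branch above relies on float and double docValues being stored as their raw bit patterns widened into a long, which is exactly what readNumericDocValues() reverses via Float.intBitsToFloat and Double.longBitsToDouble. A minimal standalone sketch of that round trip (class name and sample values are illustrative, not part of the patch):

public class NumericDocValuesBitsSketch {
  public static void main(String[] args) {
    // A float/double docValue is effectively stored as its raw bit pattern in a long ...
    long storedFloatBits = Float.floatToIntBits(3.5f);
    long storedDoubleBits = Double.doubleToLongBits(2.25d);

    // ... and readNumericDocValues() turns those bits back into a float-valued feature score.
    float decodedFloat = Float.intBitsToFloat((int) storedFloatBits);
    float decodedDouble = (float) Double.longBitsToDouble(storedDoubleBits);

    System.out.println(decodedFloat + " " + decodedDouble); // prints: 3.5 2.25
  }
}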
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 88e0b73f0f9..8ec89e39285 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -30,7 +30,7 @@
-
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 28d6c5c8774..a9f6d36f235 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -272,7 +272,7 @@ private void indexDocuments(final String collection) throws Exception {
final int collectionSize = 8;
// put documents in random order to check that advanceExact is working correctly
List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList());
- Collections.shuffle(docIds);
+ Collections.shuffle(docIds, random());
int docCounter = 1;
for (int docId : docIds) {
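The shuffle above indexes the cloud-test documents in random order, so segment-internal docIDs no longer mirror the field values and the advanceExact-based reading done by the new scorers is exercised on a non-trivial ordering. A rough standalone sketch of that access pattern (directory, field and class names are illustrative, not from the patch):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class AdvanceExactSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      // index values out of order, similar in spirit to the shuffled cloud test
      for (long popularity : new long[] {5, 1, 3}) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("popularity", popularity));
        writer.addDocument(doc);
      }
      writer.commit();
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        for (LeafReaderContext leaf : reader.leaves()) {
          NumericDocValues dv = DocValues.getNumeric(leaf.reader(), "popularity");
          for (int docId = 0; docId < leaf.reader().maxDoc(); docId++) {
            // advanceExact must be called with non-decreasing docIds within a segment
            if (dv.advanceExact(docId)) {
              System.out.println(docId + " -> " + dv.longValue());
            }
          }
        }
      }
    }
  }
}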
From e6f20f1ce79ca547518c8759b09cbcf710587402 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Fri, 14 May 2021 13:50:23 +0100
Subject: [PATCH 08/27] split dual-purpose DocValuesFieldValueFeatureScorer
 into two: NumericDocValuesFieldValueFeatureScorer and
 SortedDocValuesFieldValueFeatureScorer
---
.../solr/ltr/feature/FieldValueFeature.java | 116 ++++++++++++------
1 file changed, 80 insertions(+), 36 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 6e8b414a855..9b9c6561489 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -36,7 +36,6 @@
import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
-import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
@@ -118,9 +117,12 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException {
FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
- if (DocValuesType.NUMERIC.equals(docValuesType) || DocValuesType.SORTED.equals(docValuesType)) {
- return new DocValuesFieldValueFeatureScorer(this, context,
- DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType(), docValuesType);
+ if (DocValuesType.NUMERIC.equals(docValuesType)) {
+ return new NumericDocValuesFieldValueFeatureScorer(this, context,
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType());
+ } else if (DocValuesType.SORTED.equals(docValuesType)) {
+ return new SortedDocValuesFieldValueFeatureScorer(this, context,
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
// If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway
} else if (!DocValuesType.NONE.equals(docValuesType)) {
throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
@@ -180,45 +182,28 @@ public float getMaxScore(int upTo) throws IOException {
}
/**
- * A FeatureScorer that reads the docValues for a field
+ * A FeatureScorer that reads the numeric docValues for a field
*/
- public class DocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
- final LeafReaderContext context;
- final FieldType fieldType;
- final DocValuesType docValuesType;
- DocIdSetIterator docValues;
- NumberType fieldNumberType;
+ public class NumericDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
+ NumericDocValues docValues;
+ NumberType numberType;
- public DocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
- final DocIdSetIterator itr, final FieldType fieldType,
- final DocValuesType docValuesType) {
+ public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
+ final DocIdSetIterator itr, final NumberType numberType) {
super(weight, itr);
- this.context = context;
- this.fieldType = fieldType;
- this.docValuesType = docValuesType;
+ this.numberType = numberType;
try {
- if (DocValuesType.NUMERIC.equals(docValuesType)) {
- docValues = DocValues.getNumeric(context.reader(), field);
- fieldNumberType = fieldType.getNumberType();
- } else if (DocValuesType.SORTED.equals(docValuesType)) {
- docValues = DocValues.getSorted(context.reader(), field);
- }
+ docValues = DocValues.getNumeric(context.reader(), field);
} catch (IOException e) {
- throw new IllegalArgumentException("Could not read docValues for field " + field + " with docValuesType "
- + docValuesType.name());
+ throw new IllegalArgumentException("Could not read numeric docValues for field " + field);
}
}
@Override
public float score() throws IOException {
- if (DocValuesType.NUMERIC.equals(docValuesType) &&
- ((NumericDocValues) docValues).advanceExact(itr.docID())) {
+ if (docValues.advanceExact(itr.docID())) {
return readNumericDocValues();
- } else if (DocValuesType.SORTED.equals(docValuesType) &&
- ((SortedDocValues) docValues).advanceExact(itr.docID())) {
- int ord = ((SortedDocValues) docValues).ordValue();
- return readSortedDocValues(((SortedDocValues) docValues).lookupOrd(ord));
}
return FieldValueFeature.this.getDefaultValue();
}
@@ -230,15 +215,73 @@ public float score() throws IOException {
* @throws IOException if docValues cannot be read
*/
private float readNumericDocValues() throws IOException {
- if (NumberType.FLOAT.equals(fieldNumberType)) {
+ if (NumberType.FLOAT.equals(numberType)) {
// convert float value that was stored as long back to float
- return Float.intBitsToFloat((int) ((NumericDocValues) docValues).longValue());
- } else if (NumberType.DOUBLE.equals(fieldNumberType)) {
+ return Float.intBitsToFloat((int) docValues.longValue());
+ } else if (NumberType.DOUBLE.equals(numberType)) {
// handle double value conversion
- return (float) Double.longBitsToDouble(((NumericDocValues) docValues).longValue());
+ return (float) Double.longBitsToDouble(docValues.longValue());
}
// just take the long value
- return ((NumericDocValues) docValues).longValue();
+ return docValues.longValue();
+ }
+
+ /**
+     * Interprets the bytesRef either as a true/false token or tries to parse it as a number string
+ *
+ * @param bytesRef the value of the field that should be used as score
+ * @return the input converted to a number
+ */
+ private float readSortedDocValues(BytesRef bytesRef) {
+ String string = bytesRef.utf8ToString();
+ if (string.length() == 1
+ && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
+ // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
+ // (see BoolField)
+ if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
+ return 1f;
+ } else {
+ return 0f;
+ }
+ } else {
+ try {
+ return Float.parseFloat(string);
+ } catch (NumberFormatException ex) {
+ throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float.");
+ }
+ }
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return Float.POSITIVE_INFINITY;
+ }
+ }
+ /**
+ * A FeatureScorer that reads the sorted docValues for a field
+ */
+ public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
+ SortedDocValues docValues;
+ NumberType fieldNumberType;
+
+ public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
+ final DocIdSetIterator itr) {
+ super(weight, itr);
+
+ try {
+ docValues = DocValues.getSorted(context.reader(), field);
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Could not read sorted docValues for field " + field);
+ }
+ }
+
+ @Override
+ public float score() throws IOException {
+ if (docValues.advanceExact(itr.docID())) {
+ int ord = docValues.ordValue();
+ return readSortedDocValues(docValues.lookupOrd(ord));
+ }
+ return FieldValueFeature.this.getDefaultValue();
}
/**
@@ -272,5 +315,6 @@ public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
+
}
}
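The new SortedDocValuesFieldValueFeatureScorer resolves the per-document ordinal to a BytesRef and then interprets it the way readSortedDocValues does: Solr's BoolField indexes booleans as the single-character tokens referenced via BoolField.TRUE_TOKEN and BoolField.FALSE_TOKEN ('T' and 'F'), and anything else is parsed as a float. A condensed, self-contained sketch of that interpretation over plain Strings (the helper name is mine, not from the patch):

public class SortedValueInterpretationSketch {
  // Mirrors the mapping applied to the looked-up BytesRef value.
  static float interpret(String value) {
    if (value.length() == 1 && (value.charAt(0) == 'T' || value.charAt(0) == 'F')) {
      return value.charAt(0) == 'T' ? 1f : 0f;   // boolean tokens
    }
    return Float.parseFloat(value);              // number strings; non-numbers throw NumberFormatException
  }

  public static void main(String[] args) {
    System.out.println(interpret("T"));          // 1.0
    System.out.println(interpret("F"));          // 0.0
    System.out.println(interpret("532"));        // 532.0
    System.out.println(interpret("-7324.427"));  // -7324.427
  }
}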
From f16ce3d1e5330784d9804756d0dc10999a0e25de Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Fri, 14 May 2021 19:40:53 +0100
Subject: [PATCH 09/27] add TestFieldValueFeature test coverage (with caveat)
caveat: TestFieldValueFeature.testIfADocumentDoesntHaveAFieldDefaultValueIsReturned fails
---
.../solr/collection1/conf/schema.xml | 9 ++
.../ltr/feature/TestFieldValueFeature.java | 150 ++++++++++--------
2 files changed, 91 insertions(+), 68 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 8ec89e39285..bf50149fcdd 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -26,6 +26,10 @@
+
+
+
+
@@ -47,6 +51,11 @@
+
+
+
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 108044b5cbd..9791fb7fcd6 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -30,6 +30,12 @@ public class TestFieldValueFeature extends TestRerankBase {
private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f;
+ private static final String FIELD_NAMES[] = {
+ "popularity",
+ "dvIntPopularity", "dvLongPopularity",
+ "dvFloatPopularity", "dvDoublePopularity"
+ };
+
@Before
public void before() throws Exception {
setuptest(false);
@@ -56,11 +62,13 @@ public void before() throws Exception {
assertU(commit());
- loadFeature("popularity", FieldValueFeature.class.getName(),
- "{\"field\":\"popularity\"}");
+ for (String field : FIELD_NAMES) {
+ loadFeature(field, FieldValueFeature.class.getName(),
+ "{\"field\":\""+field+"\"}");
- loadModel("popularity-model", LinearModel.class.getName(),
- new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}");
+ loadModel(field + "-model", LinearModel.class.getName(),
+ new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}");
+ }
}
@After
@@ -70,86 +78,92 @@ public void after() throws Exception {
@Test
public void testRanking() throws Exception {
-
- final SolrQuery query = new SolrQuery();
- query.setQuery("title:w1");
- query.add("fl", "*, score");
- query.add("rows", "4");
-
- // Normal term match
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
-
- query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
-
- query.setQuery("*:*");
- query.remove("rows");
- query.add("rows", "8");
- query.remove("rq");
- query.add("rq", "{!ltr model=popularity-model reRankDocs=8}");
-
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+ for (String field : FIELD_NAMES) {
+
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("title:w1");
+ query.add("fl", "*, score");
+ query.add("rows", "4");
+
+ // Normal term match
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
+
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
+
+ query.setQuery("*:*");
+ query.remove("rows");
+ query.add("rows", "8");
+ query.remove("rq");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}");
+
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+ }
}
@Test
public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception {
- SolrQuery query = new SolrQuery();
- query.setQuery("id:42");
- query.add("fl", "*, score");
- query.add("rows", "4");
-
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
- query = new SolrQuery();
- query.setQuery("id:42");
- query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
- query.add("fl", "[fv]");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
-
+ for (String field : FIELD_NAMES) {
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("fl", "*, score");
+ query.add("rows", "4");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
+ query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+
+ }
}
@Test
public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception {
+ for (String field : FIELD_NAMES) {
- final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned";
+ final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field;
- loadFeature("popularity42", FieldValueFeature.class.getName(), fstore,
- "{\"field\":\"popularity\",\"defaultValue\":\"42.0\"}");
+ loadFeature(field+"42", FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\""+field+"\",\"defaultValue\":\"42.0\"}");
- SolrQuery query = new SolrQuery();
- query.setQuery("id:42");
- query.add("fl", "*, score");
- query.add("rows", "4");
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("fl", "*, score");
+ query.add("rows", "4");
- loadModel("popularity-model42", LinearModel.class.getName(),
- new String[] {"popularity42"}, fstore, "{\"weights\":{\"popularity42\":1.0}}");
+ loadModel(field+"-model42", LinearModel.class.getName(),
+ new String[] {field+"42"}, fstore, "{\"weights\":{\""+field+"42\":1.0}}");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
- query = new SolrQuery();
- query.setQuery("id:42");
- query.add("rq", "{!ltr model=popularity-model42 reRankDocs=4}");
- query.add("fl", "[fv]");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity42","42.0")+"'}");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
+ query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("rq", "{!ltr model="+field+"-model42 reRankDocs=4}");
+ query.add("fl", "[fv]");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field+"42","42.0")+"'}");
+ }
}
@Test
From e5954eb204f06eaec80523a775cbd0b5e1a679ca Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Sun, 16 May 2021 11:05:26 +0200
Subject: [PATCH 10/27] [SOLR-12697] remove method to read sorted values from
Scorer for numeric docValues
---
.../solr/ltr/feature/FieldValueFeature.java | 28 +------------------
1 file changed, 1 insertion(+), 27 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 9b9c6561489..448ab21409a 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -226,43 +226,17 @@ private float readNumericDocValues() throws IOException {
return docValues.longValue();
}
- /**
-     * Interprets the bytesRef either as a true/false token or tries to parse it as a number string
- *
- * @param bytesRef the value of the field that should be used as score
- * @return the input converted to a number
- */
- private float readSortedDocValues(BytesRef bytesRef) {
- String string = bytesRef.utf8ToString();
- if (string.length() == 1
- && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
- // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
- // (see BoolField)
- if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
- return 1f;
- } else {
- return 0f;
- }
- } else {
- try {
- return Float.parseFloat(string);
- } catch (NumberFormatException ex) {
- throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float.");
- }
- }
- }
-
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
+
/**
* A FeatureScorer that reads the sorted docValues for a field
*/
public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
SortedDocValues docValues;
- NumberType fieldNumberType;
public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
final DocIdSetIterator itr) {
From da6a63568f055e41218cf83335bb47cef2724380 Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Mon, 17 May 2021 22:36:50 +0200
Subject: [PATCH 11/27] [SOLR-12697] add fallback feature scorer that always
returns the default value; only use one model in tests because of
extractAllFeatures==true
---
.../solr/ltr/feature/FieldValueFeature.java | 31 ++-
.../solr/collection1/conf/schema.xml | 1 +
.../ltr/feature/TestFieldValueFeature.java | 181 ++++++++++++------
3 files changed, 146 insertions(+), 67 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 448ab21409a..c49ddecf141 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -123,11 +123,12 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException {
} else if (DocValuesType.SORTED.equals(docValuesType)) {
return new SortedDocValuesFieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
- // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway
- } else if (!DocValuesType.NONE.equals(docValuesType)) {
- throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
- + " is not supported!");
+ } else if (DocValuesType.NONE.equals(docValuesType)) {
+ // Using a fallback feature scorer because this segment has no documents with a doc value for the current field
+ return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
+ throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
+ + " is not supported!");
}
return new FieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
@@ -290,5 +291,27 @@ public float getMaxScore(int upTo) throws IOException {
}
}
+ /**
+ * A FeatureScorer that always returns the default value.
+ *
+ * It is used as a fallback for cases when a segment does not have any documents that contain doc values for a field.
+ * By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but
+ * in a less performant way because it would first try to read the stored fields for the doc (which aren't present).
+ */
+ public class DefaultValueFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
+ public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) {
+ super(weight, itr);
+ }
+
+ @Override
+ public float score() throws IOException {
+ return FieldValueFeature.this.getDefaultValue();
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return Float.POSITIVE_INFINITY;
+ }
+ }
}
}
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index bf50149fcdd..c0170398914 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -38,6 +38,7 @@
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 9791fb7fcd6..2796caad2a8 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -16,21 +16,30 @@
*/
package org.apache.solr.ltr.feature;
-import java.util.LinkedHashMap;
-
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.ltr.FeatureLoggerTestUtils;
import org.apache.solr.ltr.TestRerankBase;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer;
import org.apache.solr.ltr.model.LinearModel;
+import org.apache.solr.request.SolrQueryRequest;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
public class TestFieldValueFeature extends TestRerankBase {
private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f;
- private static final String FIELD_NAMES[] = {
+ private static final String[] FIELD_NAMES = {
"popularity",
"dvIntPopularity", "dvLongPopularity",
"dvFloatPopularity", "dvDoublePopularity"
@@ -65,10 +74,10 @@ public void before() throws Exception {
for (String field : FIELD_NAMES) {
loadFeature(field, FieldValueFeature.class.getName(),
"{\"field\":\""+field+"\"}");
-
- loadModel(field + "-model", LinearModel.class.getName(),
- new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}");
}
+ loadModel("model", LinearModel.class.getName(), FIELD_NAMES,
+ "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0}}");
}
@After
@@ -78,68 +87,63 @@ public void after() throws Exception {
@Test
public void testRanking() throws Exception {
- for (String field : FIELD_NAMES) {
-
- final SolrQuery query = new SolrQuery();
- query.setQuery("title:w1");
- query.add("fl", "*, score");
- query.add("rows", "4");
-
- // Normal term match
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
-
- query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
-
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
-
- query.setQuery("*:*");
- query.remove("rows");
- query.add("rows", "8");
- query.remove("rq");
- query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}");
-
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
- }
+ SolrQuery query = new SolrQuery();
+ query.setQuery("title:w1");
+ query.add("fl", "*, score");
+ query.add("rows", "4");
+
+ // Normal term match
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
+
+ query.add("rq", "{!ltr model=model reRankDocs=4}");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
+
+ query.setQuery("*:*");
+ query.remove("rows");
+ query.add("rows", "8");
+ query.remove("rq");
+ query.add("rq", "{!ltr model=model reRankDocs=8}");
+
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
}
@Test
public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception {
- for (String field : FIELD_NAMES) {
- SolrQuery query = new SolrQuery();
- query.setQuery("id:42");
- query.add("fl", "*, score");
- query.add("rows", "4");
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("fl", "*, score");
+ query.add("rows", "4");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
- query = new SolrQuery();
- query.setQuery("id:42");
- query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
- query.add("fl", "[fv]");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
- }
- }
+ query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("rq", "{!ltr model=model reRankDocs=4}");
+ query.add("fl", "[fv]");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
+ "dvFloatPopularity=0.0,dvDoublePopularity=0.0'}");
+ }
@Test
public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception {
for (String field : FIELD_NAMES) {
-
final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field;
loadFeature(field+"42", FieldValueFeature.class.getName(), fstore,
@@ -162,15 +166,15 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field+"42","42.0")+"'}");
-
}
}
@Test
- public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exception {
+ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception {
+ // this tests the case that we create a feature for a non-existent field
// using a different fstore to avoid a clash with the other tests
- final String fstore = "testThatIfaFieldDoesNotExistDefaultValueIsReturned";
- loadFeature("not-existing-field", FieldValueFeature.class.getName(), fstore,
+ final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned";
+ loadFeature("not-existing-field", ObservingFieldValueFeature.class.getName(), fstore,
"{\"field\":\"cowabunga\"}");
loadModel("not-existing-field-model", LinearModel.class.getName(),
@@ -182,8 +186,30 @@ public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exceptio
query.add("fl", "[fv]");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
+ .toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
+ }
+ @Test
+ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception {
+ // this tests the case that no document contains docValues for the provided existing field
+ final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned";
+ loadFeature("dvTestField", ObservingFieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"dvTestField\"}");
+
+ loadModel("dvTestField-model", LinearModel.class.getName(),
+ new String[] {"dvTestField"}, fstore, "{\"weights\":{\"dvTestField\":1.0}}");
+
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:42");
+ query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
+ .toFeatureVector("dvTestField",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
}
@Test
@@ -202,7 +228,6 @@ public void testBooleanValue() throws Exception {
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}");
-
query = new SolrQuery();
query.setQuery("id:5");
query.add("rq", "{!ltr model=trendy-model reRankDocs=4}");
@@ -217,7 +242,6 @@ public void testBooleanValue() throws Exception {
query.add("fl", "[fv]");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}");
-
}
@Test
@@ -227,4 +251,35 @@ public void testParamsToMap() throws Exception {
doTestParamsToMap(FieldValueFeature.class.getName(), params);
}
+ /**
+   * This class is used to track which FieldValueFeature scorer is used so that we can test whether the
+ * fallback mechanism works correctly.
+ */
+ public static class ObservingFieldValueFeature extends FieldValueFeature {
+ static String usedScorerClass;
+
+ public ObservingFieldValueFeature(String name, Map params) {
+ super(name, params);
+ }
+
+ @Override
+ public Feature.FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request,
+ Query originalQuery, Map efi) throws IOException {
+ return new ObservingFieldValueFeatureWeight(searcher, request, originalQuery, efi);
+ }
+
+ public class ObservingFieldValueFeatureWeight extends FieldValueFeatureWeight {
+ public ObservingFieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request,
+ Query originalQuery, Map efi) {
+ super(searcher, request, originalQuery, efi);
+ }
+
+ @Override
+ public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+ FeatureScorer scorer = super.scorer(context);
+ usedScorerClass = scorer.getClass().getName();
+ return scorer;
+ }
+ }
+ }
}
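With the DefaultValueFieldValueFeatureScorer in place, each segment now gets one of four scorers depending on the schema field and the segment's docValues type, while stored or unknown fields keep using the original stored-field scorer. A condensed, self-contained sketch of that choice (my own helper, not code from the patch; docValuesOnlyField stands for the "!stored() && hasDocValues()" check):

import org.apache.lucene.index.DocValuesType;

public class ScorerChoiceSketch {
  static String scorerFor(boolean docValuesOnlyField, DocValuesType dvType) {
    if (docValuesOnlyField) {
      switch (dvType) {
        case NUMERIC: return "NumericDocValuesFieldValueFeatureScorer";
        case SORTED:  return "SortedDocValuesFieldValueFeatureScorer";
        case NONE:    return "DefaultValueFieldValueFeatureScorer"; // segment has no values for the field
        default:      throw new IllegalArgumentException("Doc values type " + dvType + " is not supported!");
      }
    }
    return "FieldValueFeatureScorer"; // stored-field path, behaviour unchanged
  }

  public static void main(String[] args) {
    System.out.println(scorerFor(true, DocValuesType.NUMERIC));
    System.out.println(scorerFor(true, DocValuesType.NONE));
    System.out.println(scorerFor(false, DocValuesType.NONE));
  }
}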
From b1056278e9b2ce99a2c368524e727cc0b82dbfb0 Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 19 May 2021 09:02:10 +0200
Subject: [PATCH 12/27] [SOLR-12697] test that exception is thrown for
unsupported dv type, test that right scorer classes are used, add more fields
to test
---
.../solr/collection1/conf/schema.xml | 4 +
.../ltr/feature/TestFieldValueFeature.java | 158 ++++++++++++++----
2 files changed, 131 insertions(+), 31 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index c0170398914..0081e1b61dd 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -30,6 +30,8 @@
+
+
@@ -39,6 +41,8 @@
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 2796caad2a8..82c8bf5bacb 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -25,6 +25,8 @@
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight;
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer;
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.SortedDocValuesFieldValueFeatureScorer;
import org.apache.solr.ltr.model.LinearModel;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.After;
@@ -39,45 +41,55 @@ public class TestFieldValueFeature extends TestRerankBase {
private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f;
- private static final String[] FIELD_NAMES = {
- "popularity",
- "dvIntPopularity", "dvLongPopularity",
- "dvFloatPopularity", "dvDoublePopularity"
+ private static final String[] FIELDS = {
+ "popularity",
+ "dvIntPopularity", "dvLongPopularity",
+ "dvFloatPopularity", "dvDoublePopularity",
+ "dvStringPopularity", "dvBoolPopularity"
};
@Before
public void before() throws Exception {
setuptest(false);
- assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
- "1","isTrendy","true"));
- assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description",
- "w2 2asd asdd didid", "popularity", "2"));
- assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
- "3","isTrendy","true"));
- assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
- "4","isTrendy","false"));
- assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
- "5","isTrendy","true"));
- assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2",
- "popularity", "6","isTrendy","false"));
- assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description",
- "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true"));
- assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description",
- "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false"));
-
- // a document without the popularity field
+ assertU(adoc("id", "1", "popularity", "1", "title", "w1",
+ "dvStringPopularity", "1", "dvBoolPopularity", "F",
+ "description", "w1", "isTrendy", "true"));
+ assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid",
+ "dvStringPopularity", "2", "dvBoolPopularity", "T",
+ "description", "w2 2asd asdd didid"));
+ assertU(adoc("id", "3", "popularity", "3", "title", "w3",
+ "dvStringPopularity", "3", "dvBoolPopularity", "F",
+ "description", "w3", "isTrendy", "true"));
+ assertU(adoc("id", "4", "popularity", "4", "title", "w4",
+ "dvStringPopularity", "4", "dvBoolPopularity", "T",
+ "description", "w4", "isTrendy", "false"));
+ assertU(adoc("id", "5", "popularity", "5", "title", "w5",
+ "dvStringPopularity", "5", "dvBoolPopularity", "F",
+ "description", "w5", "isTrendy", "true"));
+ assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2",
+ "dvStringPopularity", "6", "dvBoolPopularity", "T",
+ "description", "w1 w2", "isTrendy", "false"));
+ assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5",
+ "dvStringPopularity", "7", "dvBoolPopularity", "F",
+ "description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true"));
+ assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8",
+ "dvStringPopularity", "8", "dvBoolPopularity", "T",
+ "description", "w1 w1 w1 w2 w2", "isTrendy", "false"));
+
+ // a document without the popularity and the dv fields
assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity"));
assertU(commit());
- for (String field : FIELD_NAMES) {
+ for (String field : FIELDS) {
loadFeature(field, FieldValueFeature.class.getName(),
- "{\"field\":\""+field+"\"}");
+ "{\"field\":\"" + field + "\"}");
}
- loadModel("model", LinearModel.class.getName(), FIELD_NAMES,
- "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
- "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0}}");
+ loadModel("model", LinearModel.class.getName(), FIELDS,
+ "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," +
+ "\"dvStringPopularity\":1.0,\"dvBoolPopularity\":1.0}}");
}
@After
@@ -119,7 +131,6 @@ public void testRanking() throws Exception {
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
}
-
@Test
public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception {
SolrQuery query = new SolrQuery();
@@ -137,13 +148,14 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
- "dvFloatPopularity=0.0,dvDoublePopularity=0.0'}");
+ "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
+ "dvFloatPopularity=0.0,dvDoublePopularity=0.0," +
+ "dvStringPopularity=0.0,dvBoolPopularity=0.0'}");
}
@Test
public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception {
- for (String field : FIELD_NAMES) {
+ for (String field : FIELDS) {
final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field;
loadFeature(field+"42", FieldValueFeature.class.getName(), fstore,
@@ -169,6 +181,35 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E
}
}
+ @Test
+ public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception {
+ final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"};
+
+ double fieldCounter = -1.0;
+ for (String field : fieldsWithDefaultValues) {
+ final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field;
+
+ assertU(adoc("id", "21"));
+ assertU(commit());
+
+ loadFeature(field, FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\""+field+"\"}");
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}");
+
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:21");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, String.valueOf(fieldCounter))+"'}");
+
+ fieldCounter--;
+ }
+ }
+
@Test
public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception {
// this tests the case that we create a feature for a non-existent field
@@ -205,6 +246,7 @@ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws E
query.setQuery("id:42");
query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}");
query.add("fl", "[fv]");
+
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
@@ -244,6 +286,60 @@ public void testBooleanValue() throws Exception {
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}");
}
+ @Test
+ public void testThatExceptionIsThrownForUnsupportedType() throws Exception {
+ final String fstore = "test_store";
+
+ assertU(adoc("id", "21", "title", "multivalued not supported", "dvUnsupportedField", "wow value"));
+ assertU(commit());
+
+ loadFeature("dvUnsupportedField", FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"dvUnsupportedField\"}");
+
+ loadModel("dvUnsupportedField-model", LinearModel.class.getName(),
+ new String[] {"dvUnsupportedField"}, fstore, "{\"weights\":{\"dvUnsupportedField\":1.0}}");
+
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:21");
+ query.add("rq", "{!ltr model=dvUnsupportedField-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(),
+ "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvUnsupportedField is not supported!'");
+ }
+
+ @Test
+ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception {
+ final String[][] fieldsWithDifferentTypes = {
+ new String[]{"dvIntPopularity", NumericDocValuesFieldValueFeatureScorer.class.getName()},
+ new String[]{"dvStringPopularity", SortedDocValuesFieldValueFeatureScorer.class.getName()},
+ new String[]{"noDocValuesField", FieldValueFeatureScorer.class.getName()}
+ };
+
+ for (String[] fieldAndScorerClass : fieldsWithDifferentTypes) {
+ String field = fieldAndScorerClass[0];
+ final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes"+field;
+
+ assertU(adoc("id", "21", field, "1"));
+ assertU(commit());
+
+ loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore,
+ "{\"field\":\""+field+"\"}");
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}");
+
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:21");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}");
+ assertEquals(fieldAndScorerClass[1], ObservingFieldValueFeature.usedScorerClass);
+ }
+ }
+
@Test
public void testParamsToMap() throws Exception {
final LinkedHashMap params = new LinkedHashMap();
From e07c4328a419b392cdc60daa9396419c470acf3b Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Wed, 19 May 2021 20:57:20 +0200
Subject: [PATCH 13/27] [SOLR-12697] add tests for parsing different
sortedDocValues, add entry to CHANGES.txt
---
solr/CHANGES.txt | 2 ++
.../ltr/feature/TestFieldValueFeature.java | 34 +++++++++++++++++++
2 files changed, 36 insertions(+)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0054194c483..9384f314b78 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -322,6 +322,8 @@ New Features
* SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy)
+* SOLR-12697: Add pure DocValues support to FieldValueFeature (Tom Gilke, Christine Poerschke)
+
Improvements
---------------------
* SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState". (David Smiley)
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 82c8bf5bacb..c485dc9fe46 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -347,6 +347,40 @@ public void testParamsToMap() throws Exception {
doTestParamsToMap(FieldValueFeature.class.getName(), params);
}
+ @Test
+ public void testThatStringValuesAreCorrectlyParsed() throws Exception {
+ final String[][] inputsAndTests = {
+ new String[]{"T", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "1.0")+"'}"},
+ new String[]{"F", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "0.0")+"'}"},
+ new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "-7324.427")+"'}"},
+ new String[]{"532", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "532.0")+"'}"},
+ new String[]{"notanumber", "/error/msg/=='org.apache.solr.ltr.feature.FeatureException: " +
+ "Cannot parse value notanumber of field dvStrNumField to float.'"}
+ };
+
+ final String fstore = "testThatStringValuesAreCorrectlyParsed";
+ loadFeature("dvStrNumField", FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"" + "dvStrNumField" + "\"}");
+ loadModel("dvStrNumField-model", LinearModel.class.getName(),
+ new String[]{"dvStrNumField"}, fstore, "{\"weights\":{\"" + "dvStrNumField" + "\":1.0}}");
+
+ for (String[] inputAndTest : inputsAndTests) {
+ assertU(adoc("id", "21", "dvStrNumField", inputAndTest[0]));
+ assertU(commit());
+
+ SolrQuery query = new SolrQuery();
+ query.setQuery("id:21");
+ query.add("rq", "{!ltr model=" + "dvStrNumField" + "-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(), inputAndTest[1]);
+ }
+ }
+
/**
* This class is used to track which specific FieldValueFeature is used so that we can test whether the
* fallback mechanism works correctly.
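Aside (illustration only, not part of the patch series): the testThatStringValuesAreCorrectlyParsed cases above pin down how a string docValues value becomes a feature value: BoolField's single-char tokens "T"/"F" map to 1.0/0.0, numeric strings are parsed as floats, and anything else fails with a FeatureException. A minimal sketch of that conversion follows; the class and method names are hypothetical, and a plain IllegalArgumentException stands in for the FeatureException thrown by the real scorer.

public class SortedDocValuesParseSketch {
  // Hypothetical helper mirroring the behaviour the assertions above expect;
  // it is not the actual SortedDocValuesFieldValueFeatureScorer code.
  static float parseToFloat(String value) {
    if ("T".equals(value)) {   // BoolField.TRUE_TOKEN
      return 1f;
    }
    if ("F".equals(value)) {   // BoolField.FALSE_TOKEN
      return 0f;
    }
    try {
      return Float.parseFloat(value);  // "532" -> 532.0, "-7324.427" -> -7324.427
    } catch (NumberFormatException e) {
      // the real code reports: "Cannot parse value <value> of field <field> to float."
      throw new IllegalArgumentException(
          "Cannot parse value " + value + " of field dvStrNumField to float.", e);
    }
  }

  public static void main(String[] args) {
    System.out.println(parseToFloat("T"));          // 1.0
    System.out.println(parseToFloat("532"));        // 532.0
    System.out.println(parseToFloat("-7324.427"));  // -7324.427
    System.out.println(parseToFloat("notanumber")); // throws
  }
}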
From 443a3962858684e9d69511b7c97ccdba5001d81c Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 09:45:56 +0100
Subject: [PATCH 14/27] solr/CHANGES.txt edit
---
solr/CHANGES.txt | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9384f314b78..36ba9459e05 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -322,7 +322,8 @@ New Features
* SOLR-15397: Expose zookeeper status in the Prometheus exporter (janhoy)
-* SOLR-12697: Add pure DocValues support to FieldValueFeature (Tom Gilke, Christine Poerschke)
+* SOLR-12697: In contrib/ltr FieldValueFeature support "stored=false docValues=true" a.k.a. pure DocValues fields.
+ (Stanislav Livotov, Erick Erickson, Tobias Kässmann, Tom Gilke, Christine Poerschke)
Improvements
---------------------
From 2dbd94e74f7ff1ed0dd23b2d2bff53cfa2cfd569 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 09:47:13 +0100
Subject: [PATCH 15/27] in TestFieldValueFeature reduce potential test
interaction
---
.../org/apache/solr/ltr/feature/TestFieldValueFeature.java | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index c485dc9fe46..69d239c8ea8 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -22,7 +22,6 @@
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.ltr.FeatureLoggerTestUtils;
import org.apache.solr.ltr.TestRerankBase;
-import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight;
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer;
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer;
import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer;
@@ -225,6 +224,7 @@ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws E
query.setQuery("id:42");
query.add("rq", "{!ltr model=not-existing-field-model reRankDocs=4}");
query.add("fl", "[fv]");
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
@@ -247,6 +247,7 @@ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws E
query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}");
query.add("fl", "[fv]");
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
@@ -333,6 +334,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce
query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
query.add("fl", "[fv]");
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}");
From 9b771546736a5dbc3091c058c4585a557cad5841 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 09:47:51 +0100
Subject: [PATCH 16/27] in FieldValueFeature clarify 'searcher instanceof
SolrIndexSearcher' use
---
.../src/java/org/apache/solr/ltr/feature/FieldValueFeature.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index c49ddecf141..165a073d1a2 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -98,7 +98,7 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
if (searcher instanceof SolrIndexSearcher) {
schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
- } else {
+ } else { // some tests pass a null or a non-SolrIndexSearcher searcher
schemaField = null;
}
}
From c1f3a8ee35eb385a3660ef586647653e17f49923 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 18:14:33 +0100
Subject: [PATCH 17/27] TestFieldValueFeature: replace dvBoolPopularity with
 dvIsTrendy (former is more numeric, latter is more boolean, and copyField
 from isTrendy simplifies the document adds)
---
.../solr/collection1/conf/schema.xml | 8 ++++--
.../ltr/feature/TestFieldValueFeature.java | 26 ++++++++++---------
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 0081e1b61dd..005eacf0121 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -18,7 +18,7 @@
-
+
@@ -30,10 +30,12 @@
-
+
+
+
@@ -61,6 +63,8 @@
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 69d239c8ea8..ad58986da31 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -44,7 +44,9 @@ public class TestFieldValueFeature extends TestRerankBase {
"popularity",
"dvIntPopularity", "dvLongPopularity",
"dvFloatPopularity", "dvDoublePopularity",
- "dvStringPopularity", "dvBoolPopularity"
+ "dvStringPopularity",
+ "isTrendy",
+ "dvIsTrendy"
};
@Before
@@ -52,32 +54,32 @@ public void before() throws Exception {
setuptest(false);
assertU(adoc("id", "1", "popularity", "1", "title", "w1",
- "dvStringPopularity", "1", "dvBoolPopularity", "F",
+ "dvStringPopularity", "1",
"description", "w1", "isTrendy", "true"));
assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid",
- "dvStringPopularity", "2", "dvBoolPopularity", "T",
+ "dvStringPopularity", "2",
"description", "w2 2asd asdd didid"));
assertU(adoc("id", "3", "popularity", "3", "title", "w3",
- "dvStringPopularity", "3", "dvBoolPopularity", "F",
+ "dvStringPopularity", "3",
"description", "w3", "isTrendy", "true"));
assertU(adoc("id", "4", "popularity", "4", "title", "w4",
- "dvStringPopularity", "4", "dvBoolPopularity", "T",
+ "dvStringPopularity", "4",
"description", "w4", "isTrendy", "false"));
assertU(adoc("id", "5", "popularity", "5", "title", "w5",
- "dvStringPopularity", "5", "dvBoolPopularity", "F",
+ "dvStringPopularity", "5",
"description", "w5", "isTrendy", "true"));
assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2",
- "dvStringPopularity", "6", "dvBoolPopularity", "T",
+ "dvStringPopularity", "6",
"description", "w1 w2", "isTrendy", "false"));
assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5",
- "dvStringPopularity", "7", "dvBoolPopularity", "F",
+ "dvStringPopularity", "7",
"description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true"));
assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8",
- "dvStringPopularity", "8", "dvBoolPopularity", "T",
+ "dvStringPopularity", "8",
"description", "w1 w1 w1 w2 w2", "isTrendy", "false"));
// a document without the popularity and the dv fields
- assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity"));
+ assertU(adoc("id", "42", "title", "NO popularity or isTrendy", "description", "NO popularity or isTrendy"));
assertU(commit());
@@ -88,7 +90,7 @@ public void before() throws Exception {
loadModel("model", LinearModel.class.getName(), FIELDS,
"{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
"\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," +
- "\"dvStringPopularity\":1.0,\"dvBoolPopularity\":1.0}}");
+ "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
}
@After
@@ -149,7 +151,7 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
"dvFloatPopularity=0.0,dvDoublePopularity=0.0," +
- "dvStringPopularity=0.0,dvBoolPopularity=0.0'}");
+ "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}");
}
@Test
From 3c38e911483b051f5037138cd6a9cc0cc96a9ae5 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 18:24:15 +0100
Subject: [PATCH 18/27] out-scope TestLTRReRankingPipeline changes
---
.../solr/collection1/conf/schema.xml | 4 +-
.../solr/ltr/TestLTRReRankingPipeline.java | 334 ++++++++++--------
2 files changed, 188 insertions(+), 150 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 005eacf0121..c033973d96d 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -19,9 +19,6 @@
-
-
-
@@ -42,6 +39,7 @@
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index 1b1967d47ef..85019445546 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -25,8 +25,12 @@
import java.util.List;
import java.util.Map;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -37,8 +41,8 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.lucene.store.Directory;
+import org.apache.solr.SolrTestCase;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.ltr.feature.Feature;
import org.apache.solr.ltr.feature.FieldValueFeature;
@@ -46,24 +50,16 @@
import org.apache.solr.ltr.model.TestLinearModel;
import org.apache.solr.ltr.norm.IdentityNormalizer;
import org.apache.solr.ltr.norm.Normalizer;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestLTRReRankingPipeline extends SolrTestCaseJ4 {
+public class TestLTRReRankingPipeline extends SolrTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath());
- @BeforeClass
- public static void setup() throws Exception {
- initCore("solrconfig-ltr.xml", "schema.xml");
- }
-
private IndexSearcher getSearcher(IndexReader r) {
// 'yes' to maybe wrapping in general
final boolean maybeWrap = true;
@@ -113,155 +109,199 @@ public Explanation explain(LeafReaderContext context, int doc,
}
@Test
- public void testRescorer() throws Exception {
- assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F"));
- assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T"));
- assertU(commit());
-
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
-
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = solrQueryRequest.getSearcher();
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(2, hits.totalHits.value);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "finalScore");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel);
- ltrScoringQuery.setRequest(solrQueryRequest);
- final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery);
- hits = rescorer.rescore(searcher, hits, 2);
-
- // rerank using the field finalScore
- assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- }
+ public void testRescorer() throws IOException {
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ Document doc = new Document();
+ doc.add(newStringField("id", "0", Field.Store.YES));
+ doc.add(newTextField("field", "wizard the the the the the oz",
+ Field.Store.NO));
+ doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field
+
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "1", Field.Store.YES));
+ // 1 extra token, but wizard and oz are close;
+ doc.add(newTextField("field", "wizard oz the the the the the the",
+ Field.Store.NO));
+ doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field
+ w.addDocument(doc);
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = getSearcher(r);
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(2, hits.totalHits.value);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
+ hits = rescorer.rescore(searcher, hits, 2);
+
+ // rerank using the field final-score
+ assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ r.close();
+ dir.close();
+
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
- assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0"));
- assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0"));
- assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0"));
- assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0"));
- assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0"));
- assertU(commit());
-
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = solrQueryRequest.getSearcher();
-
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(5, hits.totalHits.value);
-
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "finalScoreFloat");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel);
- scoringQuery.setRequest(solrQueryRequest);
- final LTRRescorer rescorer = new LTRRescorer(scoringQuery);
-
- // rerank @ 0 should not change the order
- hits = rescorer.rescore(searcher, hits, 0);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- // test rerank with different topN cuts
-
- for (int topN = 1; topN <= 5; topN++) {
- log.info("rerank {} documents ", topN);
- hits = searcher.search(bqBuilder.build(), 10);
-
- final ScoreDoc[] slice = new ScoreDoc[topN];
- System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
- hits = new TopDocs(hits.totalHits, slice);
- hits = rescorer.rescore(searcher, hits, topN);
- for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
- if (log.isInfoEnabled()) {
- log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
- .get("id"), j);
- }
-
- assertEquals(i,
- Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
- assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
-
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ Document doc = new Document();
+ doc.add(newStringField("id", "0", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 1.0f));
+ w.addDocument(doc);
+
+ doc = new Document();
+ doc.add(newStringField("id", "1", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 2.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "2", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 3.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "3", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz the the the the ",
+ Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 4.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "4", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz the the the the the the",
+ Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 5.0f));
+ w.addDocument(doc);
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = getSearcher(r);
+
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(5, hits.totalHits.value);
+
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
+
+ // rerank @ 0 should not change the order
+ hits = rescorer.rescore(searcher, hits, 0);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ // test rerank with different topN cuts
+
+ for (int topN = 1; topN <= 5; topN++) {
+ log.info("rerank {} documents ", topN);
+ hits = searcher.search(bqBuilder.build(), 10);
+
+ final ScoreDoc[] slice = new ScoreDoc[topN];
+ System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
+ hits = new TopDocs(hits.totalHits, slice);
+ hits = rescorer.rescore(searcher, hits, topN);
+ for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
+ if (log.isInfoEnabled()) {
+ log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
+ .get("id"), j);
}
+
+ assertEquals(i,
+ Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
+ assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+
}
}
+
+ r.close();
+ dir.close();
+
}
@Test
public void testDocParam() throws Exception {
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
- List features = makeFieldValueFeatures(new int[] {0},
- "finalScore");
- List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- List allFeatures = makeFieldValueFeatures(new int[] {0},
- "finalScore");
- MockModel ltrScoringModel = new MockModel("test",
- features, norms, "test", allFeatures, null);
- LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
- query.setRequest(solrQueryRequest);
- LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ final Map test = new HashMap();
+ test.put("fake", 2);
+ List features = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ List allFeatures = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ MockModel ltrScoringModel = new MockModel("test",
+ features, norms, "test", allFeatures, null);
+ LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
+ LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
- features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore");
- norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
- 9}, "finalScore");
- ltrScoringModel = new MockModel("test", features, norms,
- "test", allFeatures, null);
- query = new LTRScoringQuery(ltrScoringModel);
- query.setRequest(solrQueryRequest);
- wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
+ norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
+ 9}, "final-score");
+ ltrScoringModel = new MockModel("test", features, norms,
+ "test", allFeatures, null);
+ query = new LTRScoringQuery(ltrScoringModel);
+ wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
}
}
+
}
From 53cd2fb7e50f566fff531e9692d836f170ee5565 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 18:31:49 +0100
Subject: [PATCH 19/27] FieldValueFeature: mention stored=true or
docValues=true in javadocs
---
.../solr/ltr/feature/FieldValueFeature.java | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 165a073d1a2..8a1d7cd0a83 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -41,16 +41,16 @@
import org.apache.solr.search.SolrIndexSearcher;
/**
- * This feature returns the value of a field in the current document
+ * This feature returns the value of a field in the current document.
+ * The field must have stored="true" or docValues="true" properties.
* Example configuration:
* {
- * "name": "rawHits",
- * "class": "org.apache.solr.ltr.feature.FieldValueFeature",
- * "params": {
- * "field": "hits",
- * "defaultValue": -1
- * }
- * }
+ "name": "rawHits",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "hits"
+ }
+}
*/
public class FieldValueFeature extends Feature {
From e854f503b64b4862c45d576913d0ebba94503703 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 18:55:30 +0100
Subject: [PATCH 20/27] FieldValueFeature polishes:
 * undo distracting reformatting (hopefully one-off and next time 'spotless'
   gradle plugin will be available for contrib/ltr)
 * use private and final where possible
 * make new scorers final (but not existing scorer for back compat reasons)
   since no obvious need to extend
---
.../solr/ltr/feature/FieldValueFeature.java | 59 +++++++++++--------
1 file changed, 36 insertions(+), 23 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 8a1d7cd0a83..71c0eaa7d3e 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -67,8 +67,8 @@ public void setField(String field) {
}
@Override
- public LinkedHashMap paramsToMap() {
- final LinkedHashMap params = defaultParamsToMap();
+ public LinkedHashMap paramsToMap() {
+ final LinkedHashMap params = defaultParamsToMap();
params.put("field", field);
return params;
}
@@ -76,17 +76,19 @@ public LinkedHashMap paramsToMap() {
@Override
protected void validate() throws FeatureException {
if (field == null || field.isEmpty()) {
- throw new FeatureException(getClass().getSimpleName() + ": field must be provided");
+ throw new FeatureException(getClass().getSimpleName()+
+ ": field must be provided");
}
}
- public FieldValueFeature(String name, Map params) {
+ public FieldValueFeature(String name, Map params) {
super(name, params);
}
@Override
- public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request,
- Query originalQuery, Map efi) throws IOException {
+ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
+ SolrQueryRequest request, Query originalQuery, Map efi)
+ throws IOException {
return new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
}
@@ -94,7 +96,7 @@ public class FieldValueFeatureWeight extends FeatureWeight {
private final SchemaField schemaField;
public FieldValueFeatureWeight(IndexSearcher searcher,
- SolrQueryRequest request, Query originalQuery, Map efi) {
+ SolrQueryRequest request, Query originalQuery, Map efi) {
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
if (searcher instanceof SolrIndexSearcher) {
schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
@@ -114,8 +116,8 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
- FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
- DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
+ final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
+ final DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
if (DocValuesType.NUMERIC.equals(docValuesType)) {
return new NumericDocValuesFieldValueFeatureScorer(this, context,
@@ -131,16 +133,18 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+ " is not supported!");
}
return new FieldValueFeatureScorer(this, context,
- DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
+ DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
/**
* A FeatureScorer that reads the stored value for a field
*/
public class FieldValueFeatureScorer extends FeatureScorer {
- LeafReaderContext context;
- public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) {
+ LeafReaderContext context = null;
+
+ public FieldValueFeatureScorer(FeatureWeight weight,
+ LeafReaderContext context, DocIdSetIterator itr) {
super(weight, itr);
this.context = context;
}
@@ -149,7 +153,8 @@ public FieldValueFeatureScorer(FeatureWeight weight, LeafReaderContext context,
public float score() throws IOException {
try {
- final Document document = context.reader().document(itr.docID(), fieldAsSet);
+ final Document document = context.reader().document(itr.docID(),
+ fieldAsSet);
final IndexableField indexableField = document.getField(field);
if (indexableField == null) {
return getDefaultValue();
@@ -160,18 +165,22 @@ public float score() throws IOException {
} else {
final String string = indexableField.stringValue();
if (string.length() == 1) {
- // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
+ // boolean values in the index are encoded with the
+ // a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
- return 1f;
+ return 1;
}
if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
- return 0f;
+ return 0;
}
}
}
} catch (final IOException e) {
- throw new FeatureException(e.toString() + ": " + "Unable to extract feature for " + name, e);
+ throw new FeatureException(
+ e.toString() + ": " +
+ "Unable to extract feature for "
+ + name, e);
}
return getDefaultValue();
}
@@ -185,20 +194,22 @@ public float getMaxScore(int upTo) throws IOException {
/**
* A FeatureScorer that reads the numeric docValues for a field
*/
- public class NumericDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
- NumericDocValues docValues;
- NumberType numberType;
+ public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer {
+ private final NumericDocValues docValues;
+ private final NumberType numberType;
public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
final DocIdSetIterator itr, final NumberType numberType) {
super(weight, itr);
this.numberType = numberType;
+ NumericDocValues docValues;
try {
docValues = DocValues.getNumeric(context.reader(), field);
} catch (IOException e) {
throw new IllegalArgumentException("Could not read numeric docValues for field " + field);
}
+ this.docValues = docValues;
}
@Override
@@ -236,18 +247,20 @@ public float getMaxScore(int upTo) throws IOException {
/**
* A FeatureScorer that reads the sorted docValues for a field
*/
- public class SortedDocValuesFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
- SortedDocValues docValues;
+ public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer {
+ private final SortedDocValues docValues;
public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
final DocIdSetIterator itr) {
super(weight, itr);
+ SortedDocValues docValues;
try {
docValues = DocValues.getSorted(context.reader(), field);
} catch (IOException e) {
throw new IllegalArgumentException("Could not read sorted docValues for field " + field);
}
+ this.docValues = docValues;
}
@Override
@@ -298,7 +311,7 @@ public float getMaxScore(int upTo) throws IOException {
* By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but
* in a less performant way because it would first try to read the stored fields for the doc (which aren't present).
*/
- public class DefaultValueFieldValueFeatureScorer extends FeatureWeight.FeatureScorer {
+ public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer {
public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) {
super(weight, itr);
}
From b9d3cd0c9f5c424ec5df56c59d85a06b88d65941 Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Thu, 20 May 2021 20:01:23 +0200
Subject: [PATCH 21/27] [SOLR-12697] add javadoc to explain which type of
FieldValueFeatureScorer is used for different types of fields
---
.../apache/solr/ltr/feature/FieldValueFeature.java | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 165a073d1a2..9ae9c97595d 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -51,6 +51,17 @@
* "defaultValue": -1
* }
* }
+ *
+ * There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use.
+ * The chosen scorer depends on the field attributes.
+ *
+ * FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or docValues=false
+ *
+ * NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
+ * SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
+ *
+ * DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on segments
+ * where no document has a value set in the field of this feature
*/
public class FieldValueFeature extends Feature {
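Aside (illustration only, not part of the patch series): the javadoc added above amounts to a dispatch on the field's stored/docValues attributes plus its DocValuesType. A minimal, self-contained sketch of that decision follows, returning the simple name of the scorer that would be picked; the class, method, and boolean flags are hypothetical, while the actual selection lives in FieldValueFeatureWeight.scorer().

import org.apache.lucene.index.DocValuesType;

public class ScorerChoiceSketch {
  // Maps field attributes to the FieldValueFeature scorer named in the javadoc above.
  static String scorerFor(boolean stored, boolean hasDocValues, DocValuesType dvType) {
    if (!stored && hasDocValues) {
      switch (dvType) {
        case NUMERIC: return "NumericDocValuesFieldValueFeatureScorer";
        case SORTED:  return "SortedDocValuesFieldValueFeatureScorer";
        case NONE:    return "DefaultValueFieldValueFeatureScorer"; // no doc in this segment has a value
        default:      throw new IllegalArgumentException(
            "Doc values type " + dvType + " is not supported!");
      }
    }
    return "FieldValueFeatureScorer"; // stored=true (or no docValues): read the stored field value
  }

  public static void main(String[] args) {
    System.out.println(scorerFor(false, true, DocValuesType.NUMERIC)); // NumericDocValuesFieldValueFeatureScorer
    System.out.println(scorerFor(true, true, DocValuesType.NUMERIC));  // FieldValueFeatureScorer
  }
}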
From abb363210b76b627d89a402ae19a975b4cdb2b18 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 22:59:17 +0100
Subject: [PATCH 22/27] Revert "out-scope TestLTRReRankingPipeline changes"
This reverts commit 3c38e911483b051f5037138cd6a9cc0cc96a9ae5.
---
.../solr/collection1/conf/schema.xml | 4 +-
.../solr/ltr/TestLTRReRankingPipeline.java | 334 ++++++++----------
2 files changed, 150 insertions(+), 188 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index c033973d96d..005eacf0121 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -19,6 +19,9 @@
+
+
+
@@ -39,7 +42,6 @@
-
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index 85019445546..1b1967d47ef 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -25,12 +25,8 @@
import java.util.List;
import java.util.Map;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -41,8 +37,8 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.solr.SolrTestCase;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.ltr.feature.Feature;
import org.apache.solr.ltr.feature.FieldValueFeature;
@@ -50,16 +46,24 @@
import org.apache.solr.ltr.model.TestLinearModel;
import org.apache.solr.ltr.norm.IdentityNormalizer;
import org.apache.solr.ltr.norm.Normalizer;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestLTRReRankingPipeline extends SolrTestCase {
+public class TestLTRReRankingPipeline extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath());
+ @BeforeClass
+ public static void setup() throws Exception {
+ initCore("solrconfig-ltr.xml", "schema.xml");
+ }
+
private IndexSearcher getSearcher(IndexReader r) {
// 'yes' to maybe wrapping in general
final boolean maybeWrap = true;
@@ -109,199 +113,155 @@ public Explanation explain(LeafReaderContext context, int doc,
}
@Test
- public void testRescorer() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- Document doc = new Document();
- doc.add(newStringField("id", "0", Field.Store.YES));
- doc.add(newTextField("field", "wizard the the the the the oz",
- Field.Store.NO));
- doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field
-
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "1", Field.Store.YES));
- // 1 extra token, but wizard and oz are close;
- doc.add(newTextField("field", "wizard oz the the the the the the",
- Field.Store.NO));
- doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field
- w.addDocument(doc);
-
- final IndexReader r = w.getReader();
- w.close();
-
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = getSearcher(r);
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(2, hits.totalHits.value);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
- hits = rescorer.rescore(searcher, hits, 2);
-
- // rerank using the field final-score
- assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- r.close();
- dir.close();
-
+ public void testRescorer() throws Exception {
+ assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F"));
+ assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T"));
+ assertU(commit());
+
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = solrQueryRequest.getSearcher();
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(2, hits.totalHits.value);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "finalScore");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel);
+ ltrScoringQuery.setRequest(solrQueryRequest);
+ final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery);
+ hits = rescorer.rescore(searcher, hits, 2);
+
+ // rerank using the field finalScore
+ assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ }
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
- final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
-
- Document doc = new Document();
- doc.add(newStringField("id", "0", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 1.0f));
- w.addDocument(doc);
-
- doc = new Document();
- doc.add(newStringField("id", "1", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 2.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "2", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 3.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "3", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz oz the the the the ",
- Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 4.0f));
- w.addDocument(doc);
- doc = new Document();
- doc.add(newStringField("id", "4", Field.Store.YES));
- doc.add(newTextField("field", "wizard oz the the the the the the",
- Field.Store.NO));
- doc.add(new FloatDocValuesField("final-score", 5.0f));
- w.addDocument(doc);
-
- final IndexReader r = w.getReader();
- w.close();
-
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = getSearcher(r);
-
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(5, hits.totalHits.value);
-
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "final-score");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
-
- // rerank @ 0 should not change the order
- hits = rescorer.rescore(searcher, hits, 0);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- // test rerank with different topN cuts
-
- for (int topN = 1; topN <= 5; topN++) {
- log.info("rerank {} documents ", topN);
- hits = searcher.search(bqBuilder.build(), 10);
-
- final ScoreDoc[] slice = new ScoreDoc[topN];
- System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
- hits = new TopDocs(hits.totalHits, slice);
- hits = rescorer.rescore(searcher, hits, topN);
- for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
- if (log.isInfoEnabled()) {
- log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
- .get("id"), j);
- }
-
- assertEquals(i,
- Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
- assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+ assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0"));
+ assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0"));
+ assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0"));
+ assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0"));
+ assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0"));
+ assertU(commit());
+
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = solrQueryRequest.getSearcher();
+
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(5, hits.totalHits.value);
+
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "finalScoreFloat");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel);
+ scoringQuery.setRequest(solrQueryRequest);
+ final LTRRescorer rescorer = new LTRRescorer(scoringQuery);
+
+ // rerank @ 0 should not change the order
+ hits = rescorer.rescore(searcher, hits, 0);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ // test rerank with different topN cuts
+
+ for (int topN = 1; topN <= 5; topN++) {
+ log.info("rerank {} documents ", topN);
+ hits = searcher.search(bqBuilder.build(), 10);
+
+ final ScoreDoc[] slice = new ScoreDoc[topN];
+ System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
+ hits = new TopDocs(hits.totalHits, slice);
+ hits = rescorer.rescore(searcher, hits, topN);
+ for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
+ if (log.isInfoEnabled()) {
+ log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
+ .get("id"), j);
+ }
+
+ assertEquals(i,
+ Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
+ assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+ }
}
}
-
- r.close();
- dir.close();
-
}
@Test
public void testDocParam() throws Exception {
- final Map test = new HashMap();
- test.put("fake", 2);
- List features = makeFieldValueFeatures(new int[] {0},
- "final-score");
- List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- List allFeatures = makeFieldValueFeatures(new int[] {0},
- "final-score");
- MockModel ltrScoringModel = new MockModel("test",
- features, norms, "test", allFeatures, null);
- LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
- LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
+ List features = makeFieldValueFeatures(new int[] {0},
+ "finalScore");
+ List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ List allFeatures = makeFieldValueFeatures(new int[] {0},
+ "finalScore");
+ MockModel ltrScoringModel = new MockModel("test",
+ features, norms, "test", allFeatures, null);
+ LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
+ query.setRequest(solrQueryRequest);
+ LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
- features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
- norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
- 9}, "final-score");
- ltrScoringModel = new MockModel("test", features, norms,
- "test", allFeatures, null);
- query = new LTRScoringQuery(ltrScoringModel);
- wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore");
+ norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
+ 9}, "finalScore");
+ ltrScoringModel = new MockModel("test", features, norms,
+ "test", allFeatures, null);
+ query = new LTRScoringQuery(ltrScoringModel);
+ query.setRequest(solrQueryRequest);
+ wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
}
}
-
}
From a789b12d6919c63a19717c8fe8b4a81276aadfc2 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Thu, 20 May 2021 23:02:24 +0100
Subject: [PATCH 23/27] fix for SOLR-11134
---
.../org/apache/solr/ltr/TestLTRReRankingPipeline.java | 9 +++++----
.../test/org/apache/solr/ltr/model/TestLinearModel.java | 8 ++++++--
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index 1b1967d47ef..c4fdec25b03 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -114,6 +114,7 @@ public Explanation explain(LeafReaderContext context, int doc,
@Test
public void testRescorer() throws Exception {
+ assertU(delQ("*:*"));
assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F"));
assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T"));
assertU(commit());
@@ -151,9 +152,9 @@ public void testRescorer() throws Exception {
}
}
- @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
+ assertU(delQ("*:*"));
assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0"));
assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0"));
assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0"));
@@ -185,8 +186,9 @@ public void testDifferentTopN() throws IOException {
Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat");
+ final Double featureWeight = 0.1;
final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features, featureWeight));
LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel);
scoringQuery.setRequest(solrQueryRequest);
@@ -215,10 +217,9 @@ public void testDifferentTopN() throws IOException {
log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
.get("id"), j);
}
-
assertEquals(i,
Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
- assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+ assertEquals((i + 1) * features.size()*featureWeight, hits.scoreDocs[j].score, 0.00001);
}
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
index df03896859f..f528af3e0ec 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestLinearModel.java
@@ -47,10 +47,14 @@ public static LTRScoringModel createLinearModel(String name, List featu
}
public static Map makeFeatureWeights(List features) {
+ return makeFeatureWeights(features, 0.1);
+ }
+
+ public static Map makeFeatureWeights(List features, Number weight) {
final Map nameParams = new HashMap();
- final HashMap modelWeights = new HashMap();
+ final HashMap modelWeights = new HashMap();
for (final Feature feat : features) {
- modelWeights.put(feat.getName(), 0.1);
+ modelWeights.put(feat.getName(), weight);
}
nameParams.put("weights", modelWeights);
return nameParams;
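Note on the adjusted assertion above: with a LinearModel whose three features are all FieldValueFeatures over the same finalScoreFloat field and share one weight, the rescored value is simply the field value times features.size() times featureWeight. A minimal arithmetic sketch (illustration only, not part of the patch):

    // illustration only: expected LinearModel score when every feature reads the same
    // field value and all features share a single weight (e.g. 5.0f * 3 * 0.1 == 1.5f)
    static float expectedRescoredScore(float fieldValue, int numFeatures, double featureWeight) {
        return (float) (fieldValue * numFeatures * featureWeight);
    }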
From c42be5487d8e59e144ba0689338e7844efa656ed Mon Sep 17 00:00:00 2001
From: tomglk <>
Date: Fri, 21 May 2021 18:22:56 +0200
Subject: [PATCH 24/27] [SOLR-12697] out-scope TestLTRReRankingPipeline
---
.../solr/collection1/conf/schema.xml | 3 -
.../solr/ltr/TestLTRReRankingPipeline.java | 337 ++++++++++--------
2 files changed, 188 insertions(+), 152 deletions(-)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 005eacf0121..6c386ef23d9 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -19,9 +19,6 @@
-
-
-
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
index c4fdec25b03..85019445546 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRReRankingPipeline.java
@@ -25,8 +25,12 @@
import java.util.List;
import java.util.Map;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -37,8 +41,8 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.lucene.store.Directory;
+import org.apache.solr.SolrTestCase;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.ltr.feature.Feature;
import org.apache.solr.ltr.feature.FieldValueFeature;
@@ -46,24 +50,16 @@
import org.apache.solr.ltr.model.TestLinearModel;
import org.apache.solr.ltr.norm.IdentityNormalizer;
import org.apache.solr.ltr.norm.Normalizer;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class TestLTRReRankingPipeline extends SolrTestCaseJ4 {
+public class TestLTRReRankingPipeline extends SolrTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final SolrResourceLoader solrResourceLoader = new SolrResourceLoader(Paths.get("").toAbsolutePath());
- @BeforeClass
- public static void setup() throws Exception {
- initCore("solrconfig-ltr.xml", "schema.xml");
- }
-
private IndexSearcher getSearcher(IndexReader r) {
// 'yes' to maybe wrapping in general
final boolean maybeWrap = true;
@@ -113,156 +109,199 @@ public Explanation explain(LeafReaderContext context, int doc,
}
@Test
- public void testRescorer() throws Exception {
- assertU(delQ("*:*"));
- assertU(adoc("id", "0", "field", "wizard the the the the the oz", "finalScore", "F"));
- assertU(adoc("id", "1", "field", "wizard oz the the the the the the", "finalScore", "T"));
- assertU(commit());
-
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
-
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = solrQueryRequest.getSearcher();
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(2, hits.totalHits.value);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "finalScore");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "finalScore");
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
-
- LTRScoringQuery ltrScoringQuery = new LTRScoringQuery(ltrScoringModel);
- ltrScoringQuery.setRequest(solrQueryRequest);
- final LTRRescorer rescorer = new LTRRescorer(ltrScoringQuery);
- hits = rescorer.rescore(searcher, hits, 2);
-
- // rerank using the field finalScore
- assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- }
+ public void testRescorer() throws IOException {
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ Document doc = new Document();
+ doc.add(newStringField("id", "0", Field.Store.YES));
+ doc.add(newTextField("field", "wizard the the the the the oz",
+ Field.Store.NO));
+ doc.add(newStringField("final-score", "F", Field.Store.YES)); // TODO: change to numeric field
+
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "1", Field.Store.YES));
+ // 1 extra token, but wizard and oz are close;
+ doc.add(newTextField("field", "wizard oz the the the the the the",
+ Field.Store.NO));
+ doc.add(newStringField("final-score", "T", Field.Store.YES)); // TODO: change to numeric field
+ w.addDocument(doc);
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = getSearcher(r);
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(2, hits.totalHits.value);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
+ hits = rescorer.rescore(searcher, hits, 2);
+
+ // rerank using the field final-score
+ assertEquals("1", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("0", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+
+ r.close();
+ dir.close();
+
}
+ @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11134")
@Test
public void testDifferentTopN() throws IOException {
- assertU(delQ("*:*"));
- assertU(adoc("id", "0", "field", "wizard oz oz oz oz oz", "finalScoreFloat", "1.0"));
- assertU(adoc("id", "1", "field", "wizard oz oz oz oz the", "finalScoreFloat", "2.0"));
- assertU(adoc("id", "2", "field", "wizard oz oz oz the the ", "finalScoreFloat", "3.0"));
- assertU(adoc("id", "3", "field", "wizard oz oz the the the the ", "finalScoreFloat", "4.0"));
- assertU(adoc("id", "4", "field", "wizard oz the the the the the the", "finalScoreFloat", "5.0"));
- assertU(commit());
-
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
- // Do ordinary BooleanQuery:
- final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
- bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
- bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
- final IndexSearcher searcher = solrQueryRequest.getSearcher();
-
- // first run the standard query
- TopDocs hits = searcher.search(bqBuilder.build(), 10);
- assertEquals(5, hits.totalHits.value);
-
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
- "finalScoreFloat");
- final List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
- 2, 3, 4, 5, 6, 7, 8, 9}, "finalScoreFloat");
- final Double featureWeight = 0.1;
- final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
- features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features, featureWeight));
-
- LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel);
- scoringQuery.setRequest(solrQueryRequest);
- final LTRRescorer rescorer = new LTRRescorer(scoringQuery);
-
- // rerank @ 0 should not change the order
- hits = rescorer.rescore(searcher, hits, 0);
- assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
- assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
- assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
- assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
- assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
-
- // test rerank with different topN cuts
-
- for (int topN = 1; topN <= 5; topN++) {
- log.info("rerank {} documents ", topN);
- hits = searcher.search(bqBuilder.build(), 10);
-
- final ScoreDoc[] slice = new ScoreDoc[topN];
- System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
- hits = new TopDocs(hits.totalHits, slice);
- hits = rescorer.rescore(searcher, hits, topN);
- for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
- if (log.isInfoEnabled()) {
- log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
- .get("id"), j);
- }
- assertEquals(i,
- Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
- assertEquals((i + 1) * features.size()*featureWeight, hits.scoreDocs[j].score, 0.00001);
-
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ Document doc = new Document();
+ doc.add(newStringField("id", "0", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 1.0f));
+ w.addDocument(doc);
+
+ doc = new Document();
+ doc.add(newStringField("id", "1", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 2.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "2", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 3.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "3", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz oz the the the the ",
+ Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 4.0f));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(newStringField("id", "4", Field.Store.YES));
+ doc.add(newTextField("field", "wizard oz the the the the the the",
+ Field.Store.NO));
+ doc.add(new FloatDocValuesField("final-score", 5.0f));
+ w.addDocument(doc);
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ // Do ordinary BooleanQuery:
+ final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
+ final IndexSearcher searcher = getSearcher(r);
+
+ // first run the standard query
+ TopDocs hits = searcher.search(bqBuilder.build(), 10);
+ assertEquals(5, hits.totalHits.value);
+
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ final List features = makeFieldValueFeatures(new int[] {0, 1, 2},
+ "final-score");
+ final List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ final List allFeatures = makeFieldValueFeatures(new int[] {0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9}, "final-score");
+ final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test",
+ features, norms, "test", allFeatures, TestLinearModel.makeFeatureWeights(features));
+
+ final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
+
+ // rerank @ 0 should not change the order
+ hits = rescorer.rescore(searcher, hits, 0);
+ assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
+ assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
+ assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
+ assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
+ assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
+
+ // test rerank with different topN cuts
+
+ for (int topN = 1; topN <= 5; topN++) {
+ log.info("rerank {} documents ", topN);
+ hits = searcher.search(bqBuilder.build(), 10);
+
+ final ScoreDoc[] slice = new ScoreDoc[topN];
+ System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
+ hits = new TopDocs(hits.totalHits, slice);
+ hits = rescorer.rescore(searcher, hits, topN);
+ for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
+ if (log.isInfoEnabled()) {
+ log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc)
+ .get("id"), j);
}
+
+ assertEquals(i,
+ Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
+ assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
+
}
}
+
+ r.close();
+ dir.close();
+
}
@Test
public void testDocParam() throws Exception {
- try (SolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(h.getCore(), new ModifiableSolrParams())) {
- List features = makeFieldValueFeatures(new int[] {0},
- "finalScore");
- List norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- List allFeatures = makeFieldValueFeatures(new int[] {0},
- "finalScore");
- MockModel ltrScoringModel = new MockModel("test",
- features, norms, "test", allFeatures, null);
- LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
- query.setRequest(solrQueryRequest);
- LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ final Map test = new HashMap();
+ test.put("fake", 2);
+ List features = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ List norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ List allFeatures = makeFieldValueFeatures(new int[] {0},
+ "final-score");
+ MockModel ltrScoringModel = new MockModel("test",
+ features, norms, "test", allFeatures, null);
+ LTRScoringQuery query = new LTRScoringQuery(ltrScoringModel);
+ LTRScoringQuery.ModelWeight wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ LTRScoringQuery.ModelWeight.ModelScorer modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
+ }
- features = makeFieldValueFeatures(new int[] {0, 1, 2}, "finalScore");
- norms =
- new ArrayList(
- Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
- allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
- 9}, "finalScore");
- ltrScoringModel = new MockModel("test", features, norms,
- "test", allFeatures, null);
- query = new LTRScoringQuery(ltrScoringModel);
- query.setRequest(solrQueryRequest);
- wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
- modelScr = wgt.scorer(null);
- modelScr.getDocInfo().setOriginalDocScore(1f);
- for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
- assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
- }
+ features = makeFieldValueFeatures(new int[] {0, 1, 2}, "final-score");
+ norms =
+ new ArrayList(
+ Collections.nCopies(features.size(),IdentityNormalizer.INSTANCE));
+ allFeatures = makeFieldValueFeatures(new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8,
+ 9}, "final-score");
+ ltrScoringModel = new MockModel("test", features, norms,
+ "test", allFeatures, null);
+ query = new LTRScoringQuery(ltrScoringModel);
+ wgt = query.createWeight(null, ScoreMode.COMPLETE, 1f);
+ modelScr = wgt.scorer(null);
+ modelScr.getDocInfo().setOriginalDocScore(1f);
+ for (final Scorable.ChildScorable feat : modelScr.getChildren()) {
+ assertNotNull(((Feature.FeatureWeight.FeatureScorer) feat.child).getDocInfo().getOriginalDocScore());
}
}
+
}
From 83bc1ee5fd292e8842e8c99a14ffbc1227bac4a8 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Mon, 24 May 2021 12:28:05 +0100
Subject: [PATCH 25/27] apologies, multiple TestFieldValueFeature polishes in
one commit, approximately:
* minor style tweaks e.g. new SolrQuery(id:21) instead of new SolrQuery(); setQuery(id:21)
* in testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned, for clarity, remove the assumption that the field default values are the sequential -1/-2/-3
* in testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned replace the generic dvTestField name with dvDoubleField plus dvStrBoolField so that a non-numeric field is also covered
* in testThatExceptionIsThrownForUnsupportedType replace the generic dvUnsupportedField with dvStringPopularities, whose plural 'popularities' naming signals the unsupported multi-valued type
* in testThatCorrectFieldValueFeatureIsUsedForDocValueTypes replace noDocValuesField with noDvFloatField and noDvStrNumField to cover both numeric and non-numeric fields
* in testThatStringValuesAreCorrectlyParsed also cover a non-docValues field _and_ ensure behavioural consistency between dv and non-dv fields [*** this required an implementation adjustment for the dv field ***] (see the sketch after this list)
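The dv/non-dv consistency mentioned in the last bullet boils down to the string-to-feature-value mapping that the reworked testThatStringValuesAreCorrectlyParsed asserts for both dvStrNumField and noDvStrNumField; roughly, as a sketch (hypothetical helper, not code from the patch):

    // sketch of the asserted mapping for single-valued string fields, on both code paths
    static float stringFeatureValue(String indexedValue, float configuredDefault) {
        if ("T".equals(indexedValue)) {
            return 1f;                // BoolField.TRUE_TOKEN
        }
        if ("F".equals(indexedValue)) {
            return 0f;                // BoolField.FALSE_TOKEN
        }
        return configuredDefault;     // any other string falls back to the default value
    }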
---
.../solr/ltr/feature/FieldValueFeature.java | 20 +--
.../solr/collection1/conf/schema.xml | 7 +-
.../apache/solr/ltr/TestLTROnSolrCloud.java | 16 +-
.../ltr/feature/TestFieldValueFeature.java | 169 ++++++++++--------
4 files changed, 112 insertions(+), 100 deletions(-)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index f57e6c52f51..563a01d14df 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -141,7 +141,7 @@ public FeatureScorer scorer(LeafReaderContext context) throws IOException {
return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
- + " is not supported!");
+ + " is not supported");
}
return new FieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
@@ -291,22 +291,18 @@ public float score() throws IOException {
*/
private float readSortedDocValues(BytesRef bytesRef) {
String string = bytesRef.utf8ToString();
- if (string.length() == 1
- && (string.charAt(0) == BoolField.TRUE_TOKEN[0] || string.charAt(0) == BoolField.FALSE_TOKEN[0])) {
- // boolean values in the index are encoded with a single char contained in TRUE_TOKEN or FALSE_TOKEN
+ if (string.length() == 1) {
+ // boolean values in the index are encoded with
+ // a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
- return 1f;
- } else {
- return 0f;
+ return 1;
}
- } else {
- try {
- return Float.parseFloat(string);
- } catch (NumberFormatException ex) {
- throw new FeatureException("Cannot parse value " + string + " of field " + schemaField.getName() + " to float.");
+ if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
+ return 0;
}
}
+ return FieldValueFeature.this.getDefaultValue();
}
@Override
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 6c386ef23d9..b27542060f4 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -28,6 +28,7 @@
+
@@ -39,9 +40,9 @@
-
-
-
+
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index a9f6d36f235..910c0061af7 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -112,31 +112,31 @@ public void testSimpleQuery() throws Exception {
final String result0_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0",
"dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8",
- "dvStrNumFieldFeature", "8.0", "dvStrBoolFieldFeature", "1.0");
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
final String result1_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0",
"dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7",
- "dvStrNumFieldFeature", "7.0", "dvStrBoolFieldFeature", "0.0");
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
final String result2_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0",
"dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6",
- "dvStrNumFieldFeature", "6.0", "dvStrBoolFieldFeature", "1.0");
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
final String result3_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0",
"dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5",
- "dvStrNumFieldFeature", "5.0", "dvStrBoolFieldFeature", "0.0");
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
final String result4_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0",
"dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4",
- "dvStrNumFieldFeature", "4.0", "dvStrBoolFieldFeature", "1.0");
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
final String result5_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0",
"dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3",
- "dvStrNumFieldFeature", "3.0", "dvStrBoolFieldFeature", "0.0");
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
final String result6_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0",
"dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2",
- "dvStrNumFieldFeature", "2.0", "dvStrBoolFieldFeature", "1.0");
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
final String result7_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0",
"dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0",
@@ -262,7 +262,7 @@ void indexDocument(String collection, String id, String title, String descriptio
doc.setField("dvLongField", popularity);
doc.setField("dvFloatField", ((float) popularity) / 10);
doc.setField("dvDoubleField", ((double) popularity) / 10);
- doc.setField("dvStrNumField", popularity);
+ doc.setField("dvStrNumField", popularity % 2 == 0 ? "F" : "T");
doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F");
}
solrCluster.getSolrClient().add(collection, doc);
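Aside (sketch only, not part of the patch): with the indexDocument() change above, the dvStrNumFieldFeature and dvStrBoolFieldFeature values expected in the feature vectors earlier in this diff follow directly from the popularity parity:

    // expected string-backed feature values for a document with the given popularity
    static float[] expectedStringFeatures(int popularity) {
        float dvStrNumFieldFeature  = (popularity % 2 == 0) ? 0f : 1f; // field holds "F" / "T"
        float dvStrBoolFieldFeature = (popularity % 2 == 0) ? 1f : 0f; // field holds "T" / "F"
        return new float[] { dvStrNumFieldFeature, dvStrBoolFieldFeature };
    }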
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index ad58986da31..e9bc943fcf3 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -16,6 +16,10 @@
*/
package org.apache.solr.ltr.feature;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -32,10 +36,6 @@
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
public class TestFieldValueFeature extends TestRerankBase {
private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f;
@@ -87,10 +87,10 @@ public void before() throws Exception {
loadFeature(field, FieldValueFeature.class.getName(),
"{\"field\":\"" + field + "\"}");
}
- loadModel("model", LinearModel.class.getName(), FIELDS,
- "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
- "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," +
- "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
+ loadModel("model", LinearModel.class.getName(), FIELDS,
+ "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," +
+ "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
}
@After
@@ -100,7 +100,8 @@ public void after() throws Exception {
@Test
public void testRanking() throws Exception {
- SolrQuery query = new SolrQuery();
+
+ final SolrQuery query = new SolrQuery();
query.setQuery("title:w1");
query.add("fl", "*, score");
query.add("rows", "4");
@@ -132,6 +133,7 @@ public void testRanking() throws Exception {
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
}
+
@Test
public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception {
SolrQuery query = new SolrQuery();
@@ -147,6 +149,10 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
query.add("rq", "{!ltr model=model reRankDocs=4}");
query.add("fl", "[fv]");
+ // "0.0" in the assertJQ below is more readable than
+ // Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL) but first make sure it's equivalent
+ assertEquals("0.0", Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL));
+
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
@@ -154,6 +160,7 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
"dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}");
}
+
@Test
public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception {
for (String field : FIELDS) {
@@ -185,9 +192,12 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E
@Test
public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception {
final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"};
+ final String[] defaultValues = {"-1.0", "-2.0", "-3.0"};
+
+ for (int idx = 0; idx < fieldsWithDefaultValues.length; ++idx) {
+ final String field = fieldsWithDefaultValues[idx];
+ final String defaultValue = defaultValues[idx];
- double fieldCounter = -1.0;
- for (String field : fieldsWithDefaultValues) {
final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field;
assertU(adoc("id", "21"));
@@ -198,16 +208,13 @@ public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned()
loadModel(field+"-model", LinearModel.class.getName(),
new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}");
- SolrQuery query = new SolrQuery();
- query.setQuery("id:21");
+ final SolrQuery query = new SolrQuery("id:21");
query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
query.add("fl", "[fv]");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, String.valueOf(fieldCounter))+"'}");
-
- fieldCounter--;
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, defaultValue)+"'}");
}
}
@@ -229,32 +236,35 @@ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws E
ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
- .toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
}
@Test
public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception {
+ final String[] fieldsWithoutDefaultValues = {"dvDoubleField", "dvStrBoolField"};
// this tests the case that no document contains docValues for the provided existing field
- final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned";
- loadFeature("dvTestField", ObservingFieldValueFeature.class.getName(), fstore,
- "{\"field\":\"dvTestField\"}");
- loadModel("dvTestField-model", LinearModel.class.getName(),
- new String[] {"dvTestField"}, fstore, "{\"weights\":{\"dvTestField\":1.0}}");
+ for (String field : fieldsWithoutDefaultValues) {
+ final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned"+field;
- final SolrQuery query = new SolrQuery();
- query.setQuery("id:42");
- query.add("rq", "{!ltr model=dvTestField-model reRankDocs=4}");
- query.add("fl", "[fv]");
+ loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore,
+ "{\"field\":\""+field+"\"}");
- ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
- .toFeatureVector("dvTestField",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
- assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[] {field}, fstore, "{\"weights\":{\""+field+"\":1.0}}");
+
+ final SolrQuery query = new SolrQuery("id:42");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils
+ .toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
+ }
}
@Test
@@ -273,6 +283,7 @@ public void testBooleanValue() throws Exception {
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}");
+
query = new SolrQuery();
query.setQuery("id:5");
query.add("rq", "{!ltr model=trendy-model reRankDocs=4}");
@@ -293,37 +304,38 @@ public void testBooleanValue() throws Exception {
public void testThatExceptionIsThrownForUnsupportedType() throws Exception {
final String fstore = "test_store";
- assertU(adoc("id", "21", "title", "multivalued not supported", "dvUnsupportedField", "wow value"));
+ assertU(adoc("id", "21", "title", "multivalued not supported", "dvStringPopularities", "wow value"));
assertU(commit());
- loadFeature("dvUnsupportedField", FieldValueFeature.class.getName(), fstore,
- "{\"field\":\"dvUnsupportedField\"}");
+ loadFeature("dvStringPopularities", FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"dvStringPopularities\"}");
- loadModel("dvUnsupportedField-model", LinearModel.class.getName(),
- new String[] {"dvUnsupportedField"}, fstore, "{\"weights\":{\"dvUnsupportedField\":1.0}}");
+ loadModel("dvStringPopularities-model", LinearModel.class.getName(),
+ new String[] {"dvStringPopularities"}, fstore, "{\"weights\":{\"dvStringPopularities\":1.0}}");
- SolrQuery query = new SolrQuery();
- query.setQuery("id:21");
- query.add("rq", "{!ltr model=dvUnsupportedField-model reRankDocs=4}");
+ final SolrQuery query = new SolrQuery("id:21");
+ query.add("rq", "{!ltr model=dvStringPopularities-model reRankDocs=4}");
query.add("fl", "[fv]");
assertJQ("/query" + query.toQueryString(),
- "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvUnsupportedField is not supported!'");
+ "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'");
}
@Test
public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception {
final String[][] fieldsWithDifferentTypes = {
- new String[]{"dvIntPopularity", NumericDocValuesFieldValueFeatureScorer.class.getName()},
- new String[]{"dvStringPopularity", SortedDocValuesFieldValueFeatureScorer.class.getName()},
- new String[]{"noDocValuesField", FieldValueFeatureScorer.class.getName()}
+ new String[]{"dvIntPopularity", "1", NumericDocValuesFieldValueFeatureScorer.class.getName()},
+ new String[]{"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()},
+ new String[]{"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()},
+ new String[]{"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()}
};
for (String[] fieldAndScorerClass : fieldsWithDifferentTypes) {
- String field = fieldAndScorerClass[0];
+ final String field = fieldAndScorerClass[0];
+ final String fieldValue = fieldAndScorerClass[1];
final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes"+field;
- assertU(adoc("id", "21", field, "1"));
+ assertU(adoc("id", "21", field, fieldValue));
assertU(commit());
loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore,
@@ -331,8 +343,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce
loadModel(field+"-model", LinearModel.class.getName(),
new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}");
- SolrQuery query = new SolrQuery();
- query.setQuery("id:21");
+ final SolrQuery query = new SolrQuery("id:21");
query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
query.add("fl", "[fv]");
@@ -340,7 +351,7 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}");
- assertEquals(fieldAndScorerClass[1], ObservingFieldValueFeature.usedScorerClass);
+ assertEquals(fieldAndScorerClass[2], ObservingFieldValueFeature.usedScorerClass);
}
}
@@ -353,35 +364,39 @@ public void testParamsToMap() throws Exception {
@Test
public void testThatStringValuesAreCorrectlyParsed() throws Exception {
- final String[][] inputsAndTests = {
- new String[]{"T", "/response/docs/[0]/=={'[fv]':'" +
- FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "1.0")+"'}"},
- new String[]{"F", "/response/docs/[0]/=={'[fv]':'" +
- FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "0.0")+"'}"},
- new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" +
- FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "-7324.427")+"'}"},
- new String[]{"532", "/response/docs/[0]/=={'[fv]':'" +
- FeatureLoggerTestUtils.toFeatureVector("dvStrNumField", "532.0")+"'}"},
- new String[]{"notanumber", "/error/msg/=='org.apache.solr.ltr.feature.FeatureException: " +
- "Cannot parse value notanumber of field dvStrNumField to float.'"}
- };
-
- final String fstore = "testThatStringValuesAreCorrectlyParsed";
- loadFeature("dvStrNumField", FieldValueFeature.class.getName(), fstore,
- "{\"field\":\"" + "dvStrNumField" + "\"}");
- loadModel("dvStrNumField-model", LinearModel.class.getName(),
- new String[]{"dvStrNumField"}, fstore, "{\"weights\":{\"" + "dvStrNumField" + "\":1.0}}");
+ for (String field : new String[] {"dvStrNumField" , "noDvStrNumField"}) {
+ final String[][] inputsAndTests = {
+ new String[]{"T", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"},
+ new String[]{"F", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, "0.0")+"'}"},
+ new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"},
+ new String[]{"532", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"},
+ new String[]{Float.toString(Float.NaN), "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"},
+ new String[]{"notanumber", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}
+ };
+
+ final String fstore = "testThatStringValuesAreCorrectlyParsed"+field;
+ loadFeature(field, FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"" + field + "\"}");
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[]{field}, fstore,
+ "{\"weights\":{\""+field+"\":1.0}}");
- for (String[] inputAndTest : inputsAndTests) {
- assertU(adoc("id", "21", "dvStrNumField", inputAndTest[0]));
- assertU(commit());
+ for (String[] inputAndTest : inputsAndTests) {
+ assertU(adoc("id", "21", field, inputAndTest[0]));
+ assertU(commit());
- SolrQuery query = new SolrQuery();
- query.setQuery("id:21");
- query.add("rq", "{!ltr model=" + "dvStrNumField" + "-model reRankDocs=4}");
- query.add("fl", "[fv]");
+ final SolrQuery query = new SolrQuery("id:21");
+ query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}");
+ query.add("fl", "[fv]");
- assertJQ("/query" + query.toQueryString(), inputAndTest[1]);
+ assertJQ("/query" + query.toQueryString(), inputAndTest[1]);
+ }
}
}
@@ -389,7 +404,7 @@ public void testThatStringValuesAreCorrectlyParsed() throws Exception {
 * This class is used to track which specific FieldValueFeature is used so that we can test whether the
* fallback mechanism works correctly.
*/
- public static class ObservingFieldValueFeature extends FieldValueFeature {
+ final public static class ObservingFieldValueFeature extends FieldValueFeature {
static String usedScorerClass;
public ObservingFieldValueFeature(String name, Map params) {
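Taken together, the assertions on ObservingFieldValueFeature.usedScorerClass in this patch pin down the scorer selection per field/docValues combination. A simplified sketch of that selection (inferred from the tests, not the actual implementation):

    // inferred scorer selection; DocValuesType is org.apache.lucene.index.DocValuesType
    static String expectedScorerClass(boolean schemaFieldHasDocValues, DocValuesType typeInSegment) {
        if (!schemaFieldHasDocValues) {
            return "FieldValueFeatureScorer";                           // stored-field path
        }
        switch (typeInSegment) {
            case NUMERIC: return "NumericDocValuesFieldValueFeatureScorer";
            case SORTED:  return "SortedDocValuesFieldValueFeatureScorer";
            case NONE:    return "DefaultValueFieldValueFeatureScorer"; // no docValues in this segment
            default: throw new IllegalArgumentException("Doc values type " + typeInSegment + " is not supported");
        }
    }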
From 385d8b2a39fc892a0dc1f6836c43cd298374b9ec Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Tue, 25 May 2021 13:21:48 +0100
Subject: [PATCH 26/27] add
TestFieldValueFeature.testThatDateValuesAreCorrectlyParsed()
---
.../solr/collection1/conf/schema.xml | 2 ++
.../ltr/feature/TestFieldValueFeature.java | 32 +++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index b27542060f4..cc85353ee8a 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -40,9 +40,11 @@
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index e9bc943fcf3..15a007bb584 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -400,6 +400,38 @@ public void testThatStringValuesAreCorrectlyParsed() throws Exception {
}
}
+ @Test
+ public void testThatDateValuesAreCorrectlyParsed() throws Exception {
+ for (String field : new String[] {"dvDateField", "noDvDateField"}) {
+ final String[][] inputsAndTests = {
+ new String[]{"1970-01-01T00:00:00.000Z", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, "0.0")+"'}"},
+ new String[]{"1970-01-01T00:00:00.001Z", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"},
+ new String[]{"1970-01-01T00:00:01.234Z", "/response/docs/[0]/=={'[fv]':'" +
+ FeatureLoggerTestUtils.toFeatureVector(field, "1234.0")+"'}"}
+ };
+
+ final String fstore = "testThatDateValuesAreCorrectlyParsed"+field;
+ loadFeature(field, FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\"" + field + "\"}");
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[]{field}, fstore,
+ "{\"weights\":{\""+field+"\":1.0}}");
+
+ for (String[] inputAndTest : inputsAndTests) {
+ assertU(adoc("id", "21", field, inputAndTest[0]));
+ assertU(commit());
+
+ final SolrQuery query = new SolrQuery("id:21");
+ query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(), inputAndTest[1]);
+ }
+ }
+ }
+
/**
 * This class is used to track which specific FieldValueFeature is used so that we can test whether the
* fallback mechanism works correctly.
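Note on the new date test: the expected values 0.0, 1.0 and 1234.0 for the three timestamps suggest that a date field's feature value is its epoch-millisecond value as a float. A minimal sketch under that assumption (not code from the patch):

    // assumption inferred from the asserted values: date feature value == epoch millis as float
    static float dateFeatureValue(java.time.Instant indexedValue) {
        return (float) indexedValue.toEpochMilli();   // 1970-01-01T00:00:01.234Z -> 1234.0f
    }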
From ad489d0d35f842cf449bcd970830732a0fceeba5 Mon Sep 17 00:00:00 2001
From: Christine Poerschke
Date: Wed, 26 May 2021 17:31:39 +0100
Subject: [PATCH 27/27] small TestLTROnSolrCloud polish: * undo distracting
 reformatting (hopefully a one-off; next time the 'spotless' gradle plugin will
 be available for contrib/ltr)
---
.../apache/solr/ltr/TestLTROnSolrCloud.java | 42 +++++++++++--------
1 file changed, 25 insertions(+), 17 deletions(-)
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 910c0061af7..e6fc0c852a4 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -49,7 +49,7 @@ public class TestLTROnSolrCloud extends TestRerankBase {
String solrconfig = "solrconfig-ltr.xml";
String schema = "schema.xml";
- SortedMap extraServlets = null;
+ SortedMap extraServlets = null;
@Override
public void setUp() throws Exception {
@@ -57,14 +57,17 @@ public void setUp() throws Exception {
extraServlets = setupTestInit(solrconfig, schema, true);
System.setProperty("enable.update.log", "true");
- int numberOfShards = random().nextInt(4) + 1;
- int numberOfReplicas = random().nextInt(2) + 1;
+ int numberOfShards = random().nextInt(4)+1;
+ int numberOfReplicas = random().nextInt(2)+1;
int numberOfNodes = numberOfShards * numberOfReplicas;
setupSolrCluster(numberOfShards, numberOfReplicas, numberOfNodes);
+
+
}
+
@Override
public void tearDown() throws Exception {
restTestHarness.close();
@@ -89,7 +92,7 @@ public void testSimpleQuery() throws Exception {
query.setParam("rows", "8");
QueryResponse queryResponse =
- solrCluster.getSolrClient().query(COLLECTION, query);
+ solrCluster.getSolrClient().query(COLLECTION,query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("1", queryResponse.getResults().get(0).get("id").toString());
assertEquals("2", queryResponse.getResults().get(1).get("id").toString());
@@ -100,14 +103,14 @@ public void testSimpleQuery() throws Exception {
assertEquals("7", queryResponse.getResults().get(6).get("id").toString());
assertEquals("8", queryResponse.getResults().get(7).get("id").toString());
- final Float original_result0_score = (Float) queryResponse.getResults().get(0).get("score");
- final Float original_result1_score = (Float) queryResponse.getResults().get(1).get("score");
- final Float original_result2_score = (Float) queryResponse.getResults().get(2).get("score");
- final Float original_result3_score = (Float) queryResponse.getResults().get(3).get("score");
- final Float original_result4_score = (Float) queryResponse.getResults().get(4).get("score");
- final Float original_result5_score = (Float) queryResponse.getResults().get(5).get("score");
- final Float original_result6_score = (Float) queryResponse.getResults().get(6).get("score");
- final Float original_result7_score = (Float) queryResponse.getResults().get(7).get("score");
+ final Float original_result0_score = (Float)queryResponse.getResults().get(0).get("score");
+ final Float original_result1_score = (Float)queryResponse.getResults().get(1).get("score");
+ final Float original_result2_score = (Float)queryResponse.getResults().get(2).get("score");
+ final Float original_result3_score = (Float)queryResponse.getResults().get(3).get("score");
+ final Float original_result4_score = (Float)queryResponse.getResults().get(4).get("score");
+ final Float original_result5_score = (Float)queryResponse.getResults().get(5).get("score");
+ final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score");
+ final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score");
final String result0_features = FeatureLoggerTestUtils.toFeatureVector(
"powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0",
@@ -145,7 +148,8 @@ public void testSimpleQuery() throws Exception {
// Test feature vectors returned (without re-ranking)
query.setFields("*,score,features:[fv store=test]");
- queryResponse = solrCluster.getSolrClient().query(COLLECTION, query);
+ queryResponse =
+ solrCluster.getSolrClient().query(COLLECTION,query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("1", queryResponse.getResults().get(0).get("id").toString());
assertEquals("2", queryResponse.getResults().get(1).get("id").toString());
@@ -185,7 +189,8 @@ public void testSimpleQuery() throws Exception {
// Test feature vectors returned (with re-ranking)
query.setFields("*,score,features:[fv]");
query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}");
- queryResponse = solrCluster.getSolrClient().query(COLLECTION, query);
+ queryResponse =
+ solrCluster.getSolrClient().query(COLLECTION,query);
assertEquals(8, queryResponse.getResults().getNumFound());
assertEquals("8", queryResponse.getResults().get(0).get("id").toString());
assertEquals(result0_features,
@@ -225,7 +230,7 @@ private void setupSolrCluster(int numShards, int numReplicas, int numServers) th
createCollection(COLLECTION, "conf1", numShards, numReplicas);
indexDocuments(COLLECTION);
for (JettySolrRunner solrRunner : solrCluster.getJettySolrRunners()) {
- if (!solrRunner.getCoreContainer().getCores().isEmpty()) {
+ if (!solrRunner.getCoreContainer().getCores().isEmpty()){
String coreName = solrRunner.getCoreContainer().getCores().iterator().next().getName();
restTestHarness = new RestTestHarness(() -> solrRunner.getBaseUrl().toString() + "/" + coreName);
break;
@@ -249,8 +254,9 @@ private void createCollection(String name, String config, int numShards, int num
solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas);
}
+
void indexDocument(String collection, String id, String title, String description, int popularity)
- throws Exception {
+ throws Exception{
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", id);
doc.setField("title", title);
@@ -268,7 +274,8 @@ void indexDocument(String collection, String id, String title, String descriptio
solrCluster.getSolrClient().add(collection, doc);
}
- private void indexDocuments(final String collection) throws Exception {
+ private void indexDocuments(final String collection)
+ throws Exception {
final int collectionSize = 8;
// put documents in random order to check that advanceExact is working correctly
List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList());
@@ -375,4 +382,5 @@ public static void after() throws Exception {
}
System.clearProperty("managed.schema.mutable");
}
+
}