diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f853ddeffb8..2f564cd20b8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -333,6 +333,9 @@ New Features
* SOLR-15090: A new 'gcs-repository' contrib can be used to store and retrieve backups from Google Cloud Storage. (Jason Gerlowski, Shalin Mangar, Cao Manh Dat)
+* SOLR-12697: In contrib/ltr FieldValueFeature support "stored=false docValues=true" a.k.a. pure DocValues fields.
+ (Stanislav Livotov, Erick Erickson, Tobias Kässmann, Tom Gilke, Christine Poerschke)
+
Improvements
---------------------
* SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState". (David Smiley)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index d12795d2663..563a01d14df 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -23,16 +23,26 @@
import java.util.Set;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
+import org.apache.solr.schema.NumberType;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SolrIndexSearcher;
/**
- * This feature returns the value of a field in the current document
+ * This feature returns the value of a field in the current document.
+ * The field must have stored="true" or docValues="true" properties.
* Example configuration:
*
{
"name": "rawHits",
@@ -41,6 +51,17 @@
"field": "hits"
}
}
+ *
+ * There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use.
+ * The chosen scorer depends on the field attributes.
+ *
+ * FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or docValues=false
+ *
+ * NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
+ * SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
+ *
+ * DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on segments
+ * where no document has a value set in the field of this feature
*/
public class FieldValueFeature extends Feature {
@@ -83,18 +104,52 @@ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
}
public class FieldValueFeatureWeight extends FeatureWeight {
+ private final SchemaField schemaField;
public FieldValueFeatureWeight(IndexSearcher searcher,
SolrQueryRequest request, Query originalQuery, Map efi) {
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
+      // some tests pass a null or a non-SolrIndexSearcher searcher, the schema field stays null then
+      final boolean schemaAvailable = searcher instanceof SolrIndexSearcher;
+      schemaField = schemaAvailable
+          ? ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field)
+          : null;
}
+    /**
+     * Return a docValues based FeatureScorer for stored=false docValues=true fields, else the stored fields based one
+     *
+     * @param context the segment this FeatureScorer is working with
+     * @return FeatureScorer for the current segment and field
+     * @throws IOException as defined by abstract class Feature
+     */
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+      if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
+        final FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
+        final DocValuesType docValuesType = (info == null) ? DocValuesType.NONE : info.getDocValuesType();
+        final DocIdSetIterator allDocs = DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS);
+        switch (docValuesType) {
+          case NUMERIC:
+            return new NumericDocValuesFieldValueFeatureScorer(this, context, allDocs,
+                schemaField.getType().getNumberType());
+          case SORTED:
+            return new SortedDocValuesFieldValueFeatureScorer(this, context, allDocs);
+          case NONE:
+            // Using a fallback feature scorer because this segment has no documents with a doc value for the field
+            return new DefaultValueFieldValueFeatureScorer(this, allDocs);
+          default:
+            throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field
+                + " is not supported");
+        }
+      }
return new FieldValueFeatureScorer(this, context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
+ /**
+ * A FeatureScorer that reads the stored value for a field
+ */
public class FieldValueFeatureScorer extends FeatureScorer {
LeafReaderContext context = null;
@@ -146,5 +201,137 @@ public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
+
+    /**
+     * A FeatureScorer that reads the numeric docValues for a field; documents without a value score the default value
+     */
+    public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer {
+      private final NumericDocValues docValues;
+      private final NumberType numberType;
+
+      public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
+                                                     final DocIdSetIterator itr, final NumberType numberType) {
+        super(weight, itr);
+        this.numberType = numberType;
+
+        NumericDocValues docValues;
+        try {
+          docValues = DocValues.getNumeric(context.reader(), field);
+        } catch (IOException e) {
+          throw new IllegalArgumentException("Could not read numeric docValues for field " + field, e);
+        }
+        this.docValues = docValues;
+      }
+
+      @Override
+      public float score() throws IOException {
+        if (docValues.advanceExact(itr.docID())) {
+          return readNumericDocValues();
+        }
+        return FieldValueFeature.this.getDefaultValue();
+      }
+
+      /**
+       * Read the numeric value for a field and convert the different number types to float.
+       *
+       * @return The numeric value that the docValues contain for the current document
+       * @throws IOException if docValues cannot be read
+       */
+      private float readNumericDocValues() throws IOException {
+        if (NumberType.FLOAT.equals(numberType)) {
+          // the long holds the int bit pattern of the float, convert it back to float
+          return Float.intBitsToFloat((int) docValues.longValue());
+        } else if (NumberType.DOUBLE.equals(numberType)) {
+          // the long holds the bit pattern of the double, convert it back and narrow it to float
+          return (float) Double.longBitsToDouble(docValues.longValue());
+        }
+        // every other number type: just take the long value (widened to float)
+        return docValues.longValue();
+      }
+
+      @Override
+      public float getMaxScore(int upTo) throws IOException {
+        return Float.POSITIVE_INFINITY;
+      }
+    }
+
+    /**
+     * A FeatureScorer that reads the sorted docValues for a field; only boolean tokens are turned into a score
+     */
+    public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer {
+      private final SortedDocValues docValues;
+
+      public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
+                                                    final DocIdSetIterator itr) {
+        super(weight, itr);
+
+        SortedDocValues docValues;
+        try {
+          docValues = DocValues.getSorted(context.reader(), field);
+        } catch (IOException e) {
+          throw new IllegalArgumentException("Could not read sorted docValues for field " + field, e);
+        }
+        this.docValues = docValues;
+      }
+
+      @Override
+      public float score() throws IOException {
+        if (docValues.advanceExact(itr.docID())) {
+          int ord = docValues.ordValue();
+          return readSortedDocValues(docValues.lookupOrd(ord));
+        }
+        return FieldValueFeature.this.getDefaultValue();
+      }
+
+      /**
+       * Interprets the bytesRef as a true / false token; any other value is not parsed and yields the default value
+       *
+       * @param bytesRef the value of the field that should be used as score
+       * @return 1 for the true token, 0 for the false token, otherwise the default value of this feature
+       */
+      private float readSortedDocValues(BytesRef bytesRef) {
+        String string = bytesRef.utf8ToString();
+        if (string.length() == 1) {
+          // boolean values in the index are encoded with
+          // a single char contained in TRUE_TOKEN or FALSE_TOKEN
+          // (see BoolField)
+          if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
+            return 1;
+          }
+          if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
+            return 0;
+          }
+        }
+        return FieldValueFeature.this.getDefaultValue();
+      }
+
+      @Override
+      public float getMaxScore(int upTo) throws IOException {
+        return Float.POSITIVE_INFINITY;
+      }
+    }
+
+ /**
+ * A FeatureScorer that always returns the default value of the feature, whatever the current document is.
+ *
+ * It is used as a fallback for cases when a segment does not have any documents that contain doc values for a field.
+ * By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but
+ * in a less performant way because it would first try to read the stored fields for the doc (which aren't present).
+ */
+ public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer {
+ public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) {
+ super(weight, itr);
+ }
+
+ @Override
+ public float score() throws IOException {
+ return FieldValueFeature.this.getDefaultValue();
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return Float.POSITIVE_INFINITY;
+ }
+ }
}
}
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index b6f5b3bb2d1..288a953c85d 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -26,8 +26,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -41,6 +61,13 @@
+
+
+
+
+
+
+
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
index 21b71c3e5ec..e6fc0c852a4 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java
@@ -16,7 +16,10 @@
package org.apache.solr.ltr;
import java.io.File;
+import java.util.Collections;
+import java.util.List;
import java.util.SortedMap;
+import java.util.stream.IntStream;
import org.apache.commons.io.FileUtils;
import org.apache.solr.client.solrj.SolrQuery;
@@ -28,6 +31,7 @@
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.ltr.feature.FieldValueFeature;
import org.apache.solr.ltr.feature.OriginalScoreFeature;
import org.apache.solr.ltr.feature.SolrFeature;
import org.apache.solr.ltr.feature.ValueFeature;
@@ -37,6 +41,8 @@
import org.junit.AfterClass;
import org.junit.Test;
+import static java.util.stream.Collectors.toList;
+
public class TestLTROnSolrCloud extends TestRerankBase {
private MiniSolrCloudCluster solrCluster;
@@ -106,22 +112,38 @@ public void testSimpleQuery() throws Exception {
final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score");
final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score");
- final String result0_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","64.0", "c3","2.0", "original","0.0");
- final String result1_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","49.0", "c3","2.0", "original","1.0");
- final String result2_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","36.0", "c3","2.0", "original","2.0");
- final String result3_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","25.0", "c3","2.0", "original","3.0");
- final String result4_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS","16.0", "c3","2.0", "original","4.0");
- final String result5_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "9.0", "c3","2.0", "original","5.0");
- final String result6_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "4.0", "c3","2.0", "original","6.0");
- final String result7_features= FeatureLoggerTestUtils.toFeatureVector(
- "powpularityS", "1.0", "c3","2.0", "original","7.0");
+ final String result0_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0",
+ "dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8",
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
+ final String result1_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0",
+ "dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7",
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
+ final String result2_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0",
+ "dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6",
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
+ final String result3_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0",
+ "dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5",
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
+ final String result4_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0",
+ "dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4",
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
+ final String result5_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0",
+ "dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3",
+ "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0");
+ final String result6_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0",
+ "dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2",
+ "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0");
+ final String result7_features = FeatureLoggerTestUtils.toFeatureVector(
+ "powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0",
+ "dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0",
+ "dvStrNumFieldFeature", "-5.0", "dvStrBoolFieldFeature", "0.0");
// Test feature vectors returned (without re-ranking)
@@ -240,50 +262,107 @@ void indexDocument(String collection, String id, String title, String descriptio
doc.setField("title", title);
doc.setField("description", description);
doc.setField("popularity", popularity);
+ if (popularity != 1) {
+ // check that empty values will be read as default
+ doc.setField("dvIntField", popularity);
+ doc.setField("dvLongField", popularity);
+ doc.setField("dvFloatField", ((float) popularity) / 10);
+ doc.setField("dvDoubleField", ((double) popularity) / 10);
+ doc.setField("dvStrNumField", popularity % 2 == 0 ? "F" : "T");
+ doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F");
+ }
solrCluster.getSolrClient().add(collection, doc);
}
private void indexDocuments(final String collection)
throws Exception {
final int collectionSize = 8;
- for (int docId = 1; docId <= collectionSize; docId++) {
+    // put documents in random order to check that advanceExact is working correctly
+    final List<Integer> docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList());
+    Collections.shuffle(docIds, random());
+
+    int docCounter = 1;
+    for (int docId : docIds) {
final int popularity = docId;
indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity);
+      // maybe commit in the middle in order to check that everything works fine for multi-segment case
+      if (docCounter == collectionSize / 2 && random().nextBoolean()) {
+        solrCluster.getSolrClient().commit(collection);
+      }
+      docCounter++;
}
- solrCluster.getSolrClient().commit(collection);
+    solrCluster.getSolrClient().commit(collection, true, true);
}
-
private void loadModelsAndFeatures() throws Exception {
final String featureStore = "test";
- final String[] featureNames = new String[] {"powpularityS","c3", "original"};
- final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1}}";
+ final String[] featureNames = new String[]{"powpularityS", "c3", "original", "dvIntFieldFeature",
+ "dvLongFieldFeature", "dvFloatFieldFeature", "dvDoubleFieldFeature", "dvStrNumFieldFeature", "dvStrBoolFieldFeature"};
+ final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," +
+ "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," +
+ "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}";
loadFeature(
- featureNames[0],
- SolrFeature.class.getName(),
- featureStore,
- "{\"q\":\"{!func}pow(popularity,2)\"}"
+ featureNames[0],
+ SolrFeature.class.getName(),
+ featureStore,
+ "{\"q\":\"{!func}pow(popularity,2)\"}"
+ );
+ loadFeature(
+ featureNames[1],
+ ValueFeature.class.getName(),
+ featureStore,
+ "{\"value\":2}"
+ );
+ loadFeature(
+ featureNames[2],
+ OriginalScoreFeature.class.getName(),
+ featureStore,
+ null
+ );
+ loadFeature(
+ featureNames[3],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvIntField\"}"
+ );
+ loadFeature(
+ featureNames[4],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvLongField\"}"
+ );
+ loadFeature(
+ featureNames[5],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvFloatField\"}"
+ );
+ loadFeature(
+ featureNames[6],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvDoubleField\",\"defaultValue\":-4.0}"
);
loadFeature(
- featureNames[1],
- ValueFeature.class.getName(),
- featureStore,
- "{\"value\":2}"
+ featureNames[7],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvStrNumField\",\"defaultValue\":-5}"
);
loadFeature(
- featureNames[2],
- OriginalScoreFeature.class.getName(),
- featureStore,
- null
+ featureNames[8],
+ FieldValueFeature.class.getName(),
+ featureStore,
+ "{\"field\":\"dvStrBoolField\"}"
);
loadModel(
- "powpularityS-model",
- LinearModel.class.getName(),
- featureNames,
- featureStore,
- jsonModelParams
+ "powpularityS-model",
+ LinearModel.class.getName(),
+ featureNames,
+ featureStore,
+ jsonModelParams
);
reloadCollection(COLLECTION);
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 108044b5cbd..15a007bb584 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -16,12 +16,22 @@
*/
package org.apache.solr.ltr.feature;
+import java.io.IOException;
import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.ltr.FeatureLoggerTestUtils;
import org.apache.solr.ltr.TestRerankBase;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer;
+import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.SortedDocValuesFieldValueFeatureScorer;
import org.apache.solr.ltr.model.LinearModel;
+import org.apache.solr.request.SolrQueryRequest;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -30,37 +40,57 @@ public class TestFieldValueFeature extends TestRerankBase {
private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f;
+ private static final String[] FIELDS = {
+ "popularity",
+ "dvIntPopularity", "dvLongPopularity",
+ "dvFloatPopularity", "dvDoublePopularity",
+ "dvStringPopularity",
+ "isTrendy",
+ "dvIsTrendy"
+ };
+
@Before
public void before() throws Exception {
setuptest(false);
- assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
- "1","isTrendy","true"));
- assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description",
- "w2 2asd asdd didid", "popularity", "2"));
- assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
- "3","isTrendy","true"));
- assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
- "4","isTrendy","false"));
- assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
- "5","isTrendy","true"));
- assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2",
- "popularity", "6","isTrendy","false"));
- assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description",
- "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true"));
- assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description",
- "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false"));
-
- // a document without the popularity field
- assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity"));
+ assertU(adoc("id", "1", "popularity", "1", "title", "w1",
+ "dvStringPopularity", "1",
+ "description", "w1", "isTrendy", "true"));
+ assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid",
+ "dvStringPopularity", "2",
+ "description", "w2 2asd asdd didid"));
+ assertU(adoc("id", "3", "popularity", "3", "title", "w3",
+ "dvStringPopularity", "3",
+ "description", "w3", "isTrendy", "true"));
+ assertU(adoc("id", "4", "popularity", "4", "title", "w4",
+ "dvStringPopularity", "4",
+ "description", "w4", "isTrendy", "false"));
+ assertU(adoc("id", "5", "popularity", "5", "title", "w5",
+ "dvStringPopularity", "5",
+ "description", "w5", "isTrendy", "true"));
+ assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2",
+ "dvStringPopularity", "6",
+ "description", "w1 w2", "isTrendy", "false"));
+ assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5",
+ "dvStringPopularity", "7",
+ "description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true"));
+ assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8",
+ "dvStringPopularity", "8",
+ "description", "w1 w1 w1 w2 w2", "isTrendy", "false"));
+
+ // a document without the popularity and the dv fields
+ assertU(adoc("id", "42", "title", "NO popularity or isTrendy", "description", "NO popularity or isTrendy"));
assertU(commit());
- loadFeature("popularity", FieldValueFeature.class.getName(),
- "{\"field\":\"popularity\"}");
-
- loadModel("popularity-model", LinearModel.class.getName(),
- new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}");
+ for (String field : FIELDS) {
+ loadFeature(field, FieldValueFeature.class.getName(),
+ "{\"field\":\"" + field + "\"}");
+ }
+ loadModel("model", LinearModel.class.getName(), FIELDS,
+ "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," +
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," +
+ "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
}
@After
@@ -83,7 +113,7 @@ public void testRanking() throws Exception {
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
- query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
+ query.add("rq", "{!ltr model=model reRankDocs=4}");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
@@ -95,7 +125,7 @@ public void testRanking() throws Exception {
query.remove("rows");
query.add("rows", "8");
query.remove("rq");
- query.add("rq", "{!ltr model=popularity-model reRankDocs=8}");
+ query.add("rq", "{!ltr model=model reRankDocs=8}");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
@@ -113,50 +143,87 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
+
query = new SolrQuery();
query.setQuery("id:42");
- query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
+ query.add("rq", "{!ltr model=model reRankDocs=4}");
query.add("fl", "[fv]");
+
+ // "0.0" in the assertJQ below is more readable than
+ // Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL) but first make sure it's equivalent
+ assertEquals("0.0", Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL));
+
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
-
+ "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," +
+ "dvFloatPopularity=0.0,dvDoublePopularity=0.0," +
+ "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}");
}
@Test
public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception {
+    for (String fieldName : FIELDS) {
+      final String featureName = fieldName + "42";
+      final String modelName = fieldName + "-model42";
+      final String featureStore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned" + fieldName;
+      loadFeature(featureName, FieldValueFeature.class.getName(), featureStore,
+          "{\"field\":\"" + fieldName + "\",\"defaultValue\":\"42.0\"}");
+      loadModel(modelName, LinearModel.class.getName(),
+          new String[] {featureName}, featureStore, "{\"weights\":{\"" + featureName + "\":1.0}}");
+      // without re-ranking: document 42 is the single hit
+      final SolrQuery plainQuery = new SolrQuery();
+      plainQuery.setQuery("id:42");
+      plainQuery.add("fl", "*, score");
+      plainQuery.add("rows", "4");
+      assertJQ("/query" + plainQuery.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + plainQuery.toQueryString(), "/response/docs/[0]/id=='42'");
+      // with re-ranking: the feature vector has to carry the default value configured on the feature
+      final SolrQuery rerankQuery = new SolrQuery();
+      rerankQuery.setQuery("id:42");
+      rerankQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+      rerankQuery.add("fl", "[fv]");
+      assertJQ("/query" + rerankQuery.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + rerankQuery.toQueryString(),
+          "/response/docs/[0]/=={'[fv]':'" + FeatureLoggerTestUtils.toFeatureVector(featureName, "42.0") + "'}");
+    }
+  }
- final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned";
+ @Test
+ public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception {
+ final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"};
+ final String[] defaultValues = {"-1.0", "-2.0", "-3.0"};
- loadFeature("popularity42", FieldValueFeature.class.getName(), fstore,
- "{\"field\":\"popularity\",\"defaultValue\":\"42.0\"}");
+ for (int idx = 0; idx < fieldsWithDefaultValues.length; ++idx) {
+ final String field = fieldsWithDefaultValues[idx];
+ final String defaultValue = defaultValues[idx];
- SolrQuery query = new SolrQuery();
- query.setQuery("id:42");
- query.add("fl", "*, score");
- query.add("rows", "4");
+ final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field;
- loadModel("popularity-model42", LinearModel.class.getName(),
- new String[] {"popularity42"}, fstore, "{\"weights\":{\"popularity42\":1.0}}");
+ assertU(adoc("id", "21"));
+ assertU(commit());
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
- query = new SolrQuery();
- query.setQuery("id:42");
- query.add("rq", "{!ltr model=popularity-model42 reRankDocs=4}");
- query.add("fl", "[fv]");
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ("/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity42","42.0")+"'}");
+ loadFeature(field, FieldValueFeature.class.getName(), fstore,
+ "{\"field\":\""+field+"\"}");
+ loadModel(field+"-model", LinearModel.class.getName(),
+ new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}");
+ final SolrQuery query = new SolrQuery("id:21");
+ query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ("/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, defaultValue)+"'}");
+ }
}
@Test
- public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exception {
+ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception {
+ // this tests the case that we create a feature for a non-existent field
// using a different fstore to avoid a clash with the other tests
- final String fstore = "testThatIfaFieldDoesNotExistDefaultValueIsReturned";
- loadFeature("not-existing-field", FieldValueFeature.class.getName(), fstore,
+ final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned";
+ loadFeature("not-existing-field", ObservingFieldValueFeature.class.getName(), fstore,
"{\"field\":\"cowabunga\"}");
loadModel("not-existing-field-model", LinearModel.class.getName(),
@@ -166,10 +233,38 @@ public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exceptio
query.setQuery("id:42");
query.add("rq", "{!ltr model=not-existing-field-model reRankDocs=4}");
query.add("fl", "[fv]");
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
+ assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
+ }
+  @Test
+  public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception {
+    // this tests the case that no document contains docValues for the provided existing field
+    final String expectedDefault = Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL);
+
+    for (String fieldName : new String[] {"dvDoubleField", "dvStrBoolField"}) {
+      final String featureStore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned" + fieldName;
+      final String modelName = fieldName + "-model";
+
+      loadFeature(fieldName, ObservingFieldValueFeature.class.getName(), featureStore,
+          "{\"field\":\"" + fieldName + "\"}");
+      loadModel(modelName, LinearModel.class.getName(),
+          new String[] {fieldName}, featureStore, "{\"weights\":{\"" + fieldName + "\":1.0}}");
+
+      final SolrQuery rerankQuery = new SolrQuery("id:42");
+      rerankQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+      rerankQuery.add("fl", "[fv]");
+      final String expectedFv = FeatureLoggerTestUtils.toFeatureVector(fieldName, expectedDefault);
+
+      ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
+      assertJQ("/query" + rerankQuery.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + rerankQuery.toQueryString(), "/response/docs/[0]/=={'[fv]':'" + expectedFv + "'}");
+      // the fallback scorer must have been the one that produced the value
+      assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass);
+    }
+  }
@Test
@@ -203,7 +298,61 @@ public void testBooleanValue() throws Exception {
query.add("fl", "[fv]");
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}");
+ }
+
+  @Test
+  public void testThatExceptionIsThrownForUnsupportedType() throws Exception {
+    final String fieldName = "dvStringPopularities";
+    final String store = "test_store";
+    final String modelName = fieldName + "-model";
+
+    // index a document that carries a value for the field under test
+    assertU(adoc("id", "21", "title", "multivalued not supported", fieldName, "wow value"));
+    assertU(commit());
+    loadFeature(fieldName, FieldValueFeature.class.getName(), store, "{\"field\":\"" + fieldName + "\"}");
+    loadModel(modelName, LinearModel.class.getName(), new String[] {fieldName}, store,
+        "{\"weights\":{\"" + fieldName + "\":1.0}}");
+
+    final SolrQuery solrQuery = new SolrQuery("id:21");
+    solrQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+    solrQuery.add("fl", "[fv]");
+    assertJQ("/query" + solrQuery.toQueryString(),
+        "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'");
+  }
+
+  @Test
+  public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception {
+    // each row: field name, value to index, class name of the scorer expected to handle that field
+    final String[][] cases = {
+      {"dvIntPopularity", "1", NumericDocValuesFieldValueFeatureScorer.class.getName()},
+      {"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()},
+      {"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()},
+      {"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()}
+    };
+
+    for (final String[] testCase : cases) {
+      final String fieldName = testCase[0];
+      final String indexedValue = testCase[1];
+      final String expectedScorer = testCase[2];
+      final String store = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes" + fieldName;
+      final String modelName = fieldName + "-model";
+
+      assertU(adoc("id", "21", fieldName, indexedValue));
+      assertU(commit());
+      loadFeature(fieldName, ObservingFieldValueFeature.class.getName(), store, "{\"field\":\"" + fieldName + "\"}");
+      loadModel(modelName, LinearModel.class.getName(), new String[] {fieldName}, store,
+          "{\"weights\":{\"" + fieldName + "\":1.0}}");
+
+      final SolrQuery solrQuery = new SolrQuery("id:21");
+      solrQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+      solrQuery.add("fl", "[fv]");
+      final String request = "/query" + solrQuery.toQueryString();
+      ObservingFieldValueFeature.usedScorerClass = null; // forget whatever an earlier query recorded
+      assertJQ(request, "/response/numFound/==1");
+      assertJQ(request, "/response/docs/[0]/=={'[fv]':'" + FeatureLoggerTestUtils.toFeatureVector(fieldName, "1.0") + "'}");
+      assertEquals(expectedScorer, ObservingFieldValueFeature.usedScorerClass);
+    }
 }
@Test
@@ -213,4 +362,105 @@ public void testParamsToMap() throws Exception {
doTestParamsToMap(FieldValueFeature.class.getName(), params);
}
+  @Test
+  public void testThatStringValuesAreCorrectlyParsed() throws Exception {
+    final String defaultValue = Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL);
+
+    // each row: raw string to index, feature value expected in the [fv] output;
+    // only "T" and "F" are expected to map to 1.0 / 0.0, every other input to the default value
+    final String[][] inputsAndExpectedValues = {
+      {"T", "1.0"},
+      {"F", "0.0"},
+      {"-7324.427", defaultValue},
+      {"532", defaultValue},
+      {Float.toString(Float.NaN), defaultValue},
+      {"notanumber", defaultValue}
+    };
+
+    for (final String fieldName : new String[] {"dvStrNumField", "noDvStrNumField"}) {
+      final String store = "testThatStringValuesAreCorrectlyParsed" + fieldName;
+      final String modelName = fieldName + "-model";
+
+      loadFeature(fieldName, FieldValueFeature.class.getName(), store, "{\"field\":\"" + fieldName + "\"}");
+      loadModel(modelName, LinearModel.class.getName(), new String[] {fieldName}, store,
+          "{\"weights\":{\"" + fieldName + "\":1.0}}");
+
+      for (final String[] inputAndExpected : inputsAndExpectedValues) {
+        // re-index document 21 so that it carries the current input value
+        assertU(adoc("id", "21", fieldName, inputAndExpected[0]));
+        assertU(commit());
+
+        final SolrQuery solrQuery = new SolrQuery("id:21");
+        solrQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+        solrQuery.add("fl", "[fv]");
+
+        final String expectedVector = FeatureLoggerTestUtils.toFeatureVector(fieldName, inputAndExpected[1]);
+        assertJQ("/query" + solrQuery.toQueryString(), "/response/docs/[0]/=={'[fv]':'" + expectedVector + "'}");
+      }
+    }
+  }
+
+  @Test
+  public void testThatDateValuesAreCorrectlyParsed() throws Exception {
+    // each row: date string to index, feature value expected in the [fv] output (its offset from 1970-01-01T00:00:00Z in ms)
+    final String[][] datesAndExpectedValues = {
+      {"1970-01-01T00:00:00.000Z", "0.0"},
+      {"1970-01-01T00:00:00.001Z", "1.0"},
+      {"1970-01-01T00:00:01.234Z", "1234.0"}
+    };
+
+    for (final String fieldName : new String[] {"dvDateField", "noDvDateField"}) {
+      final String store = "testThatDateValuesAreCorrectlyParsed" + fieldName;
+      final String modelName = fieldName + "-model";
+
+      loadFeature(fieldName, FieldValueFeature.class.getName(), store, "{\"field\":\"" + fieldName + "\"}");
+      loadModel(modelName, LinearModel.class.getName(), new String[] {fieldName}, store,
+          "{\"weights\":{\"" + fieldName + "\":1.0}}");
+
+      for (final String[] dateAndExpected : datesAndExpectedValues) {
+        // re-index document 21 so that it carries the current date value
+        assertU(adoc("id", "21", fieldName, dateAndExpected[0]));
+        assertU(commit());
+
+        final SolrQuery solrQuery = new SolrQuery("id:21");
+        solrQuery.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+        solrQuery.add("fl", "[fv]");
+
+        final String expectedVector = FeatureLoggerTestUtils.toFeatureVector(fieldName, dateAndExpected[1]);
+        assertJQ("/query" + solrQuery.toQueryString(), "/response/docs/[0]/=={'[fv]':'" + expectedVector + "'}");
+      }
+    }
+  }
+
+  /**
+   * Test-only {@link FieldValueFeature} that records the class name of the scorer most recently returned by
+   * its weight, so that tests can check which scorer implementation (including any fallback) was selected.
+   */
+  public static final class ObservingFieldValueFeature extends FieldValueFeature {
+    static volatile String usedScorerClass; // volatile: set in scorer(), read by test assertions possibly on another thread
+
+    public ObservingFieldValueFeature(String name, Map params) {
+      super(name, params);
+    }
+
+    @Override
+    public Feature.FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request,
+        Query originalQuery, Map efi) throws IOException {
+      return new ObservingFieldValueFeatureWeight(searcher, request, originalQuery, efi);
+    }
+
+    /** Weight that delegates scorer creation to the parent class and remembers the class of the scorer it produced. */
+    public class ObservingFieldValueFeatureWeight extends FieldValueFeatureWeight {
+      public ObservingFieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, Query originalQuery, Map efi) {
+        super(searcher, request, originalQuery, efi);
+      }
+
+      @Override
+      public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+        final FeatureScorer scorer = super.scorer(context);
+        usedScorerClass = scorer.getClass().getName();
+        return scorer;
+      }
+    }
+  }
}