diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index f853ddeffb8..2f564cd20b8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -333,6 +333,9 @@ New Features * SOLR-15090: A new 'gcs-repository' contrib can be used to store and retrieve backups from Google Cloud Storage. (Jason Gerlowski, Shalin Mangar, Cao Manh Dat) +* SOLR-12697: In contrib/ltr FieldValueFeature support "stored=false docValues=true" a.k.a. pure DocValues fields. + (Stanislav Livotov, Erick Erickson, Tobias Kässmann, Tom Gilke, Christine Poerschke) + Improvements --------------------- * SOLR-15081: Metrics for a core: add SolrCloud "isLeader" and "replicaState". (David Smiley) diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java index d12795d2663..563a01d14df 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java @@ -23,16 +23,26 @@ import java.util.Set; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.NumberType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrIndexSearcher; /** - * This feature returns the value of a field in the current document + * This feature returns the value of a field in the current document. 
+ * The field must have stored="true" or docValues="true" properties. * Example configuration: *
{
   "name":  "rawHits",
@@ -41,6 +51,17 @@
       "field": "hits"
   }
 }
+ * + *

There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. + * The chosen scorer depends on the field attributes.

+ * + *

FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or docValues=false

+ * + *

NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC

+ *

SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED + * + *

DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on segments + * where no document has a value set in the field of this feature

*/ public class FieldValueFeature extends Feature { @@ -83,18 +104,52 @@ public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, } public class FieldValueFeatureWeight extends FeatureWeight { + private final SchemaField schemaField; public FieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, Query originalQuery, Map efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { // some tests pass a null or a non-SolrIndexSearcher searcher + schemaField = null; + } } + /** + * Return a FeatureScorer that uses docValues or storedFields if no docValues are present + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + + final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + final DocValuesType docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE; + + if (DocValuesType.NUMERIC.equals(docValuesType)) { + return new NumericDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType()); + } else if (DocValuesType.SORTED.equals(docValuesType)) { + return new SortedDocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } else if (DocValuesType.NONE.equals(docValuesType)) { + // Using a fallback feature scorer because this segment has no documents with a doc value for the current field + return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } + throw new IllegalArgumentException("Doc values type " + docValuesType.name() + " of field " + field + + " is not supported"); + } return new FieldValueFeatureScorer(this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } + /** + * A FeatureScorer that reads the stored value for a field + */ public class FieldValueFeatureScorer extends FeatureScorer { LeafReaderContext context = null; @@ -146,5 +201,137 @@ public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } + + /** + * A FeatureScorer that reads the numeric docValues for a field + */ + public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer { + private final NumericDocValues docValues; + private final NumberType numberType; + + public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, + final DocIdSetIterator itr, final NumberType numberType) { + super(weight, itr); + this.numberType = numberType; + + NumericDocValues docValues; + try { + docValues = DocValues.getNumeric(context.reader(), field); + } catch (IOException e) { + throw new IllegalArgumentException("Could not read numeric docValues for field " + field, e); + } + this.docValues = docValues; + } + + @Override 
+ public float score() throws IOException { + if (docValues.advanceExact(itr.docID())) { + return readNumericDocValues(); + } + return FieldValueFeature.this.getDefaultValue(); + } + + /** + * Read the numeric value for a field and convert the different number types to float. + * + * @return The numeric value that the docValues contain for the current document + * @throws IOException if docValues cannot be read + */ + private float readNumericDocValues() throws IOException { + if (NumberType.FLOAT.equals(numberType)) { + // convert float value that was stored as long back to float + return Float.intBitsToFloat((int) docValues.longValue()); + } else if (NumberType.DOUBLE.equals(numberType)) { + // handle double value conversion + return (float) Double.longBitsToDouble(docValues.longValue()); + } + // just take the long value + return docValues.longValue(); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } + + /** + * A FeatureScorer that reads the sorted docValues for a field + */ + public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer { + private final SortedDocValues docValues; + + public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context, + final DocIdSetIterator itr) { + super(weight, itr); + + SortedDocValues docValues; + try { + docValues = DocValues.getSorted(context.reader(), field); + } catch (IOException e) { + throw new IllegalArgumentException("Could not read sorted docValues for field " + field, e); + } + this.docValues = docValues; + } + + @Override + public float score() throws IOException { + if (docValues.advanceExact(itr.docID())) { + int ord = docValues.ordValue(); + return readSortedDocValues(docValues.lookupOrd(ord)); + } + return FieldValueFeature.this.getDefaultValue(); + } + + /** + * Interprets the bytesRef as a true / false token; any other value yields the default value of this feature + * + * @param bytesRef the value 
of the field that should be used as score + * @return 1 for the TRUE_TOKEN, 0 for the FALSE_TOKEN, otherwise the default value of this feature + */ + private float readSortedDocValues(BytesRef bytesRef) { + String string = bytesRef.utf8ToString(); + if (string.length() == 1) { + // boolean values in the index are encoded with + // a single char contained in TRUE_TOKEN or FALSE_TOKEN + // (see BoolField) + if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { + return 1; + } + if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { + return 0; + } + } + return FieldValueFeature.this.getDefaultValue(); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } + + /** + * A FeatureScorer that always returns the default value. + * + * It is used as a fallback for cases when a segment does not have any documents that contain doc values for a field. + * By doing so, we prevent a fallback to the FieldValueFeatureScorer, which would also return the default value but + * in a less performant way because it would first try to read the stored fields for the doc (which aren't present). 
+ */ + public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer { + public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) { + super(weight, itr); + } + + @Override + public float score() throws IOException { + return FieldValueFeature.this.getDefaultValue(); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return Float.POSITIVE_INFINITY; + } + } } } diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml index b6f5b3bb2d1..288a953c85d 100644 --- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml @@ -26,8 +26,28 @@ + + + + + + + + + + + + + + + + + + + + @@ -41,6 +61,13 @@ + + + + + + + diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 21b71c3e5ec..e6fc0c852a4 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -16,7 +16,10 @@ package org.apache.solr.ltr; import java.io.File; +import java.util.Collections; +import java.util.List; import java.util.SortedMap; +import java.util.stream.IntStream; import org.apache.commons.io.FileUtils; import org.apache.solr.client.solrj.SolrQuery; @@ -28,6 +31,7 @@ import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.ltr.feature.FieldValueFeature; import org.apache.solr.ltr.feature.OriginalScoreFeature; import org.apache.solr.ltr.feature.SolrFeature; import org.apache.solr.ltr.feature.ValueFeature; @@ -37,6 +41,8 @@ import org.junit.AfterClass; import org.junit.Test; +import static java.util.stream.Collectors.toList; + public class 
TestLTROnSolrCloud extends TestRerankBase { private MiniSolrCloudCluster solrCluster; @@ -106,22 +112,38 @@ public void testSimpleQuery() throws Exception { final Float original_result6_score = (Float)queryResponse.getResults().get(6).get("score"); final Float original_result7_score = (Float)queryResponse.getResults().get(7).get("score"); - final String result0_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","64.0", "c3","2.0", "original","0.0"); - final String result1_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","49.0", "c3","2.0", "original","1.0"); - final String result2_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","36.0", "c3","2.0", "original","2.0"); - final String result3_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","25.0", "c3","2.0", "original","3.0"); - final String result4_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS","16.0", "c3","2.0", "original","4.0"); - final String result5_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "9.0", "c3","2.0", "original","5.0"); - final String result6_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "4.0", "c3","2.0", "original","6.0"); - final String result7_features= FeatureLoggerTestUtils.toFeatureVector( - "powpularityS", "1.0", "c3","2.0", "original","7.0"); + final String result0_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "64.0", "c3", "2.0", "original", "0.0", "dvIntFieldFeature", "8.0", + "dvLongFieldFeature", "8.0", "dvFloatFieldFeature", "0.8", "dvDoubleFieldFeature", "0.8", + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); + final String result1_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "49.0", "c3", "2.0", "original", "1.0", "dvIntFieldFeature", "7.0", + "dvLongFieldFeature", "7.0", "dvFloatFieldFeature", "0.7", "dvDoubleFieldFeature", "0.7", + "dvStrNumFieldFeature", "1.0", 
"dvStrBoolFieldFeature", "0.0"); + final String result2_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "36.0", "c3", "2.0", "original", "2.0", "dvIntFieldFeature", "6.0", + "dvLongFieldFeature", "6.0", "dvFloatFieldFeature", "0.6", "dvDoubleFieldFeature", "0.6", + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); + final String result3_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "25.0", "c3", "2.0", "original", "3.0", "dvIntFieldFeature", "5.0", + "dvLongFieldFeature", "5.0", "dvFloatFieldFeature", "0.5", "dvDoubleFieldFeature", "0.5", + "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0"); + final String result4_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "16.0", "c3", "2.0", "original", "4.0", "dvIntFieldFeature", "4.0", + "dvLongFieldFeature", "4.0", "dvFloatFieldFeature", "0.4", "dvDoubleFieldFeature", "0.4", + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); + final String result5_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "9.0", "c3", "2.0", "original", "5.0", "dvIntFieldFeature", "3.0", + "dvLongFieldFeature", "3.0", "dvFloatFieldFeature", "0.3", "dvDoubleFieldFeature", "0.3", + "dvStrNumFieldFeature", "1.0", "dvStrBoolFieldFeature", "0.0"); + final String result6_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "4.0", "c3", "2.0", "original", "6.0", "dvIntFieldFeature", "2.0", + "dvLongFieldFeature", "2.0", "dvFloatFieldFeature", "0.2", "dvDoubleFieldFeature", "0.2", + "dvStrNumFieldFeature", "0.0", "dvStrBoolFieldFeature", "1.0"); + final String result7_features = FeatureLoggerTestUtils.toFeatureVector( + "powpularityS", "1.0", "c3", "2.0", "original", "7.0", "dvIntFieldFeature", "-1.0", + "dvLongFieldFeature", "-2.0", "dvFloatFieldFeature", "-3.0", "dvDoubleFieldFeature", "-4.0", + "dvStrNumFieldFeature", "-5.0", "dvStrBoolFieldFeature", "0.0"); // Test feature vectors returned (without re-ranking) @@ 
-240,50 +262,107 @@ void indexDocument(String collection, String id, String title, String descriptio doc.setField("title", title); doc.setField("description", description); doc.setField("popularity", popularity); + if (popularity != 1) { + // check that empty values will be read as default + doc.setField("dvIntField", popularity); + doc.setField("dvLongField", popularity); + doc.setField("dvFloatField", ((float) popularity) / 10); + doc.setField("dvDoubleField", ((double) popularity) / 10); + doc.setField("dvStrNumField", popularity % 2 == 0 ? "F" : "T"); + doc.setField("dvStrBoolField", popularity % 2 == 0 ? "T" : "F"); + } solrCluster.getSolrClient().add(collection, doc); } private void indexDocuments(final String collection) throws Exception { final int collectionSize = 8; - for (int docId = 1; docId <= collectionSize; docId++) { + // put documents in random order to check that advanceExact is working correctly + List docIds = IntStream.rangeClosed(1, collectionSize).boxed().collect(toList()); + Collections.shuffle(docIds, random()); + + int docCounter = 1; + for (int docId : docIds) { final int popularity = docId; indexDocument(collection, String.valueOf(docId), "a1", "bloom", popularity); + // maybe commit in the middle in order to check that everything works fine for multi-segment case + if (docCounter == collectionSize / 2 && random().nextBoolean()) { + solrCluster.getSolrClient().commit(collection); + } + docCounter++; } - solrCluster.getSolrClient().commit(collection); + solrCluster.getSolrClient().commit(collection, true, true); } - private void loadModelsAndFeatures() throws Exception { final String featureStore = "test"; - final String[] featureNames = new String[] {"powpularityS","c3", "original"}; - final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1}}"; + final String[] featureNames = new String[]{"powpularityS", "c3", "original", "dvIntFieldFeature", + "dvLongFieldFeature", "dvFloatFieldFeature", 
"dvDoubleFieldFeature", "dvStrNumFieldFeature", "dvStrBoolFieldFeature"}; + final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," + + "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," + + "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}"; loadFeature( - featureNames[0], - SolrFeature.class.getName(), - featureStore, - "{\"q\":\"{!func}pow(popularity,2)\"}" + featureNames[0], + SolrFeature.class.getName(), + featureStore, + "{\"q\":\"{!func}pow(popularity,2)\"}" + ); + loadFeature( + featureNames[1], + ValueFeature.class.getName(), + featureStore, + "{\"value\":2}" + ); + loadFeature( + featureNames[2], + OriginalScoreFeature.class.getName(), + featureStore, + null + ); + loadFeature( + featureNames[3], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvIntField\"}" + ); + loadFeature( + featureNames[4], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvLongField\"}" + ); + loadFeature( + featureNames[5], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvFloatField\"}" + ); + loadFeature( + featureNames[6], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvDoubleField\",\"defaultValue\":-4.0}" ); loadFeature( - featureNames[1], - ValueFeature.class.getName(), - featureStore, - "{\"value\":2}" + featureNames[7], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvStrNumField\",\"defaultValue\":-5}" ); loadFeature( - featureNames[2], - OriginalScoreFeature.class.getName(), - featureStore, - null + featureNames[8], + FieldValueFeature.class.getName(), + featureStore, + "{\"field\":\"dvStrBoolField\"}" ); loadModel( - "powpularityS-model", - LinearModel.class.getName(), - featureNames, - featureStore, - jsonModelParams + "powpularityS-model", + LinearModel.class.getName(), + featureNames, + featureStore, + jsonModelParams ); 
reloadCollection(COLLECTION); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java index 108044b5cbd..15a007bb584 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java @@ -16,12 +16,22 @@ */ package org.apache.solr.ltr.feature; +import java.io.IOException; import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.ltr.FeatureLoggerTestUtils; import org.apache.solr.ltr.TestRerankBase; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.DefaultValueFieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.NumericDocValuesFieldValueFeatureScorer; +import org.apache.solr.ltr.feature.FieldValueFeature.FieldValueFeatureWeight.SortedDocValuesFieldValueFeatureScorer; import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -30,37 +40,57 @@ public class TestFieldValueFeature extends TestRerankBase { private static final float FIELD_VALUE_FEATURE_DEFAULT_VAL = 0.0f; + private static final String[] FIELDS = { + "popularity", + "dvIntPopularity", "dvLongPopularity", + "dvFloatPopularity", "dvDoublePopularity", + "dvStringPopularity", + "isTrendy", + "dvIsTrendy" + }; + @Before public void before() throws Exception { setuptest(false); - assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity", - "1","isTrendy","true")); - 
assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description", - "w2 2asd asdd didid", "popularity", "2")); - assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity", - "3","isTrendy","true")); - assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity", - "4","isTrendy","false")); - assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity", - "5","isTrendy","true")); - assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2", - "popularity", "6","isTrendy","false")); - assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description", - "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true")); - assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description", - "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false")); - - // a document without the popularity field - assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity")); + assertU(adoc("id", "1", "popularity", "1", "title", "w1", + "dvStringPopularity", "1", + "description", "w1", "isTrendy", "true")); + assertU(adoc("id", "2", "popularity", "2", "title", "w2 2asd asdd didid", + "dvStringPopularity", "2", + "description", "w2 2asd asdd didid")); + assertU(adoc("id", "3", "popularity", "3", "title", "w3", + "dvStringPopularity", "3", + "description", "w3", "isTrendy", "true")); + assertU(adoc("id", "4", "popularity", "4", "title", "w4", + "dvStringPopularity", "4", + "description", "w4", "isTrendy", "false")); + assertU(adoc("id", "5", "popularity", "5", "title", "w5", + "dvStringPopularity", "5", + "description", "w5", "isTrendy", "true")); + assertU(adoc("id", "6", "popularity", "6", "title", "w1 w2", + "dvStringPopularity", "6", + "description", "w1 w2", "isTrendy", "false")); + assertU(adoc("id", "7", "popularity", "7", "title", "w1 w2 w3 w4 w5", + "dvStringPopularity", "7", + "description", "w1 w2 w3 w4 w5 w8", "isTrendy", "true")); + assertU(adoc("id", "8", "popularity", "8", "title", "w1 w1 w1 w2 w2 w8", + 
"dvStringPopularity", "8", + "description", "w1 w1 w1 w2 w2", "isTrendy", "false")); + + // a document without the popularity and the dv fields + assertU(adoc("id", "42", "title", "NO popularity or isTrendy", "description", "NO popularity or isTrendy")); assertU(commit()); - loadFeature("popularity", FieldValueFeature.class.getName(), - "{\"field\":\"popularity\"}"); - - loadModel("popularity-model", LinearModel.class.getName(), - new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}"); + for (String field : FIELDS) { + loadFeature(field, FieldValueFeature.class.getName(), + "{\"field\":\"" + field + "\"}"); + } + loadModel("model", LinearModel.class.getName(), FIELDS, + "{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0," + + "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0," + + "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}"); } @After @@ -83,7 +113,7 @@ public void testRanking() throws Exception { assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); + query.add("rq", "{!ltr model=model reRankDocs=4}"); assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); @@ -95,7 +125,7 @@ public void testRanking() throws Exception { query.remove("rows"); query.add("rows", "8"); query.remove("rq"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=8}"); + query.add("rq", "{!ltr model=model reRankDocs=8}"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); @@ -113,50 +143,87 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + 
query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); + query.add("rq", "{!ltr model=model reRankDocs=4}"); query.add("fl", "[fv]"); + + // "0.0" in the assertJQ below is more readable than + // Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL) but first make sure it's equivalent + assertEquals("0.0", Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL)); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); - + "/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0," + + "dvFloatPopularity=0.0,dvDoublePopularity=0.0," + + "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}"); } @Test public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws Exception { + for (String field : FIELDS) { + final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"+field; + + loadFeature(field+"42", FieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\",\"defaultValue\":\"42.0\"}"); + + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + loadModel(field+"-model42", LinearModel.class.getName(), + new String[] {field+"42"}, fstore, "{\"weights\":{\""+field+"42\":1.0}}"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model42 reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + 
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field+"42","42.0")+"'}"); + } + } - final String fstore = "testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned"; + @Test + public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned() throws Exception { + final String[] fieldsWithDefaultValues = {"dvIntField", "dvLongField", "dvFloatField"}; + final String[] defaultValues = {"-1.0", "-2.0", "-3.0"}; - loadFeature("popularity42", FieldValueFeature.class.getName(), fstore, - "{\"field\":\"popularity\",\"defaultValue\":\"42.0\"}"); + for (int idx = 0; idx < fieldsWithDefaultValues.length; ++idx) { + final String field = fieldsWithDefaultValues[idx]; + final String defaultValue = defaultValues[idx]; - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); + final String fstore = "testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned"+field; - loadModel("popularity-model42", LinearModel.class.getName(), - new String[] {"popularity42"}, fstore, "{\"weights\":{\"popularity42\":1.0}}"); + assertU(adoc("id", "21")); + assertU(commit()); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model42 reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity42","42.0")+"'}"); + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr 
model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, defaultValue)+"'}"); + } } @Test - public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exception { + public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws Exception { + // this tests the case that we create a feature for a non-existent field // using a different fstore to avoid a clash with the other tests - final String fstore = "testThatIfaFieldDoesNotExistDefaultValueIsReturned"; - loadFeature("not-existing-field", FieldValueFeature.class.getName(), fstore, + final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned"; + loadFeature("not-existing-field", ObservingFieldValueFeature.class.getName(), fstore, "{\"field\":\"cowabunga\"}"); loadModel("not-existing-field-model", LinearModel.class.getName(), @@ -166,10 +233,38 @@ public void testThatIfaFieldDoesNotExistDefaultValueIsReturned() throws Exceptio query.setQuery("id:42"); query.add("rq", "{!ltr model=not-existing-field-model reRankDocs=4}"); query.add("fl", "[fv]"); + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("not-existing-field",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertEquals(FieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); + } + @Test + public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws Exception { + final String[] fieldsWithoutDefaultValues = {"dvDoubleField", "dvStrBoolField"}; + // this tests the case that no document contains docValues for the provided 
existing field + + for (String field : fieldsWithoutDefaultValues) { + final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned"+field; + + loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); + + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\""+field+"\":1.0}}"); + + final SolrQuery query = new SolrQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils + .toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); + assertEquals(DefaultValueFieldValueFeatureScorer.class.getName(), ObservingFieldValueFeature.usedScorerClass); + } } @Test @@ -203,7 +298,61 @@ public void testBooleanValue() throws Exception { query.add("fl", "[fv]"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("trendy","0.0")+"'}"); + } + + @Test + public void testThatExceptionIsThrownForUnsupportedType() throws Exception { + final String fstore = "test_store"; + + assertU(adoc("id", "21", "title", "multivalued not supported", "dvStringPopularities", "wow value")); + assertU(commit()); + loadFeature("dvStringPopularities", FieldValueFeature.class.getName(), fstore, + "{\"field\":\"dvStringPopularities\"}"); + + loadModel("dvStringPopularities-model", LinearModel.class.getName(), + new String[] {"dvStringPopularities"}, fstore, "{\"weights\":{\"dvStringPopularities\":1.0}}"); + + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=dvStringPopularities-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + 
query.toQueryString(), + "/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'"); + } + + @Test + public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception { + final String[][] fieldsWithDifferentTypes = { + new String[]{"dvIntPopularity", "1", NumericDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()}, + new String[]{"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()}, + new String[]{"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()} + }; + + for (String[] fieldAndScorerClass : fieldsWithDifferentTypes) { + final String field = fieldAndScorerClass[0]; + final String fieldValue = fieldAndScorerClass[1]; + final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes"+field; + + assertU(adoc("id", "21", field, fieldValue)); + assertU(commit()); + + loadFeature(field, ObservingFieldValueFeature.class.getName(), fstore, + "{\"field\":\""+field+"\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[] {field}, fstore, "{\"weights\":{\"" + field + "\":1.0}}"); + + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"); + assertEquals(fieldAndScorerClass[2], ObservingFieldValueFeature.usedScorerClass); + } } @Test @@ -213,4 +362,105 @@ public void testParamsToMap() throws Exception { doTestParamsToMap(FieldValueFeature.class.getName(), params); } + @Test + public void testThatStringValuesAreCorrectlyParsed() throws Exception { + 
for (String field : new String[] {"dvStrNumField" , "noDvStrNumField"}) { + final String[][] inputsAndTests = { + new String[]{"T", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"}, + new String[]{"F", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "0.0")+"'}"}, + new String[]{"-7324.427", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{"532", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{Float.toString(Float.NaN), "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"}, + new String[]{"notanumber", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"} + }; + + final String fstore = "testThatStringValuesAreCorrectlyParsed"+field; + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\"" + field + "\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[]{field}, fstore, + "{\"weights\":{\""+field+"\":1.0}}"); + + for (String[] inputAndTest : inputsAndTests) { + assertU(adoc("id", "21", field, inputAndTest[0])); + assertU(commit()); + + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + } + } + } + + @Test + public void testThatDateValuesAreCorrectlyParsed() throws Exception { + for (String field : new String[] {"dvDateField", "noDvDateField"}) { + final String[][] inputsAndTests = { + new String[]{"1970-01-01T00:00:00.000Z", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, 
"0.0")+"'}"}, + new String[]{"1970-01-01T00:00:00.001Z", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "1.0")+"'}"}, + new String[]{"1970-01-01T00:00:01.234Z", "/response/docs/[0]/=={'[fv]':'" + + FeatureLoggerTestUtils.toFeatureVector(field, "1234.0")+"'}"} + }; + + final String fstore = "testThatDateValuesAreCorrectlyParsed"+field; + loadFeature(field, FieldValueFeature.class.getName(), fstore, + "{\"field\":\"" + field + "\"}"); + loadModel(field+"-model", LinearModel.class.getName(), + new String[]{field}, fstore, + "{\"weights\":{\""+field+"\":1.0}}"); + + for (String[] inputAndTest : inputsAndTests) { + assertU(adoc("id", "21", field, inputAndTest[0])); + assertU(commit()); + + final SolrQuery query = new SolrQuery("id:21"); + query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}"); + query.add("fl", "[fv]"); + + assertJQ("/query" + query.toQueryString(), inputAndTest[1]); + } + } + } + + /** + * This class is used to track which specific FieldValueFeature scorer is used so that we can test whether the + * fallback mechanism works correctly. 
+ */ + final public static class ObservingFieldValueFeature extends FieldValueFeature { + static String usedScorerClass; + + public ObservingFieldValueFeature(String name, Map params) { + super(name, params); + } + + @Override + public Feature.FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, + Query originalQuery, Map efi) throws IOException { + return new ObservingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class ObservingFieldValueFeatureWeight extends FieldValueFeatureWeight { + public ObservingFieldValueFeatureWeight(IndexSearcher searcher, SolrQueryRequest request, + Query originalQuery, Map efi) { + super(searcher, request, originalQuery, efi); + } + + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + FeatureScorer scorer = super.scorer(context); + usedScorerClass = scorer.getClass().getName(); + return scorer; + } + } + } }