Skip to content

Commit

Permalink
SOLR-15440: FieldValueFeature now uses DocValues when docValues=true …
Browse files Browse the repository at this point in the history
…and stored=true are combined. (#1883)

Co-authored-by: tomglk <>
(cherry picked from commit a52c849)
  • Loading branch information
cpoerschke committed Sep 15, 2023
1 parent f441675 commit fbebbbf
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 51 deletions.
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ Improvements

* SOLR-16950: SimpleTracer propagation for manual transaction ids

* SOLR-15440: The Learning To Rank FieldValueFeature now uses DocValues when docValues=true and stored=true are combined. (Christine Poerschke, Tom Gilke)

Optimizations
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,22 @@
* <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
* chosen scorer depends on the field attributes.
*
* <p>FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or
* docValues=false
* <p>FieldValueFeatureScorer (FVFS): used for stored=true if docValues=false
*
* <p>NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
* <p>NumericDocValuesFVFS: used for docValues=true, if docValueType == NUMERIC
*
* <p>SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
* <p>SortedDocValuesFVFS: used for docValues=true, if docValueType == SORTED
*
* <p>DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on
* segments where no document has a value set in the field of this feature
* <p>DefaultValueFVFS: used for docValues=true, a fallback scorer that is used on segments where no
* document has a value set in the field of this feature
*
* <p>Use {@link LegacyFieldValueFeature} for the pre 9.4 behaviour of not using DocValues when
* docValues=true is combined with stored=true.
*/
public class FieldValueFeature extends Feature {

protected boolean useDocValuesForStored = true;

private String field;
private Set<String> fieldAsSet;

Expand Down Expand Up @@ -134,7 +138,9 @@ public FieldValueFeatureWeight(
*/
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
if (schemaField != null
&& (!schemaField.stored() || useDocValuesForStored)
&& schemaField.hasDocValues()) {

final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
final DocValuesType docValuesType =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.ltr.feature;

import java.util.Map;

/**
* This feature returns the value of a field in the current document. The field must have
* stored="true" or docValues="true" properties. Example configuration:
*
* <pre>
* {
* "name": "rawHits",
* "class": "org.apache.solr.ltr.feature.LegacyFieldValueFeature",
* "params": {
* "field": "hits"
* }
* }
* </pre>
*
* <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
* chosen scorer depends on the field attributes.
*
* <p>FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or
* docValues=false
*
* <p>NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
*
* <p>SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
*
* <p>DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on
* segments where no document has a value set in the field of this feature
*
* <p>Matches {@link FieldValueFeature} behaviour prior to 9.4 i.e. DocValues are not used when
* docValues=true is combined with stored=true.
*/
@Deprecated(since = "9.4")
public class LegacyFieldValueFeature extends FieldValueFeature {

public LegacyFieldValueFeature(String name, Map<String, Object> params) {
super(name, params);
this.useDocValuesForStored = false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<field name="normHits" type="float" indexed="true" stored="true" />

<field name="isTrendy" type="boolean" indexed="true" stored="true" />
<field name="storedDvIsTrendy" type="boolean" indexed="true" stored="true" docValues="true"/>
<field name="dvIsTrendy" type="boolean" indexed="true" stored="false" docValues="true"/>

<field name="dvIntField" type="int" indexed="false" docValues="true" stored="false" default="-1" multiValued="false"/>
Expand Down Expand Up @@ -67,6 +68,7 @@
<copyField source="popularity" dest="dvDoublePopularity"/>

<copyField source="isTrendy" dest="dvIsTrendy"/>
<copyField source="isTrendy" dest="storedDvIsTrendy"/>

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,18 @@ public class TestFieldValueFeature extends TestRerankBase {
"dvDoublePopularity",
"dvStringPopularity",
"isTrendy",
"dvIsTrendy"
"dvIsTrendy",
"storedDvIsTrendy"
};

protected String getFieldValueFeatureClassName() {
return FieldValueFeature.class.getName();
}

protected String getObservingFieldValueFeatureClassName() {
return ObservingFieldValueFeature.class.getName();
}

@Before
public void before() throws Exception {
setuptest(false);
Expand Down Expand Up @@ -178,15 +187,15 @@ public void before() throws Exception {
assertU(commit());

for (String field : FIELDS) {
loadFeature(field, FieldValueFeature.class.getName(), "{\"field\":\"" + field + "\"}");
loadFeature(field, getFieldValueFeatureClassName(), "{\"field\":\"" + field + "\"}");
}
loadModel(
"model",
LinearModel.class.getName(),
FIELDS,
"{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0,"
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0,"
+ "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
+ "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0,\"storedDvIsTrendy\":1.0}}");
}

@After
Expand Down Expand Up @@ -253,7 +262,7 @@ public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Excep
"/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0,"
+ "dvFloatPopularity=0.0,dvDoublePopularity=0.0,"
+ "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}");
+ "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0,storedDvIsTrendy=0.0'}");
}

@Test
Expand All @@ -263,7 +272,7 @@ public void testIfADocumentDoesntHaveAFieldASetDefaultValueIsReturned() throws E

loadFeature(
field + "42",
FieldValueFeature.class.getName(),
getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"" + field + "\",\"defaultValue\":\"42.0\"}");

Expand Down Expand Up @@ -310,8 +319,7 @@ public void testIfADocumentDoesntHaveAFieldTheDefaultValueFromSchemaIsReturned()
assertU(adoc("id", "21"));
assertU(commit());

loadFeature(
field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
Expand Down Expand Up @@ -339,7 +347,7 @@ public void testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned() throws E
final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned";
loadFeature(
"not-existing-field",
ObservingFieldValueFeature.class.getName(),
getObservingFieldValueFeatureClassName(),
fstore,
"{\"field\":\"cowabunga\"}");

Expand Down Expand Up @@ -375,10 +383,7 @@ public void testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned() throws E
final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned" + field;

loadFeature(
field,
ObservingFieldValueFeature.class.getName(),
fstore,
"{\"field\":\"" + field + "\"}");
field, getObservingFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");

loadModel(
field + "-model",
Expand Down Expand Up @@ -415,11 +420,16 @@ public void testBooleanValue_docValues() throws Exception {
implTestBooleanValue("dvIsTrendy");
}

@Test
public void testBooleanValue_stored_docValues() throws Exception {
implTestBooleanValue("storedDvIsTrendy");
}

private void implTestBooleanValue(String isTrendyFieldName) throws Exception {
final String fstore = "test_boolean_store";
loadFeature(
"trendy",
FieldValueFeature.class.getName(),
getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"" + isTrendyFieldName + "\"}");

Expand Down Expand Up @@ -473,7 +483,7 @@ public void testThatExceptionIsThrownForUnsupportedType() throws Exception {

loadFeature(
"dvStringPopularities",
FieldValueFeature.class.getName(),
getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"dvStringPopularities\"}");

Expand All @@ -493,6 +503,10 @@ public void testThatExceptionIsThrownForUnsupportedType() throws Exception {
"/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'");
}

protected String storedDvIsTrendy_FieldValueFeatureScorer_className() {
return SortedDocValuesFieldValueFeatureScorer.class.getName();
}

@Test
public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception {
final String[][] fieldsWithDifferentTypes = {
Expand All @@ -502,6 +516,8 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce
new String[] {
"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()
},
new String[] {"dvIsTrendy", "1", SortedDocValuesFieldValueFeatureScorer.class.getName()},
new String[] {"storedDvIsTrendy", "1", storedDvIsTrendy_FieldValueFeatureScorer_className()},
new String[] {"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()},
new String[] {"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()}
};
Expand All @@ -510,37 +526,51 @@ public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exce
final String field = fieldAndScorerClass[0];
final String fieldValue = fieldAndScorerClass[1];
final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes" + field;
final String modelName = field + "-model";

assertU(adoc("id", "21", field, fieldValue));
assertU(commit());

loadFeature(
field,
ObservingFieldValueFeature.class.getName(),
fstore,
"{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
new String[] {field},
fstore,
"{\"weights\":{\"" + field + "\":1.0}}");
loadFeatureAndModel(getObservingFieldValueFeatureClassName(), field, fstore, modelName);

final SolrQuery query = new SolrQuery("id:21");
query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}");
query.add("fl", "[fv]");
final String usedScorerClass = addAndQueryId21(field, modelName, fieldValue);

ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ(
"/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"
+ FeatureLoggerTestUtils.toFeatureVector(field, "1.0")
+ "'}");
assertEquals(fieldAndScorerClass[2], ObservingFieldValueFeature.usedScorerClass);
assertEquals(fieldAndScorerClass[2], usedScorerClass);
}
}

protected void loadFeatureAndModel(
String featureClassName, String field, String fstore, String modelName) throws Exception {
loadFeature(field, featureClassName, fstore, "{\"field\":\"" + field + "\"}");

loadModel(
modelName,
LinearModel.class.getName(),
new String[] {field},
fstore,
"{\"weights\":{\"" + field + "\":1.0}}");
}

/**
* @return used scorer class
*/
protected String addAndQueryId21(String field, String modelName, String fieldValue)
throws Exception {

assertU(adoc("id", "21", field, fieldValue));
assertU(commit());

final SolrQuery query = new SolrQuery("id:21");
query.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
query.add("fl", "[fv]");

ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
assertJQ(
"/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'"
+ FeatureLoggerTestUtils.toFeatureVector(field, "1.0")
+ "'}");
return ObservingFieldValueFeature.usedScorerClass;
}

@Test
public void testParamsToMap() throws Exception {
final LinkedHashMap<String, Object> params = new LinkedHashMap<>();
Expand Down Expand Up @@ -595,8 +625,7 @@ public void testThatStringValuesAreCorrectlyParsed() throws Exception {
};

final String fstore = "testThatStringValuesAreCorrectlyParsed" + field;
loadFeature(
field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
Expand Down Expand Up @@ -642,8 +671,7 @@ public void testThatDateValuesAreCorrectlyParsed() throws Exception {
};

final String fstore = "testThatDateValuesAreCorrectlyParsed" + field;
loadFeature(
field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
Expand All @@ -668,7 +696,7 @@ public void testThatDateValuesAreCorrectlyParsed() throws Exception {
* This class is used to track which specific FieldValueFeature is used so that we can test,
* whether the fallback mechanism works correctly.
*/
public static final class ObservingFieldValueFeature extends FieldValueFeature {
public static class ObservingFieldValueFeature extends FieldValueFeature {
static String usedScorerClass;

public ObservingFieldValueFeature(String name, Map<String, Object> params) {
Expand Down
Loading

0 comments on commit fbebbbf

Please sign in to comment.