Skip to content

Commit

Permalink
Rename ranking evaluation response section (elastic#32166)
Browse files Browse the repository at this point in the history
Currently the ranking evaluation response contains a 'unknown_docs' section 
for each search use case in the evaluation set. It contains document ids for 
results in the search hits that currently don't have a quality rating.
This change renames it to `unrated_docs`, which better reflects its purpose.
  • Loading branch information
Christoph Büscher authored Jul 20, 2018
1 parent c5cde96 commit 5cbd9ad
Show file tree
Hide file tree
Showing 12 changed files with 29 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;

public class RankEvalIT extends ESRestHighLevelClientTestCase {

Expand Down Expand Up @@ -84,7 +84,7 @@ public void testRankEvalRequest() throws IOException {
Map<String, EvalQueryQuality> partialResults = response.getPartialResults();
assertEquals(2, partialResults.size());
EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query");
assertEquals(2, filterUnknownDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
assertEquals(2, filterUnratedDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
List<RatedSearchHit> hitsAndRatings = amsterdamQueryQuality.getHitsAndRatings();
assertEquals(7, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
Expand All @@ -96,7 +96,7 @@ public void testRankEvalRequest() throws IOException {
}
}
EvalQueryQuality berlinQueryQuality = partialResults.get("berlin_query");
assertEquals(6, filterUnknownDocuments(berlinQueryQuality.getHitsAndRatings()).size());
assertEquals(6, filterUnratedDocuments(berlinQueryQuality.getHitsAndRatings()).size());
hitsAndRatings = berlinQueryQuality.getHitsAndRatings();
assertEquals(7, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/search/rank-eval.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ that shows potential errors of individual queries. The response has the followin
"details": {
"my_query_id1": { <2>
"quality_level": 0.6, <3>
"unknown_docs": [ <4>
"unrated_docs": [ <4>
{
"_index": "my_index",
"_id": "1960795"
Expand Down Expand Up @@ -309,7 +309,7 @@ that shows potential errors of individual queries. The response has the followin
<1> the overall evaluation quality calculated by the defined metric
<2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id
<3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score
<4> the `unknown_docs` section contains an `_index` and `_id` entry for each document in the search result for this
<4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this
query that didn't have a ratings value. This can be used to ask the user to supply ratings for these documents
<5> the `hits` section shows a grouping of the search results with their supplied rating
<6> the `metric_details` give additional information about the calculated quality metric (e.g. how many of the retrieved
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ public List<RatedSearchHit> getHitsAndRatings() {
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(queryId);
builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult);
builder.startArray(UNKNOWN_DOCS_FIELD.getPreferredName());
for (DocumentKey key : EvaluationMetric.filterUnknownDocuments(ratedHits)) {
builder.startArray(UNRATED_DOCS_FIELD.getPreferredName());
for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) {
builder.startObject();
builder.field(RatedDocument.INDEX_FIELD.getPreferredName(), key.getIndex());
builder.field(RatedDocument.DOC_ID_FIELD.getPreferredName(), key.getDocId());
Expand All @@ -123,7 +123,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
}

private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level");
private static final ParseField UNKNOWN_DOCS_FIELD = new ParseField("unknown_docs");
private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs");
private static final ParseField HITS_FIELD = new ParseField("hits");
private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details");
private static final ObjectParser<ParsedEvalQueryQuality, Void> PARSER = new ObjectParser<>("eval_query_quality",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,9 @@ static List<RatedSearchHit> joinHitsWithRatings(SearchHit[] hits, List<RatedDocu
/**
* filter @link {@link RatedSearchHit} that don't have a rating
*/
static List<DocumentKey> filterUnknownDocuments(List<RatedSearchHit> ratedHits) {
List<DocumentKey> unknownDocs = ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
static List<DocumentKey> filterUnratedDocuments(List<RatedSearchHit> ratedHits) {
return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
.map(hit -> new DocumentKey(hit.getSearchHit().getIndex(), hit.getSearchHit().getId())).collect(Collectors.toList());
return unknownDocs;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import java.util.Collections;
import java.util.List;

import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
import static org.elasticsearch.test.XContentTestUtils.insertRandomFields;
import static org.hamcrest.CoreMatchers.containsString;
Expand Down Expand Up @@ -128,7 +128,7 @@ public void testDCGAtSixMissingRatings() {
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
EvalQueryQuality result = dcg.evaluate("id", hits, rated);
assertEquals(12.779642067948913, result.getQualityLevel(), DELTA);
assertEquals(2, filterUnknownDocuments(result.getHitsAndRatings()).size());
assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size());

/**
* Check with normalization: to get the maximal possible dcg, sort documents by
Expand Down Expand Up @@ -185,7 +185,7 @@ public void testDCGAtFourMoreRatings() {
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
assertEquals(12.392789260714371, result.getQualityLevel(), DELTA);
assertEquals(1, filterUnknownDocuments(result.getHitsAndRatings()).size());
assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size());

/**
* Check with normalization: to get the maximal possible dcg, sort documents by
Expand Down Expand Up @@ -224,13 +224,13 @@ public void testNoResults() throws Exception {
DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
assertEquals(0.0d, result.getQualityLevel(), DELTA);
assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());

// also check normalized
dcg = new DiscountedCumulativeGain(true, null, 10);
result = dcg.evaluate("id", hits, ratedDocs);
assertEquals(0.0d, result.getQualityLevel(), DELTA);
assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
}

public void testParseFromXContent() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.rankeval.RatedDocument.DocumentKey;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.test.ESTestCase;

Expand All @@ -52,11 +51,6 @@ protected NamedXContentRegistry xContentRegistry() {
}

public static EvalQueryQuality randomEvalQueryQuality() {
List<DocumentKey> unknownDocs = new ArrayList<>();
int numberOfUnknownDocs = randomInt(5);
for (int i = 0; i < numberOfUnknownDocs; i++) {
unknownDocs.add(new DocumentKey(randomAlphaOfLength(10), randomAlphaOfLength(10)));
}
int numberOfSearchHits = randomInt(5);
List<RatedSearchHit> ratedHits = new ArrayList<>();
for (int i = 0; i < numberOfSearchHits; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
import java.util.Map.Entry;
import java.util.Set;

import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.instanceOf;

Expand Down Expand Up @@ -120,7 +120,7 @@ public void testPrecisionAtRequest() {
for (Entry<String, EvalQueryQuality> entry : entrySet) {
EvalQueryQuality quality = entry.getValue();
if (entry.getKey() == "amsterdam_query") {
assertEquals(2, filterUnknownDocuments(quality.getHitsAndRatings()).size());
assertEquals(2, filterUnratedDocuments(quality.getHitsAndRatings()).size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
Expand All @@ -133,7 +133,7 @@ public void testPrecisionAtRequest() {
}
}
if (entry.getKey() == "berlin_query") {
assertEquals(5, filterUnknownDocuments(quality.getHitsAndRatings()).size());
assertEquals(5, filterUnratedDocuments(quality.getHitsAndRatings()).size());
List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
assertEquals(6, hitsAndRatings.size());
for (RatedSearchHit hit : hitsAndRatings) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public void testToXContent() throws IOException {
" \"details\": {" +
" \"coffee_query\": {" +
" \"quality_level\": 0.1," +
" \"unknown_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
" \"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
" \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," +
" \"rating\":5}," +
" {\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"456\",\"_score\":1.0}," +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ setup:

- match: { quality_level: 1}
- match: { details.amsterdam_query.quality_level: 1.0}
- match: { details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]}
- match: { details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
- match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}

- length: { details.amsterdam_query.hits: 3}
Expand All @@ -85,7 +85,7 @@ setup:
- is_false: details.amsterdam_query.hits.2.rating

- match: { details.berlin_query.quality_level: 1.0}
- match: { details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]}
- match: { details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]}
- match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
- length: { details.berlin_query.hits: 2}
- match: { details.berlin_query.hits.0.hit._id: "doc1" }
Expand Down Expand Up @@ -155,9 +155,9 @@ setup:
- gt: {details.amsterdam_query.quality_level: 0.333}
- lt: {details.amsterdam_query.quality_level: 0.334}
- match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}}
- match: {details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"},
- match: {details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc2"},
{"_index": "foo", "_id": "doc3"} ]}
- match: {details.berlin_query.quality_level: 0.5}
- match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}}
- match: {details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc1"}]}
- match: {details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc1"}]}

Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
- lt: {quality_level: 13.848264 }
- gt: {details.dcg_query.quality_level: 13.848263}
- lt: {details.dcg_query.quality_level: 13.848264}
- match: {details.dcg_query.unknown_docs: [ ]}
- match: {details.dcg_query.unrated_docs: [ ]}

# reverse the order in which the results are returned (less relevant docs first)

Expand All @@ -100,7 +100,7 @@
- lt: {quality_level: 10.299675}
- gt: {details.dcg_query_reverse.quality_level: 10.299674}
- lt: {details.dcg_query_reverse.quality_level: 10.299675}
- match: {details.dcg_query_reverse.unknown_docs: [ ]}
- match: {details.dcg_query_reverse.unrated_docs: [ ]}

# if we mix both, we should get the average

Expand Down Expand Up @@ -138,7 +138,7 @@
- lt: {quality_level: 12.073970}
- gt: {details.dcg_query.quality_level: 13.848263}
- lt: {details.dcg_query.quality_level: 13.848264}
- match: {details.dcg_query.unknown_docs: [ ]}
- match: {details.dcg_query.unrated_docs: [ ]}
- gt: {details.dcg_query_reverse.quality_level: 10.299674}
- lt: {details.dcg_query_reverse.quality_level: 10.299675}
- match: {details.dcg_query_reverse.unknown_docs: [ ]}
- match: {details.dcg_query_reverse.unrated_docs: [ ]}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

- match: { quality_level: 1}
- match: { details.amsterdam_query.quality_level: 1.0}
- match: { details.amsterdam_query.unknown_docs: [ ]}
- match: { details.amsterdam_query.unrated_docs: [ ]}
- match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}

- is_true: failures.invalid_query
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ setup:
}

- match: {quality_level: 0.9}
- match: {details.amsterdam_query.unknown_docs.0._id: "6"}
- match: {details.amsterdam_query.unrated_docs.0._id: "6"}

---
"Test illegal request parts":
Expand Down

0 comments on commit 5cbd9ad

Please sign in to comment.