From 5cbd9ad177e4284a2fef799d01920bd09648de70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 20 Jul 2018 11:43:46 +0200 Subject: [PATCH] Rename ranking evaluation response section (#32166) Currently the ranking evaluation response contains an `unknown_docs` section for each search use case in the evaluation set. It contains document ids for results in the search hits that currently don't have a quality rating. This change renames it to `unrated_docs`, which better reflects its purpose. --- .../test/java/org/elasticsearch/client/RankEvalIT.java | 6 +++--- docs/reference/search/rank-eval.asciidoc | 4 ++-- .../elasticsearch/index/rankeval/EvalQueryQuality.java | 6 +++--- .../elasticsearch/index/rankeval/EvaluationMetric.java | 5 ++--- .../index/rankeval/DiscountedCumulativeGainTests.java | 10 +++++----- .../index/rankeval/EvalQueryQualityTests.java | 6 ------ .../index/rankeval/RankEvalRequestIT.java | 6 +++--- .../index/rankeval/RankEvalResponseTests.java | 2 +- .../rest-api-spec/test/rank_eval/10_basic.yml | 8 ++++---- .../resources/rest-api-spec/test/rank_eval/20_dcg.yml | 8 ++++---- .../rest-api-spec/test/rank_eval/30_failures.yml | 2 +- .../test/rank_eval/40_rank_eval_templated.yml | 2 +- 12 files changed, 29 insertions(+), 36 deletions(-) diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java index d61fccb937193..2890257b236cd 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java @@ -40,7 +40,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments; +import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments; public class RankEvalIT extends ESRestHighLevelClientTestCase 
{ @@ -84,7 +84,7 @@ public void testRankEvalRequest() throws IOException { Map partialResults = response.getPartialResults(); assertEquals(2, partialResults.size()); EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query"); - assertEquals(2, filterUnknownDocuments(amsterdamQueryQuality.getHitsAndRatings()).size()); + assertEquals(2, filterUnratedDocuments(amsterdamQueryQuality.getHitsAndRatings()).size()); List hitsAndRatings = amsterdamQueryQuality.getHitsAndRatings(); assertEquals(7, hitsAndRatings.size()); for (RatedSearchHit hit : hitsAndRatings) { @@ -96,7 +96,7 @@ public void testRankEvalRequest() throws IOException { } } EvalQueryQuality berlinQueryQuality = partialResults.get("berlin_query"); - assertEquals(6, filterUnknownDocuments(berlinQueryQuality.getHitsAndRatings()).size()); + assertEquals(6, filterUnratedDocuments(berlinQueryQuality.getHitsAndRatings()).size()); hitsAndRatings = berlinQueryQuality.getHitsAndRatings(); assertEquals(7, hitsAndRatings.size()); for (RatedSearchHit hit : hitsAndRatings) { diff --git a/docs/reference/search/rank-eval.asciidoc b/docs/reference/search/rank-eval.asciidoc index 571a488699168..cf13b9f7b0655 100644 --- a/docs/reference/search/rank-eval.asciidoc +++ b/docs/reference/search/rank-eval.asciidoc @@ -274,7 +274,7 @@ that shows potential errors of individual queries. The response has the followin "details": { "my_query_id1": { <2> "quality_level": 0.6, <3> - "unknown_docs": [ <4> + "unrated_docs": [ <4> { "_index": "my_index", "_id": "1960795" @@ -309,7 +309,7 @@ that shows potential errors of individual queries. 
The response has the followin <1> the overall evaluation quality calculated by the defined metric <2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id <3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score -<4> the `unknown_docs` section contains an `_index` and `_id` entry for each document in the search result for this +<4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this query that didn't have a ratings value. This can be used to ask the user to supply ratings for these documents <5> the `hits` section shows a grouping of the search results with their supplied rating <6> the `metric_details` give additional information about the calculated quality metric (e.g. how many of the retrieved diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java index 2ad3e589bd8c0..91ba1ce61692b 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java @@ -102,8 +102,8 @@ public List getHitsAndRatings() { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(queryId); builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult); - builder.startArray(UNKNOWN_DOCS_FIELD.getPreferredName()); - for (DocumentKey key : EvaluationMetric.filterUnknownDocuments(ratedHits)) { + builder.startArray(UNRATED_DOCS_FIELD.getPreferredName()); + for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) { builder.startObject(); builder.field(RatedDocument.INDEX_FIELD.getPreferredName(), key.getIndex()); 
builder.field(RatedDocument.DOC_ID_FIELD.getPreferredName(), key.getDocId()); @@ -123,7 +123,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level"); - private static final ParseField UNKNOWN_DOCS_FIELD = new ParseField("unknown_docs"); + private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs"); private static final ParseField HITS_FIELD = new ParseField("hits"); private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details"); private static final ObjectParser PARSER = new ObjectParser<>("eval_query_quality", diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java index c67511e051f96..37898fd951638 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java @@ -76,10 +76,9 @@ static List joinHitsWithRatings(SearchHit[] hits, List filterUnknownDocuments(List ratedHits) { - List unknownDocs = ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false) + static List filterUnratedDocuments(List ratedHits) { + return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false) .map(hit -> new DocumentKey(hit.getSearchHit().getIndex(), hit.getSearchHit().getId())).collect(Collectors.toList()); - return unknownDocs; } /** diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java index 56b0c692c411a..e768c2973330e 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java +++ 
b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java @@ -40,7 +40,7 @@ import java.util.Collections; import java.util.List; -import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments; +import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments; import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; import static org.elasticsearch.test.XContentTestUtils.insertRandomFields; import static org.hamcrest.CoreMatchers.containsString; @@ -128,7 +128,7 @@ public void testDCGAtSixMissingRatings() { DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, rated); assertEquals(12.779642067948913, result.getQualityLevel(), DELTA); - assertEquals(2, filterUnknownDocuments(result.getHitsAndRatings()).size()); + assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size()); /** * Check with normalization: to get the maximal possible dcg, sort documents by @@ -185,7 +185,7 @@ public void testDCGAtFourMoreRatings() { DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs); assertEquals(12.392789260714371, result.getQualityLevel(), DELTA); - assertEquals(1, filterUnknownDocuments(result.getHitsAndRatings()).size()); + assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size()); /** * Check with normalization: to get the maximal possible dcg, sort documents by @@ -224,13 +224,13 @@ public void testNoResults() throws Exception { DiscountedCumulativeGain dcg = new DiscountedCumulativeGain(); EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs); assertEquals(0.0d, result.getQualityLevel(), DELTA); - assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size()); + assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size()); // also check normalized dcg = new 
DiscountedCumulativeGain(true, null, 10); result = dcg.evaluate("id", hits, ratedDocs); assertEquals(0.0d, result.getQualityLevel(), DELTA); - assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size()); + assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size()); } public void testParseFromXContent() throws IOException { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java index e9fae6b5c63ee..c9251bb80903d 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java @@ -26,7 +26,6 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.Index; -import org.elasticsearch.index.rankeval.RatedDocument.DocumentKey; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.test.ESTestCase; @@ -52,11 +51,6 @@ protected NamedXContentRegistry xContentRegistry() { } public static EvalQueryQuality randomEvalQueryQuality() { - List unknownDocs = new ArrayList<>(); - int numberOfUnknownDocs = randomInt(5); - for (int i = 0; i < numberOfUnknownDocs; i++) { - unknownDocs.add(new DocumentKey(randomAlphaOfLength(10), randomAlphaOfLength(10))); - } int numberOfSearchHits = randomInt(5); List ratedHits = new ArrayList<>(); for (int i = 0; i < numberOfSearchHits; i++) { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java index b55c57bae2bcf..28200e7d5a0e6 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java +++ 
b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java @@ -40,7 +40,7 @@ import java.util.Map.Entry; import java.util.Set; -import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments; +import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.instanceOf; @@ -120,7 +120,7 @@ public void testPrecisionAtRequest() { for (Entry entry : entrySet) { EvalQueryQuality quality = entry.getValue(); if (entry.getKey() == "amsterdam_query") { - assertEquals(2, filterUnknownDocuments(quality.getHitsAndRatings()).size()); + assertEquals(2, filterUnratedDocuments(quality.getHitsAndRatings()).size()); List hitsAndRatings = quality.getHitsAndRatings(); assertEquals(6, hitsAndRatings.size()); for (RatedSearchHit hit : hitsAndRatings) { @@ -133,7 +133,7 @@ public void testPrecisionAtRequest() { } } if (entry.getKey() == "berlin_query") { - assertEquals(5, filterUnknownDocuments(quality.getHitsAndRatings()).size()); + assertEquals(5, filterUnratedDocuments(quality.getHitsAndRatings()).size()); List hitsAndRatings = quality.getHitsAndRatings(); assertEquals(6, hitsAndRatings.size()); for (RatedSearchHit hit : hitsAndRatings) { diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java index e4fe48482377c..1e94e869d2594 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java @@ -158,7 +158,7 @@ public void testToXContent() throws IOException { " \"details\": {" + " \"coffee_query\": {" + " \"quality_level\": 0.1," + - " \"unknown_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," + + " 
\"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," + " \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," + " \"rating\":5}," + " {\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"456\",\"_score\":1.0}," + diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml index 3900b1f32baa7..62c246fb32066 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml @@ -73,7 +73,7 @@ setup: - match: { quality_level: 1} - match: { details.amsterdam_query.quality_level: 1.0} - - match: { details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]} + - match: { details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]} - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}} - length: { details.amsterdam_query.hits: 3} @@ -85,7 +85,7 @@ setup: - is_false: details.amsterdam_query.hits.2.rating - match: { details.berlin_query.quality_level: 1.0} - - match: { details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc4"}]} + - match: { details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc4"}]} - match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}} - length: { details.berlin_query.hits: 2} - match: { details.berlin_query.hits.0.hit._id: "doc1" } @@ -155,9 +155,9 @@ setup: - gt: {details.amsterdam_query.quality_level: 0.333} - lt: {details.amsterdam_query.quality_level: 0.334} - match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}} - - match: {details.amsterdam_query.unknown_docs: [ {"_index": "foo", "_id": "doc2"}, + - match: {details.amsterdam_query.unrated_docs: [ {"_index": "foo", "_id": "doc2"}, 
{"_index": "foo", "_id": "doc3"} ]} - match: {details.berlin_query.quality_level: 0.5} - match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}} - - match: {details.berlin_query.unknown_docs: [ {"_index": "foo", "_id": "doc1"}]} + - match: {details.berlin_query.unrated_docs: [ {"_index": "foo", "_id": "doc1"}]} diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml index fc5e6576ad4d1..baf10f1542cfb 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml @@ -73,7 +73,7 @@ - lt: {quality_level: 13.848264 } - gt: {details.dcg_query.quality_level: 13.848263} - lt: {details.dcg_query.quality_level: 13.848264} - - match: {details.dcg_query.unknown_docs: [ ]} + - match: {details.dcg_query.unrated_docs: [ ]} # reverse the order in which the results are returned (less relevant docs first) @@ -100,7 +100,7 @@ - lt: {quality_level: 10.299675} - gt: {details.dcg_query_reverse.quality_level: 10.299674} - lt: {details.dcg_query_reverse.quality_level: 10.299675} - - match: {details.dcg_query_reverse.unknown_docs: [ ]} + - match: {details.dcg_query_reverse.unrated_docs: [ ]} # if we mix both, we should get the average @@ -138,7 +138,7 @@ - lt: {quality_level: 12.073970} - gt: {details.dcg_query.quality_level: 13.848263} - lt: {details.dcg_query.quality_level: 13.848264} - - match: {details.dcg_query.unknown_docs: [ ]} + - match: {details.dcg_query.unrated_docs: [ ]} - gt: {details.dcg_query_reverse.quality_level: 10.299674} - lt: {details.dcg_query_reverse.quality_level: 10.299675} - - match: {details.dcg_query_reverse.unknown_docs: [ ]} + - match: {details.dcg_query_reverse.unrated_docs: [ ]} diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml 
b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml index 4008f677185e5..d6119ad3a9e95 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml @@ -36,7 +36,7 @@ - match: { quality_level: 1} - match: { details.amsterdam_query.quality_level: 1.0} - - match: { details.amsterdam_query.unknown_docs: [ ]} + - match: { details.amsterdam_query.unrated_docs: [ ]} - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}} - is_true: failures.invalid_query diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml index f0c564d363904..5e0082d213c90 100644 --- a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml +++ b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml @@ -85,7 +85,7 @@ setup: } - match: {quality_level: 0.9} - - match: {details.amsterdam_query.unknown_docs.0._id: "6"} + - match: {details.amsterdam_query.unrated_docs.0._id: "6"} --- "Test illegal request parts":