Rename ranking evaluation response section (#32166)

Currently the ranking evaluation response contains an `unknown_docs` section for each search use case in the evaluation set. It contains the document ids of results in the search hits that currently don't have a quality rating. This change renames it to `unrated_docs`, which better reflects its purpose.
elastic · Jul 20, 2018 · 06cc3fb · 06cc3fb
1 parent 56acf4d
commit 06cc3fb
Show file tree

Hide file tree

Showing 12 changed files with 29 additions and 36 deletions.
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RankEvalIT.java
@@ -40,7 +40,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
+import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
 
 public class RankEvalIT extends ESRestHighLevelClientTestCase {
 
@@ -85,7 +85,7 @@ public void testRankEvalRequest() throws IOException {
         Map<String, EvalQueryQuality> partialResults = response.getPartialResults();
         assertEquals(2, partialResults.size());
         EvalQueryQuality amsterdamQueryQuality = partialResults.get("amsterdam_query");
-        assertEquals(2, filterUnknownDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
+        assertEquals(2, filterUnratedDocuments(amsterdamQueryQuality.getHitsAndRatings()).size());
         List<RatedSearchHit> hitsAndRatings = amsterdamQueryQuality.getHitsAndRatings();
         assertEquals(7, hitsAndRatings.size());
         for (RatedSearchHit hit : hitsAndRatings) {
@@ -97,7 +97,7 @@ public void testRankEvalRequest() throws IOException {
             }
         }
         EvalQueryQuality berlinQueryQuality = partialResults.get("berlin_query");
-        assertEquals(6, filterUnknownDocuments(berlinQueryQuality.getHitsAndRatings()).size());
+        assertEquals(6, filterUnratedDocuments(berlinQueryQuality.getHitsAndRatings()).size());
         hitsAndRatings = berlinQueryQuality.getHitsAndRatings();
         assertEquals(7, hitsAndRatings.size());
         for (RatedSearchHit hit : hitsAndRatings) {

diff --git a/docs/reference/search/rank-eval.asciidoc b/docs/reference/search/rank-eval.asciidoc
@@ -278,7 +278,7 @@ that shows potential errors of individual queries. The response has the followin
         "details": {  
             "my_query_id1": { <2>
                 "quality_level": 0.6, <3>
-                "unknown_docs": [ <4>
+                "unrated_docs": [ <4>
                     {
                         "_index": "my_index",
                         "_id": "1960795"
@@ -313,7 +313,7 @@ that shows potential errors of individual queries. The response has the followin
 <1> the overall evaluation quality calculated by the defined metric
 <2> the `details` section contains one entry for every query in the original `requests` section, keyed by the search request id
 <3> the `quality_level` in the `details` section shows the contribution of this query to the global quality score
-<4> the `unknown_docs` section contains an `_index` and `_id` entry for each document in the search result for this
+<4> the `unrated_docs` section contains an `_index` and `_id` entry for each document in the search result for this
 query that didn't have a ratings value. This can be used to ask the user to supply ratings for these documents
 <5> the `hits` section shows a grouping of the search results with their supplied rating
 <6> the `metric_details` give additional information about the calculated quality metric (e.g. how many of the retrieved

diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvalQueryQuality.java
@@ -102,8 +102,8 @@ public List<RatedSearchHit> getHitsAndRatings() {
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(queryId);
         builder.field(QUALITY_LEVEL_FIELD.getPreferredName(), this.evaluationResult);
-        builder.startArray(UNKNOWN_DOCS_FIELD.getPreferredName());
-        for (DocumentKey key : EvaluationMetric.filterUnknownDocuments(ratedHits)) {
+        builder.startArray(UNRATED_DOCS_FIELD.getPreferredName());
+        for (DocumentKey key : EvaluationMetric.filterUnratedDocuments(ratedHits)) {
             builder.startObject();
             builder.field(RatedDocument.INDEX_FIELD.getPreferredName(), key.getIndex());
             builder.field(RatedDocument.DOC_ID_FIELD.getPreferredName(), key.getDocId());
@@ -123,7 +123,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
     }
 
     private static final ParseField QUALITY_LEVEL_FIELD = new ParseField("quality_level");
-    private static final ParseField UNKNOWN_DOCS_FIELD = new ParseField("unknown_docs");
+    private static final ParseField UNRATED_DOCS_FIELD = new ParseField("unrated_docs");
     private static final ParseField HITS_FIELD = new ParseField("hits");
     private static final ParseField METRIC_DETAILS_FIELD = new ParseField("metric_details");
     private static final ObjectParser<ParsedEvalQueryQuality, Void> PARSER = new ObjectParser<>("eval_query_quality",

diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/EvaluationMetric.java
@@ -76,10 +76,9 @@ static List<RatedSearchHit> joinHitsWithRatings(SearchHit[] hits, List<RatedDocu
     /**
      * filter @link {@link RatedSearchHit} that don't have a rating
      */
-    static List<DocumentKey> filterUnknownDocuments(List<RatedSearchHit> ratedHits) {
-        List<DocumentKey> unknownDocs = ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
+    static List<DocumentKey> filterUnratedDocuments(List<RatedSearchHit> ratedHits) {
+        return ratedHits.stream().filter(hit -> hit.getRating().isPresent() == false)
                 .map(hit -> new DocumentKey(hit.getSearchHit().getIndex(), hit.getSearchHit().getId())).collect(Collectors.toList());
-        return unknownDocs;
     }
 
     /**

diff --git a/...nk-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java b/...nk-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java
@@ -40,7 +40,7 @@
 import java.util.Collections;
 import java.util.List;
 
-import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
+import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
 import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode;
 import static org.elasticsearch.test.XContentTestUtils.insertRandomFields;
 import static org.hamcrest.CoreMatchers.containsString;
@@ -128,7 +128,7 @@ public void testDCGAtSixMissingRatings() {
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, rated);
         assertEquals(12.779642067948913, result.getQualityLevel(), DELTA);
-        assertEquals(2, filterUnknownDocuments(result.getHitsAndRatings()).size());
+        assertEquals(2, filterUnratedDocuments(result.getHitsAndRatings()).size());
 
         /**
          * Check with normalization: to get the maximal possible dcg, sort documents by
@@ -185,7 +185,7 @@ public void testDCGAtFourMoreRatings() {
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
         assertEquals(12.392789260714371, result.getQualityLevel(), DELTA);
-        assertEquals(1, filterUnknownDocuments(result.getHitsAndRatings()).size());
+        assertEquals(1, filterUnratedDocuments(result.getHitsAndRatings()).size());
 
         /**
          * Check with normalization: to get the maximal possible dcg, sort documents by
@@ -224,13 +224,13 @@ public void testNoResults() throws Exception {
         DiscountedCumulativeGain dcg = new DiscountedCumulativeGain();
         EvalQueryQuality result = dcg.evaluate("id", hits, ratedDocs);
         assertEquals(0.0d, result.getQualityLevel(), DELTA);
-        assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
+        assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
 
         // also check normalized
         dcg = new DiscountedCumulativeGain(true, null, 10);
         result = dcg.evaluate("id", hits, ratedDocs);
         assertEquals(0.0d, result.getQualityLevel(), DELTA);
-        assertEquals(0, filterUnknownDocuments(result.getHitsAndRatings()).size());
+        assertEquals(0, filterUnratedDocuments(result.getHitsAndRatings()).size());
     }
 
     public void testParseFromXContent() throws IOException {

diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/EvalQueryQualityTests.java
@@ -26,7 +26,6 @@
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.Index;
-import org.elasticsearch.index.rankeval.RatedDocument.DocumentKey;
 import org.elasticsearch.search.SearchShardTarget;
 import org.elasticsearch.test.ESTestCase;
 
@@ -52,11 +51,6 @@ protected NamedXContentRegistry xContentRegistry() {
     }
 
     public static EvalQueryQuality randomEvalQueryQuality() {
-        List<DocumentKey> unknownDocs = new ArrayList<>();
-        int numberOfUnknownDocs = randomInt(5);
-        for (int i = 0; i < numberOfUnknownDocs; i++) {
-            unknownDocs.add(new DocumentKey(randomAlphaOfLength(10), randomAlphaOfLength(10)));
-        }
         int numberOfSearchHits = randomInt(5);
         List<RatedSearchHit> ratedHits = new ArrayList<>();
         for (int i = 0; i < numberOfSearchHits; i++) {

diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalRequestIT.java
@@ -40,7 +40,7 @@
 import java.util.Map.Entry;
 import java.util.Set;
 
-import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnknownDocuments;
+import static org.elasticsearch.index.rankeval.EvaluationMetric.filterUnratedDocuments;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.instanceOf;
 
@@ -120,7 +120,7 @@ public void testPrecisionAtRequest() {
         for (Entry<String, EvalQueryQuality> entry : entrySet) {
             EvalQueryQuality quality = entry.getValue();
             if (entry.getKey() == "amsterdam_query") {
-                assertEquals(2, filterUnknownDocuments(quality.getHitsAndRatings()).size());
+                assertEquals(2, filterUnratedDocuments(quality.getHitsAndRatings()).size());
                 List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
                 assertEquals(6, hitsAndRatings.size());
                 for (RatedSearchHit hit : hitsAndRatings) {
@@ -133,7 +133,7 @@ public void testPrecisionAtRequest() {
                 }
             }
             if (entry.getKey() == "berlin_query") {
-                assertEquals(5, filterUnknownDocuments(quality.getHitsAndRatings()).size());
+                assertEquals(5, filterUnratedDocuments(quality.getHitsAndRatings()).size());
                 List<RatedSearchHit> hitsAndRatings = quality.getHitsAndRatings();
                 assertEquals(6, hitsAndRatings.size());
                 for (RatedSearchHit hit : hitsAndRatings) {

diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RankEvalResponseTests.java
@@ -158,7 +158,7 @@ public void testToXContent() throws IOException {
                 "    \"details\": {" +
                 "        \"coffee_query\": {" +
                 "            \"quality_level\": 0.1," +
-                "            \"unknown_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
+                "            \"unrated_docs\": [{\"_index\":\"index\",\"_id\":\"456\"}]," +
                 "            \"hits\":[{\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"123\",\"_score\":1.0}," +
                 "                       \"rating\":5}," +
                 "                      {\"hit\":{\"_index\":\"index\",\"_type\":\"\",\"_id\":\"456\",\"_score\":1.0}," +

diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/10_basic.yml
@@ -73,7 +73,7 @@ setup:
 
   - match: { quality_level: 1}
   - match: { details.amsterdam_query.quality_level: 1.0}
-  - match: { details.amsterdam_query.unknown_docs:  [ {"_index": "foo", "_id": "doc4"}]}
+  - match: { details.amsterdam_query.unrated_docs:  [ {"_index": "foo", "_id": "doc4"}]}
   - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 2, "docs_retrieved": 2}}
 
   - length: { details.amsterdam_query.hits: 3}
@@ -85,7 +85,7 @@ setup:
   - is_false: details.amsterdam_query.hits.2.rating
 
   - match: { details.berlin_query.quality_level: 1.0}
-  - match: { details.berlin_query.unknown_docs:  [ {"_index": "foo", "_id": "doc4"}]}
+  - match: { details.berlin_query.unrated_docs:  [ {"_index": "foo", "_id": "doc4"}]}
   - match: { details.berlin_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
   - length: { details.berlin_query.hits: 2}
   - match: { details.berlin_query.hits.0.hit._id: "doc1" }
@@ -159,9 +159,9 @@ setup:
   - gt: {details.amsterdam_query.quality_level: 0.333}
   - lt: {details.amsterdam_query.quality_level: 0.334}
   - match: {details.amsterdam_query.metric_details.mean_reciprocal_rank: {"first_relevant": 3}}
-  - match: {details.amsterdam_query.unknown_docs:  [ {"_index": "foo", "_id": "doc2"},
+  - match: {details.amsterdam_query.unrated_docs:  [ {"_index": "foo", "_id": "doc2"},
                                                                {"_index": "foo", "_id": "doc3"} ]}
   - match: {details.berlin_query.quality_level: 0.5}
   - match: {details.berlin_query.metric_details.mean_reciprocal_rank: {"first_relevant": 2}}
-  - match: {details.berlin_query.unknown_docs:  [ {"_index": "foo", "_id": "doc1"}]}
+  - match: {details.berlin_query.unrated_docs:  [ {"_index": "foo", "_id": "doc1"}]}
 
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/20_dcg.yml
@@ -73,7 +73,7 @@
   - lt: {quality_level: 13.848264 }
   - gt: {details.dcg_query.quality_level: 13.848263}
   - lt: {details.dcg_query.quality_level: 13.848264}
-  - match: {details.dcg_query.unknown_docs: [ ]}
+  - match: {details.dcg_query.unrated_docs: [ ]}
 
 # reverse the order in which the results are returned (less relevant docs first)
 
@@ -100,7 +100,7 @@
   - lt: {quality_level: 10.299675}
   - gt: {details.dcg_query_reverse.quality_level: 10.299674}
   - lt: {details.dcg_query_reverse.quality_level: 10.299675}
-  - match: {details.dcg_query_reverse.unknown_docs: [ ]}
+  - match: {details.dcg_query_reverse.unrated_docs: [ ]}
 
 # if we mix both, we should get the average
 
@@ -138,7 +138,7 @@
   - lt: {quality_level: 12.073970}
   - gt: {details.dcg_query.quality_level: 13.848263}
   - lt: {details.dcg_query.quality_level: 13.848264}
-  - match: {details.dcg_query.unknown_docs: [ ]}
+  - match: {details.dcg_query.unrated_docs: [ ]}
   - gt: {details.dcg_query_reverse.quality_level: 10.299674}
   - lt: {details.dcg_query_reverse.quality_level: 10.299675}
-  - match: {details.dcg_query_reverse.unknown_docs: [ ]}
+  - match: {details.dcg_query_reverse.unrated_docs: [ ]}
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/30_failures.yml
@@ -36,7 +36,7 @@
 
   - match: { quality_level: 1}
   - match: { details.amsterdam_query.quality_level: 1.0}
-  - match: { details.amsterdam_query.unknown_docs:  [ ]}
+  - match: { details.amsterdam_query.unrated_docs:  [ ]}
   - match: { details.amsterdam_query.metric_details.precision: {"relevant_docs_retrieved": 1, "docs_retrieved": 1}}
 
   - is_true: failures.invalid_query
diff --git a/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml b/modules/rank-eval/src/test/resources/rest-api-spec/test/rank_eval/40_rank_eval_templated.yml
@@ -85,7 +85,7 @@ setup:
         }
 
   - match: {quality_level: 0.9}
-  - match: {details.amsterdam_query.unknown_docs.0._id:  "6"}
+  - match: {details.amsterdam_query.unrated_docs.0._id:  "6"}
 
 ---
 "Test illegal request parts":