From 379fad1ed9e923c5c882e7a6d19a12c3fdabebe2 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 13 Jul 2021 07:40:56 -0400 Subject: [PATCH] Refactor significance heuristic tests for easier extensibility (#75264) The significant terms heuristic tests do not lend themselves well to new heuristics being added. This commit extracts common code and builds an abstract significance heuristic test class. This way new heuristics get the common suite of tests by extending a test class. --- ...> AbstractSignificanceHeuristicTests.java} | 281 +++++++----------- .../SignificantTermsAggregatorTests.java | 36 ++- .../terms/heuristic/ChiSquareTests.java | 28 ++ .../bucket/terms/heuristic/GNDTests.java | 48 +++ .../bucket/terms/heuristic/JLHScoreTests.java | 28 ++ .../heuristic/MutualInformationTests.java | 71 +++++ .../terms/heuristic/PercentageScoreTests.java | 28 ++ 7 files changed, 334 insertions(+), 186 deletions(-) rename server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/{SignificanceHeuristicTests.java => AbstractSignificanceHeuristicTests.java} (65%) create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceHeuristicTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java similarity index 65% rename from 
server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceHeuristicTests.java rename to server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java index 9a9f78849dbfa..80d2056c4cce3 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceHeuristicTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java @@ -5,6 +5,7 @@ * in compliance with, at your election, the Elastic License 2.0 or the Server * Side Public License, v 1. */ + package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.lucene.util.BytesRef; @@ -17,6 +18,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParseException; @@ -26,11 +28,6 @@ import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; -import org.elasticsearch.search.aggregations.bucket.terms.heuristic.ChiSquare; -import org.elasticsearch.search.aggregations.bucket.terms.heuristic.GND; -import org.elasticsearch.search.aggregations.bucket.terms.heuristic.JLHScore; -import org.elasticsearch.search.aggregations.bucket.terms.heuristic.MutualInformation; -import org.elasticsearch.search.aggregations.bucket.terms.heuristic.PercentageScore; import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.InternalAggregationTestCase; @@ -54,16 +51,27 @@ import static 
org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; -public class SignificanceHeuristicTests extends ESTestCase { +/** + * Abstract test case for testing significant term heuristics + */ +public abstract class AbstractSignificanceHeuristicTests extends ESTestCase { + + /** + * @return A random instance of the heuristic to test + */ + protected abstract SignificanceHeuristic getHeuristic(); + + /** + * @return test if the score is `0` with a subset frequency of `0` + */ + protected abstract boolean testZeroScore(); // test that stream output can actually be read - does not replace bwc test public void testStreamResponse() throws Exception { Version version = randomVersion(random()); - InternalMappedSignificantTerms sigTerms = getRandomSignificantTerms(getRandomSignificanceheuristic()); + InternalMappedSignificantTerms sigTerms = getRandomSignificantTerms(getHeuristic()); // write ByteArrayOutputStream outBuffer = new ByteArrayOutputStream(); @@ -103,15 +111,6 @@ public void testStreamResponse() throws Exception { } } - public static SignificanceHeuristic getRandomSignificanceheuristic() { - List heuristics = new ArrayList<>(); - heuristics.add(new JLHScore()); - heuristics.add(new MutualInformation(randomBoolean(), randomBoolean())); - heuristics.add(new GND(randomBoolean())); - heuristics.add(new ChiSquare(randomBoolean(), randomBoolean())); - return heuristics.get(randomInt(3)); - } - public void testReduce() { List aggs = createInternalAggregations(); InternalAggregation.ReduceContext context = InternalAggregationTestCase.emptyReduceContextBuilder().forFinalReduction(); @@ -127,10 +126,89 @@ public void testReduce() { assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30L)); } + public void 
testBasicScoreProperties() { + SignificanceHeuristic heuristic = getHeuristic(); + assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); + assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3))); + assertThat(heuristic.getScore(1, 1, 3, 4), lessThan(heuristic.getScore(1, 1, 2, 4))); + if (testZeroScore()) { + assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(0.0)); + } + + double score = 0.0; + try { + long a = randomLong(); + long b = randomLong(); + long c = randomLong(); + long d = randomLong(); + score = heuristic.getScore(a, b, c, d); + } catch (IllegalArgumentException e) { + } + assertThat(score, greaterThanOrEqualTo(0.0)); + } + + /** + * Testing heuristic specific assertions + * Typically, this method would call either + * {@link AbstractSignificanceHeuristicTests#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)} + * or {@link AbstractSignificanceHeuristicTests#testAssertions(SignificanceHeuristic)} + * depending on which was appropriate + */ + public abstract void testAssertions(); + + public void testParseFromString() throws IOException { + SignificanceHeuristic significanceHeuristic = getHeuristic(); + try (XContentBuilder builder = JsonXContent.contentBuilder()){ + builder.startObject() + .field("field", "text") + .field("min_doc_count", "200"); + significanceHeuristic.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + try (XContentParser stParser = createParser(builder)) { + SignificanceHeuristic parsedHeuristic = parseSignificanceHeuristic(stParser); + assertThat(significanceHeuristic, equalTo(parsedHeuristic)); + } + } + } + + public void testParseFromAggBuilder() throws IOException { + SignificanceHeuristic significanceHeuristic = getHeuristic(); + SignificantTermsAggregationBuilder stBuilder = significantTerms("testagg"); + stBuilder.significanceHeuristic(significanceHeuristic).field("text").minDocCount(200); + XContentBuilder stXContentBuilder = 
XContentFactory.jsonBuilder(); + stBuilder.internalXContent(stXContentBuilder, null); + XContentParser stParser = createParser(JsonXContent.jsonXContent, Strings.toString(stXContentBuilder)); + SignificanceHeuristic parsedHeuristic = parseSignificanceHeuristic(stParser); + assertThat(significanceHeuristic, equalTo(parsedHeuristic)); + } + + public void testParseFailure() throws IOException { + SignificanceHeuristic significanceHeuristic = getHeuristic(); + try (XContentBuilder builder = JsonXContent.contentBuilder()){ + builder.startObject() + .field("field", "text") + .startObject(significanceHeuristic.getWriteableName()) + .field("unknown_field", false) + .endObject() + .field("min_doc_count", "200") + .endObject(); + try (XContentParser stParser = createParser(builder)) { + try { + parseSignificanceHeuristic(stParser); + fail("parsing the heurstic should have failed"); + } catch (XContentParseException e) { + assertThat(e.getMessage(), containsString("unknown field [unknown_field]")); + } + } + } + } + // Create aggregations as they might come from three different shards and return as list. private List createInternalAggregations() { - SignificanceHeuristic significanceHeuristic = getRandomSignificanceheuristic(); - TestAggFactory factory = randomBoolean() ? new StringTestAggFactory() : new LongTestAggFactory(); + SignificanceHeuristic significanceHeuristic = getHeuristic(); + AbstractSignificanceHeuristicTests.TestAggFactory factory = randomBoolean() ? 
+ new AbstractSignificanceHeuristicTests.StringTestAggFactory() : + new AbstractSignificanceHeuristicTests.LongTestAggFactory(); List aggs = new ArrayList<>(); aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0))); @@ -176,67 +254,10 @@ SignificantLongTerms createAggregation(SignificanceHeuristic significanceHeurist @Override SignificantLongTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { return new SignificantLongTerms.Bucket(subsetDF, subsetSize, supersetDF, supersetSize, label, InternalAggregations.EMPTY, - DocValueFormat.RAW, 0); - } - } - - // test that - // 1. The output of the builders can actually be parsed - // 2. The parser does not swallow parameters after a significance heuristic was defined - public void testBuilderAndParser() throws Exception { - // test jlh with string - assertTrue(parseFromString("\"jlh\":{}") instanceof JLHScore); - // test gnd with string - assertTrue(parseFromString("\"gnd\":{}") instanceof GND); - // test mutual information with string - boolean includeNegatives = randomBoolean(); - boolean backgroundIsSuperset = randomBoolean(); - String mutual = "\"mutual_information\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" - + backgroundIsSuperset + "}"; - assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset), - parseFromString(mutual)); - String chiSquare = "\"chi_square\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" - + backgroundIsSuperset + "}"; - assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset), - parseFromString(chiSquare)); - - // test with builders - assertThat(parseFromBuilder(new JLHScore()), instanceOf(JLHScore.class)); - assertThat(parseFromBuilder(new GND(backgroundIsSuperset)), instanceOf(GND.class)); - assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset), - parseFromBuilder(new 
MutualInformation(includeNegatives, backgroundIsSuperset))); - assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset), - parseFromBuilder(new ChiSquare(includeNegatives, backgroundIsSuperset))); - - // test exceptions - String expectedError = "unknown field [unknown_field]"; - checkParseException("\"mutual_information\":{\"include_negatives\": false, \"unknown_field\": false}", expectedError); - checkParseException("\"chi_square\":{\"unknown_field\": true}", expectedError); - checkParseException("\"jlh\":{\"unknown_field\": true}", expectedError); - checkParseException("\"gnd\":{\"unknown_field\": true}", expectedError); - } - - protected void checkParseException(String faultyHeuristicDefinition, String expectedError) throws IOException { - - try (XContentParser stParser = createParser(JsonXContent.jsonXContent, - "{\"field\":\"text\", " + faultyHeuristicDefinition + ",\"min_doc_count\":200}")) { - stParser.nextToken(); - SignificantTermsAggregationBuilder.parse("testagg", stParser); - fail(); - } catch (XContentParseException e) { - assertThat(e.getMessage(), containsString(expectedError)); + DocValueFormat.RAW, 0); } } - protected SignificanceHeuristic parseFromBuilder(SignificanceHeuristic significanceHeuristic) throws IOException { - SignificantTermsAggregationBuilder stBuilder = significantTerms("testagg"); - stBuilder.significanceHeuristic(significanceHeuristic).field("text").minDocCount(200); - XContentBuilder stXContentBuilder = XContentFactory.jsonBuilder(); - stBuilder.internalXContent(stXContentBuilder, null); - XContentParser stParser = createParser(JsonXContent.jsonXContent, Strings.toString(stXContentBuilder)); - return parseSignificanceHeuristic(stParser); - } - private static SignificanceHeuristic parseSignificanceHeuristic(XContentParser stParser) throws IOException { stParser.nextToken(); SignificantTermsAggregationBuilder aggregatorFactory = SignificantTermsAggregationBuilder.parse("testagg", stParser); @@ -247,14 +268,12 @@ private 
static SignificanceHeuristic parseSignificanceHeuristic(XContentParser s return aggregatorFactory.significanceHeuristic(); } - protected SignificanceHeuristic parseFromString(String heuristicString) throws IOException { - try (XContentParser stParser = createParser(JsonXContent.jsonXContent, - "{\"field\":\"text\", " + heuristicString + ", \"min_doc_count\":200}")) { - return parseSignificanceHeuristic(stParser); - } + @Override + protected NamedXContentRegistry xContentRegistry() { + return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents()); } - void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, SignificanceHeuristic heuristicNotSuperset) { + protected void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, SignificanceHeuristic heuristicNotSuperset) { try { heuristicIsSuperset.getScore(2, 3, 1, 4); fail(); @@ -326,7 +345,7 @@ void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, Signifi } } - void testAssertions(SignificanceHeuristic heuristic) { + protected void testAssertions(SignificanceHeuristic heuristic) { try { int idx = randomInt(3); long[] values = {1, 2, 3, 4}; @@ -353,96 +372,4 @@ void testAssertions(SignificanceHeuristic heuristic) { } } - public void testAssertions() throws Exception { - testBackgroundAssertions(new MutualInformation(true, true), new MutualInformation(true, false)); - testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); - testBackgroundAssertions(new GND(true), new GND(false)); - testAssertions(new PercentageScore()); - testAssertions(new JLHScore()); - } - - public void testBasicScoreProperties() { - basicScoreProperties(new JLHScore(), true); - basicScoreProperties(new GND(true), true); - basicScoreProperties(new PercentageScore(), true); - basicScoreProperties(new MutualInformation(true, true), false); - basicScoreProperties(new ChiSquare(true, true), false); - } - - public void 
basicScoreProperties(SignificanceHeuristic heuristic, boolean test0) { - assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); - assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3))); - assertThat(heuristic.getScore(1, 1, 3, 4), lessThan(heuristic.getScore(1, 1, 2, 4))); - if (test0) { - assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(0.0)); - } - - double score = 0.0; - try { - long a = randomLong(); - long b = randomLong(); - long c = randomLong(); - long d = randomLong(); - score = heuristic.getScore(a, b, c, d); - } catch (IllegalArgumentException e) { - } - assertThat(score, greaterThanOrEqualTo(0.0)); - } - - public void testScoreMutual() throws Exception { - SignificanceHeuristic heuristic = new MutualInformation(true, true); - assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); - assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3))); - assertThat(heuristic.getScore(2, 2, 2, 4), equalTo(1.0)); - assertThat(heuristic.getScore(0, 2, 2, 4), equalTo(1.0)); - assertThat(heuristic.getScore(2, 2, 4, 4), equalTo(0.0)); - assertThat(heuristic.getScore(1, 2, 2, 4), equalTo(0.0)); - assertThat(heuristic.getScore(3, 6, 9, 18), equalTo(0.0)); - - double score = 0.0; - try { - long a = randomLong(); - long b = randomLong(); - long c = randomLong(); - long d = randomLong(); - score = heuristic.getScore(a, b, c, d); - } catch (IllegalArgumentException e) { - } - assertThat(score, lessThanOrEqualTo(1.0)); - assertThat(score, greaterThanOrEqualTo(0.0)); - heuristic = new MutualInformation(false, true); - assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(Double.NEGATIVE_INFINITY)); - - heuristic = new MutualInformation(true, false); - score = heuristic.getScore(2, 3, 1, 4); - assertThat(score, greaterThanOrEqualTo(0.0)); - assertThat(score, lessThanOrEqualTo(1.0)); - score = heuristic.getScore(1, 4, 2, 3); - assertThat(score, greaterThanOrEqualTo(0.0)); - assertThat(score, 
lessThanOrEqualTo(1.0)); - score = heuristic.getScore(1, 3, 4, 4); - assertThat(score, greaterThanOrEqualTo(0.0)); - assertThat(score, lessThanOrEqualTo(1.0)); - } - - public void testGNDCornerCases() throws Exception { - GND gnd = new GND(true); - //term is only in the subset, not at all in the other set but that is because the other set is empty. - // this should actually not happen because only terms that are in the subset are considered now, - // however, in this case the score should be 0 because a term that does not exist cannot be relevant... - assertThat(gnd.getScore(0, randomIntBetween(1, 2), 0, randomIntBetween(2,3)), equalTo(0.0)); - // the terms do not co-occur at all - should be 0 - assertThat(gnd.getScore(0, randomIntBetween(1, 2), randomIntBetween(2, 3), randomIntBetween(5,6)), equalTo(0.0)); - // comparison between two terms that do not exist - probably not relevant - assertThat(gnd.getScore(0, 0, 0, randomIntBetween(1,2)), equalTo(0.0)); - // terms co-occur perfectly - should be 1 - assertThat(gnd.getScore(1, 1, 1, 1), equalTo(1.0)); - gnd = new GND(false); - assertThat(gnd.getScore(0, 0, 0, 0), equalTo(0.0)); - } - - @Override - protected NamedXContentRegistry xContentRegistry() { - return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents()); - } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java index 941d38f7b4ed7..1098776800e63 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java @@ -40,10 +40,17 @@ import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import 
org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.ChiSquare; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.GND; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.JLHScore; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.PercentageScore; +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -51,6 +58,17 @@ import static org.hamcrest.Matchers.equalTo; public class SignificantTermsAggregatorTests extends AggregatorTestCase { + + static SignificanceHeuristic getRandomSignificanceheuristic() { + List heuristics = new ArrayList<>(); + heuristics.add(new JLHScore()); + heuristics.add(new MutualInformation(randomBoolean(), randomBoolean())); + heuristics.add(new GND(randomBoolean())); + heuristics.add(new ChiSquare(randomBoolean(), randomBoolean())); + heuristics.add(new PercentageScore()); + return heuristics.get(randomInt(4)); + } + @Override protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { return new SignificantTermsAggregationBuilder("foo").field(fieldName); @@ -75,10 +93,7 @@ protected List unsupportedMappedFieldTypes() { ); } - /** - * Uses the significant terms aggregation to find the keywords in text fields - */ - public void testSignificance() throws IOException { + public void testSignificance(SignificanceHeuristic heuristic) throws IOException { TextFieldType textFieldType = new TextFieldType("text"); textFieldType.setFielddata(true); @@ -135,7 +150,7 
@@ public void testSignificance() throws IOException { String evenStrings[] = new String[] {"even", "regular"}; sigAgg.includeExclude(new IncludeExclude(oddStrings, evenStrings)); - sigAgg.significanceHeuristic(SignificanceHeuristicTests.getRandomSignificanceheuristic()); + sigAgg.significanceHeuristic(heuristic); terms = searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), sigAgg, textFieldType); assertThat(terms.getSubsetSize(), equalTo(5L)); assertEquals(1, terms.getBuckets().size()); @@ -159,6 +174,13 @@ public void testSignificance() throws IOException { } } + /** + * Uses the significant terms aggregation to find the keywords in text fields + */ + public void testSignificance() throws IOException { + testSignificance(getRandomSignificanceheuristic()); + } + /** * Uses the significant terms aggregation to find the keywords in numeric * fields @@ -167,8 +189,6 @@ public void testNumericSignificance() throws IOException { NumberFieldType longFieldType = new NumberFieldMapper.NumberFieldType("long_field", NumberFieldMapper.NumberType.LONG); - TextFieldType textFieldType = new TextFieldType("text"); - IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); indexWriterConfig.setMaxBufferedDocs(100); indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment @@ -257,8 +277,6 @@ public void testUnmapped() throws IOException { */ public void testRangeField() throws IOException { RangeType rangeType = RangeType.DOUBLE; - final RangeFieldMapper.Range range1 = new RangeFieldMapper.Range(rangeType, 1.0D, 5.0D, true, true); - final RangeFieldMapper.Range range2 = new RangeFieldMapper.Range(rangeType, 6.0D, 10.0D, true, true); final String fieldName = "rangeField"; MappedFieldType fieldType = new RangeFieldMapper.RangeFieldType(fieldName, rangeType); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java 
b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java new file mode 100644 index 0000000000000..0ea14fc3af9e0 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.aggregations.bucket.terms.heuristic; + +import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; + +public class ChiSquareTests extends AbstractSignificanceHeuristicTests { + @Override + protected SignificanceHeuristic getHeuristic() { + return new ChiSquare(randomBoolean(), randomBoolean()); + } + + @Override + protected boolean testZeroScore() { + return false; + } + + @Override + public void testAssertions() { + testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java new file mode 100644 index 0000000000000..2f6b839c8ffc9 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.terms.heuristic; + +import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; + +import static org.hamcrest.Matchers.equalTo; + +public class GNDTests extends AbstractSignificanceHeuristicTests { + @Override + protected SignificanceHeuristic getHeuristic() { + return new GND(randomBoolean()); + } + + @Override + protected boolean testZeroScore() { + return true; + } + + @Override + public void testAssertions() { + testBackgroundAssertions(new GND(true), new GND(false)); + } + + /** + * term is only in the subset, not at all in the other set but that is because the other set is empty. + * this should actually not happen because only terms that are in the subset are considered now, + * however, in this case the score should be 0 because a term that does not exist cannot be relevant... + */ + public void testGNDCornerCases() { + GND gnd = new GND(true); + assertThat(gnd.getScore(0, randomIntBetween(1, 2), 0, randomIntBetween(2,3)), equalTo(0.0)); + // the terms do not co-occur at all - should be 0 + assertThat(gnd.getScore(0, randomIntBetween(1, 2), randomIntBetween(2, 3), randomIntBetween(5,6)), equalTo(0.0)); + // comparison between two terms that do not exist - probably not relevant + assertThat(gnd.getScore(0, 0, 0, randomIntBetween(1,2)), equalTo(0.0)); + // terms co-occur perfectly - should be 1 + assertThat(gnd.getScore(1, 1, 1, 1), equalTo(1.0)); + gnd = new GND(false); + assertThat(gnd.getScore(0, 0, 0, 0), equalTo(0.0)); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java new file mode 100644 index 0000000000000..72fff07a939c6 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch 
B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.aggregations.bucket.terms.heuristic; + +import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; + +public class JLHScoreTests extends AbstractSignificanceHeuristicTests { + @Override + protected SignificanceHeuristic getHeuristic() { + return new JLHScore(); + } + + @Override + protected boolean testZeroScore() { + return true; + } + + @Override + public void testAssertions() { + testAssertions(new JLHScore()); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java new file mode 100644 index 0000000000000..f649b90cd7c5e --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java @@ -0,0 +1,71 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.terms.heuristic; + +import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class MutualInformationTests extends AbstractSignificanceHeuristicTests { + @Override + protected SignificanceHeuristic getHeuristic() { + return new MutualInformation(randomBoolean(), randomBoolean()); + } + + @Override + protected boolean testZeroScore() { + return false; + } + + @Override + public void testAssertions() { + testBackgroundAssertions(new MutualInformation(true, true), new MutualInformation(true, false)); + } + + public void testScoreMutual() { + SignificanceHeuristic heuristic = new MutualInformation(true, true); + assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); + assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3))); + assertThat(heuristic.getScore(2, 2, 2, 4), equalTo(1.0)); + assertThat(heuristic.getScore(0, 2, 2, 4), equalTo(1.0)); + assertThat(heuristic.getScore(2, 2, 4, 4), equalTo(0.0)); + assertThat(heuristic.getScore(1, 2, 2, 4), equalTo(0.0)); + assertThat(heuristic.getScore(3, 6, 9, 18), equalTo(0.0)); + + double score = 0.0; + try { + long a = randomLong(); + long b = randomLong(); + long c = randomLong(); + long d = randomLong(); + score = heuristic.getScore(a, b, c, d); + } catch (IllegalArgumentException e) { + } + assertThat(score, lessThanOrEqualTo(1.0)); + assertThat(score, greaterThanOrEqualTo(0.0)); + heuristic = new MutualInformation(false, true); + assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(Double.NEGATIVE_INFINITY)); + + heuristic = new MutualInformation(true, false); + score = heuristic.getScore(2, 3, 1, 4); + assertThat(score, 
greaterThanOrEqualTo(0.0)); + assertThat(score, lessThanOrEqualTo(1.0)); + score = heuristic.getScore(1, 4, 2, 3); + assertThat(score, greaterThanOrEqualTo(0.0)); + assertThat(score, lessThanOrEqualTo(1.0)); + score = heuristic.getScore(1, 3, 4, 4); + assertThat(score, greaterThanOrEqualTo(0.0)); + assertThat(score, lessThanOrEqualTo(1.0)); + } + +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java new file mode 100644 index 0000000000000..f18b3d047ef6e --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.aggregations.bucket.terms.heuristic; + +import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; + +public class PercentageScoreTests extends AbstractSignificanceHeuristicTests { + @Override + protected SignificanceHeuristic getHeuristic() { + return new PercentageScore(); + } + + @Override + protected boolean testZeroScore() { + return true; + } + + @Override + public void testAssertions() { + testAssertions(new PercentageScore()); + } +}