From 2fee60d87230d80f19cbb6b20520f51cce766846 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Mon, 19 Jul 2021 08:13:47 -0400 Subject: [PATCH 1/2] Significant terms test refactor for extendability --- .../terms/InternalMappedSignificantTerms.java | 2 +- .../bucket/terms/SignificantLongTerms.java | 4 +- .../SignificantTermsAggregationBuilder.java | 2 +- .../terms/heuristic/ChiSquareTests.java | 16 +++---- .../bucket/terms/heuristic/GNDTests.java | 9 +--- .../bucket/terms/heuristic/JLHScoreTests.java | 9 +--- .../heuristic/MutualInformationTests.java | 13 ++---- .../terms/heuristic/PercentageScoreTests.java | 9 +--- ...tractNXYSignificanceHeuristicTestCase.java | 38 ++++++++++++++++ ...AbstractSignificanceHeuristicTestCase.java | 43 +++++++++++-------- 10 files changed, 83 insertions(+), 62 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java rename server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java => test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java (91%) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java index 43bd7e0043564..118f6dee63b54 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java @@ -92,7 +92,7 @@ protected long getSupersetSize() { } @Override - protected SignificanceHeuristic getSignificanceHeuristic() { + public SignificanceHeuristic getSignificanceHeuristic() { return significanceHeuristic; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java index 61053053d5b1e..9d05f98be904c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java @@ -25,11 +25,11 @@ public class SignificantLongTerms extends InternalMappedSignificantTerms { public static final String NAME = "siglterms"; - static class Bucket extends InternalSignificantTerms.Bucket { + public static class Bucket extends InternalSignificantTerms.Bucket { long term; - Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations, + public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations, DocValueFormat format, double score) { super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); this.term = term; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java index 0b6e7c24a6c7c..384de0d7d7268 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java @@ -151,7 +151,7 @@ protected boolean serializeTargetValueType(Version version) { return true; } - protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() { + public TermsAggregator.BucketCountThresholds getBucketCountThresholds() { return new TermsAggregator.BucketCountThresholds(bucketCountThresholds); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java index 0ea14fc3af9e0..055afbdb445c2 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/ChiSquareTests.java @@ -8,21 +8,17 @@ package org.elasticsearch.search.aggregations.bucket.terms.heuristic; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; +import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase; -public class ChiSquareTests extends AbstractSignificanceHeuristicTests { - @Override - protected SignificanceHeuristic getHeuristic() { - return new ChiSquare(randomBoolean(), randomBoolean()); - } +public class ChiSquareTests extends AbstractNXYSignificanceHeuristicTestCase { @Override - protected boolean testZeroScore() { - return false; + public void testAssertions() { + testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); } @Override - public void testAssertions() { - testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); + protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) { + return new ChiSquare(includeNegatives, backgroundIsSuperset); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java index 2f6b839c8ffc9..3cde741ab5ce6 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/GNDTests.java @@ -8,21 +8,16 @@ package org.elasticsearch.search.aggregations.bucket.terms.heuristic; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; +import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase; import static org.hamcrest.Matchers.equalTo; -public class GNDTests extends AbstractSignificanceHeuristicTests { +public class GNDTests extends AbstractSignificanceHeuristicTestCase { @Override protected SignificanceHeuristic getHeuristic() { return new GND(randomBoolean()); } - @Override - protected boolean testZeroScore() { - return true; - } - @Override public void testAssertions() { testBackgroundAssertions(new GND(true), new GND(false)); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java index 72fff07a939c6..0ad2deba57d83 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/JLHScoreTests.java @@ -8,19 +8,14 @@ package org.elasticsearch.search.aggregations.bucket.terms.heuristic; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; +import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase; -public class JLHScoreTests extends AbstractSignificanceHeuristicTests { +public class JLHScoreTests extends AbstractSignificanceHeuristicTestCase { @Override protected SignificanceHeuristic getHeuristic() { return new JLHScore(); } - @Override - protected boolean testZeroScore() { - return true; - } - @Override public void testAssertions() { testAssertions(new JLHScore()); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java index f649b90cd7c5e..655d283c10aed 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/MutualInformationTests.java @@ -8,7 +8,7 @@ package org.elasticsearch.search.aggregations.bucket.terms.heuristic; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; +import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -16,15 +16,10 @@ import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; -public class MutualInformationTests extends AbstractSignificanceHeuristicTests { +public class MutualInformationTests extends AbstractNXYSignificanceHeuristicTestCase { @Override - protected SignificanceHeuristic getHeuristic() { - return new MutualInformation(randomBoolean(), randomBoolean()); - } - - @Override - protected boolean testZeroScore() { - return false; + protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) { + return new MutualInformation(includeNegatives, backgroundIsSuperset); } @Override diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java index f18b3d047ef6e..b039a37533cb9 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/heuristic/PercentageScoreTests.java @@ -8,19 +8,14 @@ package org.elasticsearch.search.aggregations.bucket.terms.heuristic; -import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests; +import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase; -public class PercentageScoreTests extends AbstractSignificanceHeuristicTests { +public class PercentageScoreTests extends AbstractSignificanceHeuristicTestCase { @Override protected SignificanceHeuristic getHeuristic() { return new PercentageScore(); } - @Override - protected boolean testZeroScore() { - return true; - } - @Override public void testAssertions() { testAssertions(new PercentageScore()); diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java new file mode 100644 index 0000000000000..71b34ec98f3f3 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; + +/** + * Abstract test case for testing significant term heuristics + */ +public abstract class AbstractNXYSignificanceHeuristicTestCase extends AbstractSignificanceHeuristicTestCase { + + /** + * @return A random instance of the heuristic to test + */ + @Override + protected SignificanceHeuristic getHeuristic() { + return getHeuristic(randomBoolean(), randomBoolean()); + } + + /** + * @param includeNegatives value for this test run + * @param backgroundIsSuperset value for this test run + * @return A random instance of an NXY heuristic to test + */ + protected abstract SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset); + + @Override + public void testBasicScoreProperties() { + testBasicScoreProperties(getHeuristic(true, true), false); + } + +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java similarity index 91% rename from server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java rename to test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java index 80d2056c4cce3..376f9854c7ca4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractSignificanceHeuristicTests.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java @@ -6,7 +6,7 @@ * Side Public License, v 1. */ -package org.elasticsearch.search.aggregations.bucket.terms; +package org.elasticsearch.search.aggregations.bucket; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; @@ -28,6 +28,12 @@ import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.bucket.terms.InternalMappedSignificantTerms; +import org.elasticsearch.search.aggregations.bucket.terms.InternalSignificantTerms; +import org.elasticsearch.search.aggregations.bucket.terms.SignificantLongTerms; +import org.elasticsearch.search.aggregations.bucket.terms.SignificantStringTerms; +import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms; +import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.InternalAggregationTestCase; @@ -56,18 +62,13 @@ /** * Abstract test case for testing significant term heuristics */ -public abstract class AbstractSignificanceHeuristicTests extends ESTestCase { +public abstract class AbstractSignificanceHeuristicTestCase extends ESTestCase { /** * @return A random instance of the heuristic to test */ protected abstract SignificanceHeuristic getHeuristic(); - /** - * @return test if the score is `0` with a subset frequency of `0` - */ - protected abstract boolean testZeroScore(); - // test that stream output can actually be read - does not replace bwc test public void testStreamResponse() throws Exception { Version version = randomVersion(random()); @@ -83,13 +84,11 @@ public void testStreamResponse() throws Exception { ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray()); StreamInput in = new InputStreamStreamInput(inBuffer); // populates the registry through side effects - SearchModule searchModule = new SearchModule(Settings.EMPTY, emptyList()); - NamedWriteableRegistry registry = new NamedWriteableRegistry(searchModule.getNamedWriteables()); - in = new NamedWriteableAwareStreamInput(in, registry); + in = new NamedWriteableAwareStreamInput(in, writableRegistry()); in.setVersion(version); InternalMappedSignificantTerms read = (InternalMappedSignificantTerms) in.readNamedWriteable(InternalAggregation.class); - assertEquals(sigTerms.significanceHeuristic, read.significanceHeuristic); + assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic()); SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0); SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0); assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString())); @@ -127,11 +126,14 @@ public void testReduce() { } public void testBasicScoreProperties() { - SignificanceHeuristic heuristic = getHeuristic(); + testBasicScoreProperties(getHeuristic(), true); + } + + protected void testBasicScoreProperties(SignificanceHeuristic heuristic, boolean testZeroScore) { assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3))); assertThat(heuristic.getScore(1, 1, 3, 4), lessThan(heuristic.getScore(1, 1, 2, 4))); - if (testZeroScore()) { + if (testZeroScore) { assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(0.0)); } @@ -150,8 +152,8 @@ public void testBasicScoreProperties() { /** * Testing heuristic specific assertions * Typically, this method would call either - * {@link AbstractSignificanceHeuristicTests#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)} - * or {@link AbstractSignificanceHeuristicTests#testAssertions(SignificanceHeuristic)} + * {@link AbstractSignificanceHeuristicTestCase#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)} + * or {@link AbstractSignificanceHeuristicTestCase#testAssertions(SignificanceHeuristic)} * depending on which was appropriate */ public abstract void testAssertions(); @@ -206,9 +208,9 @@ public void testParseFailure() throws IOException { // Create aggregations as they might come from three different shards and return as list. private List createInternalAggregations() { SignificanceHeuristic significanceHeuristic = getHeuristic(); - AbstractSignificanceHeuristicTests.TestAggFactory factory = randomBoolean() ? - new AbstractSignificanceHeuristicTests.StringTestAggFactory() : - new AbstractSignificanceHeuristicTests.LongTestAggFactory(); + AbstractSignificanceHeuristicTestCase.TestAggFactory factory = randomBoolean() ? + new AbstractSignificanceHeuristicTestCase.StringTestAggFactory() : + new AbstractSignificanceHeuristicTestCase.LongTestAggFactory(); List aggs = new ArrayList<>(); aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0))); @@ -273,6 +275,11 @@ protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents()); } + @Override + protected NamedWriteableRegistry writableRegistry() { + return new NamedWriteableRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedWriteables()); + } + protected void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, SignificanceHeuristic heuristicNotSuperset) { try { heuristicIsSuperset.getScore(2, 3, 1, 4); From 402b96e309e86bd75de813dd87107f683938b749 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Wed, 21 Jul 2021 07:21:51 -0400 Subject: [PATCH 2/2] adding docs --- .../bucket/AbstractNXYSignificanceHeuristicTestCase.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java index 71b34ec98f3f3..0ea5b6eb687f9 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractNXYSignificanceHeuristicTestCase.java @@ -11,7 +11,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; /** - * Abstract test case for testing significant term heuristics + * Abstract test case for testing NXY significant term heuristics */ public abstract class AbstractNXYSignificanceHeuristicTestCase extends AbstractSignificanceHeuristicTestCase { @@ -24,8 +24,9 @@ protected SignificanceHeuristic getHeuristic() { } /** - * @param includeNegatives value for this test run - * @param backgroundIsSuperset value for this test run + * @param includeNegatives value for this test run, should the scores include negative values. + * @param backgroundIsSuperset value for this test run, indicates in NXY significant terms if the background is indeed + * a superset of the the subset, or is instead a disjoint set * @return A random instance of an NXY heuristic to test */ protected abstract SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset);