Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Significant terms test refactor for extendability #75452

Merged
merged 3 commits into from
Jul 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ protected long getSupersetSize() {
}

@Override
protected SignificanceHeuristic getSignificanceHeuristic() {
public SignificanceHeuristic getSignificanceHeuristic() {
return significanceHeuristic;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
public class SignificantLongTerms extends InternalMappedSignificantTerms<SignificantLongTerms, SignificantLongTerms.Bucket> {
public static final String NAME = "siglterms";

static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {
public static class Bucket extends InternalSignificantTerms.Bucket<Bucket> {

long term;

Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
public Bucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, long term, InternalAggregations aggregations,
DocValueFormat format, double score) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format);
this.term = term;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ protected boolean serializeTargetValueType(Version version) {
return true;
}

protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() {
public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {
return new TermsAggregator.BucketCountThresholds(bucketCountThresholds);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,17 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase;

public class ChiSquareTests extends AbstractSignificanceHeuristicTests {
@Override
protected SignificanceHeuristic getHeuristic() {
return new ChiSquare(randomBoolean(), randomBoolean());
}
public class ChiSquareTests extends AbstractNXYSignificanceHeuristicTestCase {

@Override
protected boolean testZeroScore() {
return false;
public void testAssertions() {
testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false));
}

@Override
public void testAssertions() {
testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false));
protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
return new ChiSquare(includeNegatives, backgroundIsSuperset);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...okay github, that's an interesting way to interpret that change

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, sorry :(. It looked cleaner on a local git diff

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not your fault. I think github optimizes for minimal edit distance, not fewest lines changed. I got the point though, and that's all that matters.

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,16 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

import static org.hamcrest.Matchers.equalTo;

public class GNDTests extends AbstractSignificanceHeuristicTests {
public class GNDTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new GND(randomBoolean());
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testBackgroundAssertions(new GND(true), new GND(false));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

public class JLHScoreTests extends AbstractSignificanceHeuristicTests {
public class JLHScoreTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new JLHScore();
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testAssertions(new JLHScore());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,18 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractNXYSignificanceHeuristicTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class MutualInformationTests extends AbstractSignificanceHeuristicTests {
public class MutualInformationTests extends AbstractNXYSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new MutualInformation(randomBoolean(), randomBoolean());
}

@Override
protected boolean testZeroScore() {
return false;
protected SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
return new MutualInformation(includeNegatives, backgroundIsSuperset);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@

package org.elasticsearch.search.aggregations.bucket.terms.heuristic;

import org.elasticsearch.search.aggregations.bucket.terms.AbstractSignificanceHeuristicTests;
import org.elasticsearch.search.aggregations.bucket.AbstractSignificanceHeuristicTestCase;

public class PercentageScoreTests extends AbstractSignificanceHeuristicTests {
public class PercentageScoreTests extends AbstractSignificanceHeuristicTestCase {
@Override
protected SignificanceHeuristic getHeuristic() {
return new PercentageScore();
}

@Override
protected boolean testZeroScore() {
return true;
}

@Override
public void testAssertions() {
testAssertions(new PercentageScore());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket;

import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;

/**
* Abstract test case for testing NXY significant term heuristics
*/
public abstract class AbstractNXYSignificanceHeuristicTestCase extends AbstractSignificanceHeuristicTestCase {

/**
* @return A random instance of the heuristic to test
*/
@Override
protected SignificanceHeuristic getHeuristic() {
return getHeuristic(randomBoolean(), randomBoolean());
}

/**
* @param includeNegatives value for this test run, should the scores include negative values.
* @param backgroundIsSuperset value for this test run, indicates in NXY significant terms if the background is indeed
* a superset of the the subset, or is instead a disjoint set
* @return A random instance of an NXY heuristic to test
*/
protected abstract SignificanceHeuristic getHeuristic(boolean includeNegatives, boolean backgroundIsSuperset);

@Override
public void testBasicScoreProperties() {
testBasicScoreProperties(getHeuristic(true, true), false);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Side Public License, v 1.
*/

package org.elasticsearch.search.aggregations.bucket.terms;
package org.elasticsearch.search.aggregations.bucket;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
Expand All @@ -28,6 +28,12 @@
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.InternalMappedSignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.InternalSignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantLongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantStringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.InternalAggregationTestCase;
Expand Down Expand Up @@ -56,18 +62,13 @@
/**
* Abstract test case for testing significant term heuristics
*/
public abstract class AbstractSignificanceHeuristicTests extends ESTestCase {
public abstract class AbstractSignificanceHeuristicTestCase extends ESTestCase {

/**
* @return A random instance of the heuristic to test
*/
protected abstract SignificanceHeuristic getHeuristic();

/**
* @return test if the score is `0` with a subset frequency of `0`
*/
protected abstract boolean testZeroScore();

// test that stream output can actually be read - does not replace bwc test
public void testStreamResponse() throws Exception {
Version version = randomVersion(random());
Expand All @@ -83,13 +84,11 @@ public void testStreamResponse() throws Exception {
ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
StreamInput in = new InputStreamStreamInput(inBuffer);
// populates the registry through side effects
SearchModule searchModule = new SearchModule(Settings.EMPTY, emptyList());
NamedWriteableRegistry registry = new NamedWriteableRegistry(searchModule.getNamedWriteables());
in = new NamedWriteableAwareStreamInput(in, registry);
in = new NamedWriteableAwareStreamInput(in, writableRegistry());
in.setVersion(version);
InternalMappedSignificantTerms<?, ?> read = (InternalMappedSignificantTerms<?, ?>) in.readNamedWriteable(InternalAggregation.class);

assertEquals(sigTerms.significanceHeuristic, read.significanceHeuristic);
assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic());
SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0);
SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0);
assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString()));
Expand Down Expand Up @@ -127,11 +126,14 @@ public void testReduce() {
}

public void testBasicScoreProperties() {
SignificanceHeuristic heuristic = getHeuristic();
testBasicScoreProperties(getHeuristic(), true);
}

protected void testBasicScoreProperties(SignificanceHeuristic heuristic, boolean testZeroScore) {
assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0));
assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3)));
assertThat(heuristic.getScore(1, 1, 3, 4), lessThan(heuristic.getScore(1, 1, 2, 4)));
if (testZeroScore()) {
if (testZeroScore) {
assertThat(heuristic.getScore(0, 1, 2, 3), equalTo(0.0));
}

Expand All @@ -150,8 +152,8 @@ public void testBasicScoreProperties() {
/**
* Testing heuristic specific assertions
* Typically, this method would call either
* {@link AbstractSignificanceHeuristicTests#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)}
* or {@link AbstractSignificanceHeuristicTests#testAssertions(SignificanceHeuristic)}
* {@link AbstractSignificanceHeuristicTestCase#testBackgroundAssertions(SignificanceHeuristic, SignificanceHeuristic)}
* or {@link AbstractSignificanceHeuristicTestCase#testAssertions(SignificanceHeuristic)}
* depending on which was appropriate
*/
public abstract void testAssertions();
Expand Down Expand Up @@ -206,9 +208,9 @@ public void testParseFailure() throws IOException {
// Create aggregations as they might come from three different shards and return as list.
private List<InternalAggregation> createInternalAggregations() {
SignificanceHeuristic significanceHeuristic = getHeuristic();
AbstractSignificanceHeuristicTests.TestAggFactory<?, ?> factory = randomBoolean() ?
new AbstractSignificanceHeuristicTests.StringTestAggFactory() :
new AbstractSignificanceHeuristicTests.LongTestAggFactory();
AbstractSignificanceHeuristicTestCase.TestAggFactory<?, ?> factory = randomBoolean() ?
new AbstractSignificanceHeuristicTestCase.StringTestAggFactory() :
new AbstractSignificanceHeuristicTestCase.LongTestAggFactory();

List<InternalAggregation> aggs = new ArrayList<>();
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0)));
Expand Down Expand Up @@ -273,6 +275,11 @@ protected NamedXContentRegistry xContentRegistry() {
return new NamedXContentRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents());
}

@Override
protected NamedWriteableRegistry writableRegistry() {
return new NamedWriteableRegistry(new SearchModule(Settings.EMPTY, emptyList()).getNamedWriteables());
}

protected void testBackgroundAssertions(SignificanceHeuristic heuristicIsSuperset, SignificanceHeuristic heuristicNotSuperset) {
try {
heuristicIsSuperset.getScore(2, 3, 1, 4);
Expand Down