Skip to content

Commit

Permalink
Merge pull request #216 from DemocracyDevelopers/sample-size-estimati…
Browse files Browse the repository at this point in the history
…on-without-manifests

Sample size estimation without manifests
  • Loading branch information
vteague authored Nov 20, 2024
2 parents 625d415 + 37b8d5d commit 2b512e4
Show file tree
Hide file tree
Showing 33 changed files with 1,121 additions and 118 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/maven.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ jobs:
run: cd server/eclipse-project; mvn -Dtest='us.freeandfair.corla.**' test

- name: IRV Tests
run: cd server/eclipse-project; mvn -Dtest='au.org.democracydevelopers.corla.**' test
working-directory: server/eclipse-project
run: mvn -Dtest='au.org.democracydevelopers.corla.**' test
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,18 @@
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import us.freeandfair.corla.asm.ASMEvent;
import us.freeandfair.corla.controller.ContestCounter;
import us.freeandfair.corla.endpoint.AbstractDoSDashboardEndpoint;
import us.freeandfair.corla.model.*;
import us.freeandfair.corla.persistence.Persistence;
import us.freeandfair.corla.query.BallotManifestInfoQueries;

import java.net.http.HttpClient;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import static us.freeandfair.corla.controller.ContestCounter.countContest;

/**
* An abstract endpoint for communicating with raire. Includes all the information for collecting IRV contests
Expand Down Expand Up @@ -88,21 +94,49 @@ protected void reset() {
}

/**
* Get all the ContestResults whose contests are consistently IRV.
* Get (or make) all the ContestResults whose contests are consistently IRV.
* Used for assertion generation and retrieval.
* Uses manifests if they are there, but just counts the CSVs if not.
* This is analogous to (and mostly copied from) ContestCounter::countAllContests, but restricted
* to the IRV ones. Although the countContest function isn't really useful or meaningful for IRV,
* it is called here because it actually does a lot of other useful things, such as setting the
* number of allowed winners and gathering all the results across counties.
* Assumption: Contest names are unique.
* @return A list of all ContestResults for IRV contests.
* @throws RuntimeException if it encounters contests with a mix of IRV and any other contest type.
*/
protected static List<ContestResult> getIRVContestResults() {
final String prefix = "[getIRVContestResults]";
final String msg = "Inconsistent contest types:";

// Find all the ContestResults with any that match IRV.
List<ContestResult> results = ContestCounter.countAllContests().stream()
.filter(cr -> cr.getContests().stream().map(Contest::description)
.anyMatch(d -> d.equalsIgnoreCase(ContestType.IRV.toString()))).toList();

List<ContestResult> results = Persistence.getAll(CountyContestResult.class)
.stream()
// Collect contests by name across counties.
.collect(Collectors.groupingBy(x -> x.contest().name()))
.entrySet()
.stream()
.filter(
// Filter for those with any IRV descriptions (which should be all)
((Map.Entry<String, List<CountyContestResult>> countyContestResults) ->
countyContestResults.getValue().stream().map(ccr -> ccr.contest().description())
.anyMatch(d -> d.equalsIgnoreCase(ContestType.IRV.toString())))
)
// 'Count' them (which actually does plurality counting and sets various useful values
// such as number of winners).
.map((Map.Entry<String, List<CountyContestResult>> countyContestResults) -> {
// Use manifests (for the denominator of the diluted margin) if _all_ counties have
// uploaded one.
boolean useManifests = countyContestResults.getValue().stream().
allMatch(ccr -> BallotManifestInfoQueries.totalBallots(Set.of(ccr.county().id())) > 0);
return countContest(countyContestResults, useManifests);
}
)
.toList();

// The above should be sufficient, but just in case, check that each contest we found _all_
// matches IRV, and throw a RuntimeException if not.
// matches IRV, and throw a RuntimeException if not - one contest must not mix plurality and
// IRV.
for (final ContestResult cr : results) {
if (cr.getContests().stream().map(Contest::description)
.anyMatch(d -> !d.equalsIgnoreCase(ContestType.IRV.toString()))) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ public String endpointBody(final Request the_request, final Response the_respons

/**
* Compute sample sizes for all contests for which CountyContestResults exist in the database.
* This method ignores manifests, instead using the count of uploaded CSVs. This means that the
* estimate may differ from the estimate computed by estimatedSampleSize() during the audit, if
* the manifest has more votes than the CVR file.
* @return A list of string arrays containing rows with the following data: county name,
* contest name, contest type, single or multi-jurisdictional, ballots cast, diluted margin,
* and estimated sample size.
Expand All @@ -143,7 +146,8 @@ public String estimateSampleSizes() {
// in a ContestResult for an IRV contest will not be used. In the call to ContestCounter
// (countAllContests), all persisted CountyContestResults will be accessed from the database,
// grouped by contest, and accumulated into a single ContestResult.
final List<ContestResult> countedCRs = ContestCounter.countAllContests().stream().peek(cr ->
// Set the useManifests flag to false, to tell contest counter to use CVR count instead.
final List<ContestResult> countedCRs = ContestCounter.countAllContests(false).stream().peek(cr ->
cr.setAuditReason(AuditReason.OPPORTUNISTIC_BENEFITS)).toList();

// Try to get the DoS Dashboard, which may contain the risk limit for the audit.
Expand Down Expand Up @@ -186,7 +190,7 @@ public String estimateSampleSizes() {
* @param dilutedMargin The margin divided by the total Auditable ballots.
* @param estimatedSamples The estimated samples to audit.
*/
private record EstimateData(String countyName, String contestName, String contestType,
public record EstimateData(String countyName, String contestName, String contestType,
int contestBallots, long totalBallots, BigDecimal dilutedMargin,
int estimatedSamples) implements Comparable<EstimateData> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,18 @@


import java.math.BigDecimal;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.*;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import au.org.democracydevelopers.corla.model.ContestType;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import us.freeandfair.corla.math.Audit;
import us.freeandfair.corla.model.ContestResult;
import us.freeandfair.corla.model.CountyContestResult;
import us.freeandfair.corla.model.*;
import us.freeandfair.corla.persistence.Persistence;
import us.freeandfair.corla.query.BallotManifestInfoQueries;
import us.freeandfair.corla.query.CastVoteRecordQueries;
import us.freeandfair.corla.query.ContestResultQueries;

public final class ContestCounter {
Expand All @@ -36,19 +30,23 @@ private ContestCounter() {
/**
* Group all CountyContestResults by contest name and tally the votes
* across all counties that have reported results.
* This only works for plurality - not valid, and not needed, for IRV.
*
* @return List<ContestResult> A high level view of contests and their
* participants.
* If 'useManifests' is true, it calculates the total universe size from the uploaded manifests -
* this is important for the validity of the audit step. useManifests can be false for sample-size
* estimation, where we expect that counties may not have uploaded valid manifests - in this case,
* universe size is calculated by counting the CVRs.
* The actual tallying is valid only for plurality - it is not valid, and not needed, for IRV.
* However, this function may still be useful for IRV, e.g. for gathering contests together by
* name and calculating their universes.
* @return List<ContestResult> A high level view of contests and their participants.
*/
public static List<ContestResult> countAllContests() {
public static List<ContestResult> countAllContests(boolean useManifests) {
return
Persistence.getAll(CountyContestResult.class)
.stream()
.collect(Collectors.groupingBy(x -> x.contest().name()))
.entrySet()
.stream()
.map(ContestCounter::countContest)
.map((Entry<String, List<CountyContestResult>> countyContestResults) -> countContest(countyContestResults, useManifests))
.collect(Collectors.toList());
}

Expand Down Expand Up @@ -85,9 +83,14 @@ public static Set<Integer> pairwiseMargins(final Set<String> winners,
* Set voteTotals on CONTEST based on all counties that have that
* Contest name in their uploaded CVRs
* Not valid for IRV.
* @param countyContestResults the county-by-county contest results, which are useful for plurality.
* @param useManifests whether to use manifests to compute the total number of ballots. This
* *must* be true when counting for audits - it can be false only when
* doing pre-audit sample size estimation. In this case, it computes
* the total number of ballots based on the (untrusted) CVRs.
**/
public static ContestResult
countContest(final Map.Entry<String, List<CountyContestResult>> countyContestResults) {
public static ContestResult countContest(final Map.Entry<String, List<CountyContestResult>> countyContestResults,
boolean useManifests) {
final String contestName = countyContestResults.getKey();
final ContestResult contestResult = ContestResultQueries.findOrCreate(contestName);

Expand Down Expand Up @@ -126,7 +129,13 @@ public static Set<Integer> pairwiseMargins(final Set<String> winners,
.map(cr -> cr.county())
.collect(Collectors.toSet()));

final Long ballotCount = BallotManifestInfoQueries.totalBallots(contestResult.countyIDs());
// If we are supposed to use manifests, set the ballotCount to their indicated total, otherwise
// count the CVRs.
final Long ballotCount = useManifests ?
BallotManifestInfoQueries.totalBallots(contestResult.countyIDs()) : countCVRs(contestResult);
LOGGER.debug(String.format("%s Contest %s counted %s manifests.", "[countContest]", contestName,
useManifests ? "with" : "without"));

final Set<Integer> margins = pairwiseMargins(contestResult.getWinners(),
contestResult.getLosers(),
voteTotals);
Expand All @@ -141,13 +150,48 @@ public static Set<Integer> pairwiseMargins(final Set<String> winners,
contestResult.setDilutedMargin(dilutedMargin);

if (ballotCount == 0L) {
LOGGER.error(String.format("[countContest: %s has no ballot manifests for"
+ " countyIDs: %s", contestName, contestResult.countyIDs()));
final String dataSource = useManifests ? "ballot manifests" : "uploaded CVRs";
LOGGER.error(String.format("[countContest: %s has no %s for"
+ " countyIDs: %s", contestName, dataSource, contestResult.countyIDs()));
}

return contestResult;
}

/**
 * Estimate the size of a contest's audit universe from uploaded CVRs instead of manifests.
 * Every county attached to the contest contributes its <em>entire</em> uploaded CVR count,
 * regardless of how many of those CVRs actually contain the contest. For example, a county with
 * 10,000 CVRs of which only 500 include the contest still adds 10,000 to the result.
 * Intended only for preliminary sample-size estimation ahead of the audit. Once the audit is
 * running, the universe size should come from the manifests, not from the CVRs.
 * @param contestResult the contestResult whose counties are to be summed over.
 * @return the total number of uploaded CVRs across all counties in the contest. This is 0 when
 *         the contestResult has no counties, or when none of its counties has uploaded CVRs.
 */
private static Long countCVRs(ContestResult contestResult) {
  final String logTag = "[countCVRs]";

  long universeSize = 0L;
  for (final County c : contestResult.getCounties()) {
    // A missing count is handled exactly like a count of zero.
    final long uploaded = CastVoteRecordQueries
        .countMatching(c.id(), CastVoteRecord.RecordType.UPLOADED)
        .orElse(0L);

    if (uploaded == 0L) {
      // The remaining counties can still yield an estimate, but flag that this county's
      // ballots are missing from it.
      LOGGER.warn(String.format("%s Found no CVRs in database for county %s. Estimate for contest "
          + "%s may be inaccurate.", logTag, c.name(), contestResult.getContestName()));
      continue;
    }

    // This county has CVRs: all of them count towards the universe.
    universeSize += uploaded;
  }
  return universeSize;
}

/** add em up **/
public static Map<String,Integer>
accumulateVoteTotals(final List<Map<String,Integer>> voteTotals) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ public List<ContestResult> countAndSaveContests(final Set<ContestToAudit> cta) {
LOGGER.debug(String.format("[countAndSaveContests: cta=%s]", cta));
final Map<String, AuditReason> tcr = targetedContestReasons(cta);

return ContestCounter.countAllContests().stream().map(cr -> {
// Count the contests, using the trusted Manifests to get the universe sizes.
return ContestCounter.countAllContests(true).stream().map(cr -> {
cr.setAuditReason(tcr.getOrDefault(cr.getContestName(),
AuditReason.OPPORTUNISTIC_BENEFITS));
return cr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,9 @@ public static void afterAll() {
public void IRVContestMakesIRVAudit() {
testUtils.log(LOGGER, "IRVContestMakesIRVAudit");

// Set up the contest results from the stored data.
List<ContestResult> results = ContestCounter.countAllContests();
// Set up the contest results from the stored data. Use manifests for this test (though it
// shouldn't matter because for this sample data, the manifests and CVRs have the same totals).
List<ContestResult> results = ContestCounter.countAllContests(true);

// Find all the ContestResults for TinyIRV - there should be one.
List<ContestResult> tinyIRVResults = results.stream().filter(
Expand All @@ -121,7 +122,7 @@ public void pluralityContestMakesPluralityAudit() {
testUtils.log(LOGGER, "pluralityContestMakesPluralityAudit");

// Set up the contest results from the stored data.
List<ContestResult> results = ContestCounter.countAllContests();
List<ContestResult> results = ContestCounter.countAllContests(true);

// Find all the ContestResults for TinyPlurality - there should be one.
List<ContestResult> tinyPluralityResults = results.stream().filter(
Expand All @@ -142,7 +143,7 @@ public void inconsistentContestThrowsException() {
testUtils.log(LOGGER, "inconsistentContestThrowsException");

// Set up the contest results from the stored data.
List<ContestResult> results = ContestCounter.countAllContests();
List<ContestResult> results = ContestCounter.countAllContests(true);

// Find all the contestResults for TinyMixed - there should be one.
List<ContestResult> tinyMixedResults = results.stream().filter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void basicEstimatedSampleSizesPluralityAndIRV() {
doSD.updateAuditInfo(new AuditInfo(null, null, null,null, BigDecimal.valueOf(0.04)));

// Mock return of empty contest list.
mockedCounter.when(ContestCounter::countAllContests).thenReturn(List.of());
mockedCounter.when(() -> ContestCounter.countAllContests(false)).thenReturn(List.of());

// Check for error response.
errorBody = "";
Expand All @@ -169,7 +169,7 @@ void basicEstimatedSampleSizesPluralityAndIRV() {

// Mock non-empty contest response (one plurality and one IRV contest).
List<ContestResult> mockedContestResults = List.of(pluralityContestResult, irvContestResult);
mockedCounter.when(ContestCounter::countAllContests).thenReturn(mockedContestResults);
mockedCounter.when(() -> ContestCounter.countAllContests(false)).thenReturn(mockedContestResults);

endpoint.endpointBody(request, response);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import spark.Request;
import us.freeandfair.corla.Main;
import us.freeandfair.corla.asm.*;
import us.freeandfair.corla.controller.ContestCounter;
import us.freeandfair.corla.model.AuditReason;
import us.freeandfair.corla.model.Choice;
import us.freeandfair.corla.model.ContestResult;
Expand Down Expand Up @@ -192,7 +191,7 @@ void assertionGenerationBlockedWhenInWrongASMState() {

// Mock the main class; mock its auth as the mocked state admin auth.
try (MockedStatic<Main> mockedMain = Mockito.mockStatic(Main.class);
MockedStatic<ContestCounter> mockedCounter = Mockito.mockStatic(ContestCounter.class)) {
MockedStatic<AbstractAllIrvEndpoint> mockedIRVEndpoint = Mockito.mockStatic(AbstractAllIrvEndpoint.class)) {

// Mock auth.
mockedMain.when(Main::authentication).thenReturn(auth);
Expand All @@ -202,7 +201,7 @@ void assertionGenerationBlockedWhenInWrongASMState() {

// Mock non-empty contest response (one IRV contest).
List<ContestResult> mockedContestResults = List.of(tinyIRVContestResult);
mockedCounter.when(ContestCounter::countAllContests).thenReturn(mockedContestResults);
mockedIRVEndpoint.when(AbstractAllIrvEndpoint::getIRVContestResults).thenReturn(mockedContestResults);

// We seem to need a dummy request to run before.
final Request request = new SparkRequestStub("", Map.of(CONTEST_NAME, tinyIRV));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public abstract class TestClassWithDatabase {
protected static final Properties blank = new Properties();

/**
* Oroperties derived from test.properties.
* Properties derived from test.properties.
*/
protected static Properties config = loadProperties();

Expand Down
Loading

0 comments on commit 2b512e4

Please sign in to comment.