Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More similarity metrics #1396

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion core/src/main/java/de/jplag/options/SimilarityMetric.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,19 @@
import java.util.function.ToDoubleFunction;

import de.jplag.JPlagComparison;
import de.jplag.Match;

public enum SimilarityMetric implements ToDoubleFunction<JPlagComparison> {
AVG("average similarity", JPlagComparison::similarity),
MIN("minimum similarity", JPlagComparison::minimalSimilarity),
MAX("maximal similarity", JPlagComparison::maximalSimilarity),
INTERSECTION("matched tokens", it -> (double) it.getNumberOfMatchedTokens());
INTERSECTION("matched tokens", it -> (double) it.getNumberOfMatchedTokens()),
SYMMETRIC(
"symmetric similarity",
it -> 2.0 * it.getNumberOfMatchedTokens() / (it.firstSubmission().getNumberOfTokens() + it.secondSubmission().getNumberOfTokens())),
OVERLAP("overlap between both submissions (number of matched tokens)", JPlagComparison::getNumberOfMatchedTokens),
TwoOfTwelve marked this conversation as resolved.
Show resolved Hide resolved
LONGEST_MATCH("number of tokens in the longest match", it -> it.matches().stream().mapToInt(Match::length).max().orElse(0)),
OVERALL("Sum of both submission lengths", it -> it.firstSubmission().getNumberOfTokens() + it.secondSubmission().getNumberOfTokens());

private final ToDoubleFunction<JPlagComparison> similarityFunction;
private final String description;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package de.jplag.reporting.jsonfactory;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
Expand Down Expand Up @@ -56,13 +53,20 @@ private void writeComparisons(String path, List<JPlagComparison> comparisons) {
String secondSubmissionId = submissionToIdFunction.apply(comparison.secondSubmission());
String fileName = generateComparisonName(firstSubmissionId, secondSubmissionId);
addToLookUp(firstSubmissionId, secondSubmissionId, fileName);
var comparisonReport = new ComparisonReport(firstSubmissionId, secondSubmissionId,
Map.of(SimilarityMetric.AVG.name(), comparison.similarity(), SimilarityMetric.MAX.name(), comparison.maximalSimilarity()),
var comparisonReport = new ComparisonReport(firstSubmissionId, secondSubmissionId, createMetricMap(comparison),
convertMatchesToReportMatches(comparison));
fileWriter.saveAsJSON(comparisonReport, path, fileName);
});
}

/**
 * Evaluates every {@link SimilarityMetric} constant on the given comparison.
 *
 * @param comparison the comparison each metric is applied to
 * @return a new mutable map from each metric's enum constant name to the value that metric yields for
 *         {@code comparison}
 */
private Map<String, Double> createMetricMap(JPlagComparison comparison) {
    SimilarityMetric[] allMetrics = SimilarityMetric.values();
    Map<String, Double> valuesByMetricName = new HashMap<>();
    for (int index = 0; index < allMetrics.length; index++) {
        SimilarityMetric current = allMetrics[index];
        // Keyed by the enum constant name, not by the metric's human-readable description.
        String key = current.name();
        double value = current.applyAsDouble(comparison);
        valuesByMetricName.put(key, value);
    }
    return valuesByMetricName;
}

private void addToLookUp(String firstSubmissionId, String secondSubmissionId, String fileName) {
writeToMap(secondSubmissionId, firstSubmissionId, fileName);
writeToMap(firstSubmissionId, secondSubmissionId, fileName);
Expand Down
Loading