From bcbd7dbb5a3a5db7856ebc849c08c06ad09bfee9 Mon Sep 17 00:00:00 2001 From: Alex | Kronox Date: Thu, 28 Nov 2024 08:12:54 +0100 Subject: [PATCH] include basecode in symmetric similarity --- core/src/main/java/de/jplag/JPlagComparison.java | 13 +++++++++++++ .../java/de/jplag/options/SimilarityMetric.java | 9 +-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/de/jplag/JPlagComparison.java b/core/src/main/java/de/jplag/JPlagComparison.java index 37aa0c7ad..2fa03cef0 100644 --- a/core/src/main/java/de/jplag/JPlagComparison.java +++ b/core/src/main/java/de/jplag/JPlagComparison.java @@ -54,6 +54,19 @@ public final double similarity() { return 2 * similarity(divisorA + divisorB); } + /** + * @return A symmetric similarity in interval [0, 1]. 0 means no similarity, 1 means maximum similarity. + */ + public final double symmetricSimilarity() { + boolean subtractBaseCode = firstSubmission.hasBaseCodeMatches() && secondSubmission.hasBaseCodeMatches(); + int divisorA = firstSubmission.getSimilarityDivisor(subtractBaseCode); + int divisorB = secondSubmission.getSimilarityDivisor(subtractBaseCode); + if (divisorA + divisorB == 0) { + return 0.0; + } + return 2.0 * getNumberOfMatchedTokens() / (divisorA + divisorB); + } + /** * @return Similarity of the first submission in interval [0, 1]. O means no similarity, 1 means maximum similarity. 
 */ diff --git a/core/src/main/java/de/jplag/options/SimilarityMetric.java b/core/src/main/java/de/jplag/options/SimilarityMetric.java index 08d00f78d..4cefb9ff7 100644 --- a/core/src/main/java/de/jplag/options/SimilarityMetric.java +++ b/core/src/main/java/de/jplag/options/SimilarityMetric.java @@ -10,14 +10,7 @@ public enum SimilarityMetric implements ToDoubleFunction<JPlagComparison> { MIN("minimum similarity", JPlagComparison::minimalSimilarity), MAX("maximal similarity", JPlagComparison::maximalSimilarity), INTERSECTION("matched tokens", it -> (double) it.getNumberOfMatchedTokens()), - SYMMETRIC("symmetric similarity", it -> { - int divisor = it.firstSubmission().getNumberOfTokens() + it.secondSubmission().getNumberOfTokens(); - if (divisor != 0) { - return 2.0 * it.getNumberOfMatchedTokens() / divisor; - } else { - return .0; - } - }), + SYMMETRIC("symmetric similarity", JPlagComparison::symmetricSimilarity), LONGEST_MATCH("number of tokens in the longest match", it -> it.matches().stream().mapToInt(Match::length).max().orElse(0)), OVERALL("Sum of both submission lengths", it -> it.firstSubmission().getNumberOfTokens() + it.secondSubmission().getNumberOfTokens());