Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more similarity metrics #2058

Draft
wants to merge 11 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions core/src/main/java/de/jplag/JPlagComparison.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@ public final double similarity() {
return 2 * similarity(divisorA + divisorB);
}

/**
 * Computes the symmetric similarity of the two submissions as twice the number of matched tokens divided by the
 * sum of both submissions' similarity divisors. The flag handed to {@code getSimilarityDivisor} is true only when
 * both submissions report base code matches.
 * @return A symmetric similarity in interval [0, 1]. 0 means no similarity, 1 means maximum similarity. If the sum
 * of both divisors is zero, 0 is returned.
 */
public final double symmetricSimilarity() {
    final boolean bothHaveBaseCodeMatches = firstSubmission.hasBaseCodeMatches() && secondSubmission.hasBaseCodeMatches();
    final int firstDivisor = firstSubmission.getSimilarityDivisor(bothHaveBaseCodeMatches);
    final int secondDivisor = secondSubmission.getSimilarityDivisor(bothHaveBaseCodeMatches);
    final int combinedDivisor = firstDivisor + secondDivisor;
    // Only divide when there is something to divide by; an empty divisor means no similarity.
    if (combinedDivisor != 0) {
        return 2.0 * getNumberOfMatchedTokens() / combinedDivisor;
    }
    return 0.0;
}

/**
* @return Similarity of the first submission in interval [0, 1]. 0 means no similarity, 1 means maximum similarity.
*/
Expand Down
6 changes: 5 additions & 1 deletion core/src/main/java/de/jplag/options/SimilarityMetric.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
import java.util.function.ToDoubleFunction;

import de.jplag.JPlagComparison;
import de.jplag.Match;

public enum SimilarityMetric implements ToDoubleFunction<JPlagComparison> {
AVG("average similarity", JPlagComparison::similarity),
MIN("minimum similarity", JPlagComparison::minimalSimilarity),
MAX("maximal similarity", JPlagComparison::maximalSimilarity),
INTERSECTION("matched tokens", it -> (double) it.getNumberOfMatchedTokens());
INTERSECTION("matched tokens", it -> (double) it.getNumberOfMatchedTokens()),
SYMMETRIC("symmetric similarity", JPlagComparison::symmetricSimilarity),
LONGEST_MATCH("number of tokens in the longest match", it -> it.matches().stream().mapToInt(Match::length).max().orElse(0)),
OVERALL("Sum of both submission lengths", it -> it.firstSubmission().getNumberOfTokens() + it.secondSubmission().getNumberOfTokens());

private final ToDoubleFunction<JPlagComparison> similarityFunction;
private final String description;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.nio.file.Path;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
Expand Down Expand Up @@ -57,13 +58,20 @@ private void writeComparisons(List<JPlagComparison> comparisons) {
String secondSubmissionId = submissionToIdFunction.apply(comparison.secondSubmission());
String fileName = generateComparisonName(firstSubmissionId, secondSubmissionId);
addToLookUp(firstSubmissionId, secondSubmissionId, fileName);
var comparisonReport = new ComparisonReport(firstSubmissionId, secondSubmissionId,
Map.of(SimilarityMetric.AVG.name(), comparison.similarity(), SimilarityMetric.MAX.name(), comparison.maximalSimilarity()),
var comparisonReport = new ComparisonReport(firstSubmissionId, secondSubmissionId, createSimilarityMap(comparison),
convertMatchesToReportMatches(comparison), comparison.similarityOfFirst(), comparison.similarityOfSecond());
resultWriter.addJsonEntry(comparisonReport, Path.of(fileName));
}
}

/**
 * Evaluates every {@link SimilarityMetric} on the given comparison.
 * @param comparison the comparison each metric is applied to
 * @return a map from the metric's enum constant name to the value that metric yields for the comparison
 */
private Map<String, Double> createSimilarityMap(JPlagComparison comparison) {
    final Map<String, Double> similarityByMetricName = new HashMap<>();
    for (var similarityMetric : SimilarityMetric.values()) {
        final String metricName = similarityMetric.name();
        final double metricValue = similarityMetric.applyAsDouble(comparison);
        similarityByMetricName.put(metricName, metricValue);
    }
    return similarityByMetricName;
}

private void addToLookUp(String firstSubmissionId, String secondSubmissionId, String fileName) {
writeToMap(secondSubmissionId, firstSubmissionId, fileName);
writeToMap(firstSubmissionId, secondSubmissionId, fileName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ public List<TopComparison> getTopComparisons(JPlagResult result) {
}

private Map<String, Double> getComparisonMetricMap(JPlagComparison comparison) {
return Map.of(SimilarityMetric.AVG.name(), comparison.similarity(), SimilarityMetric.MAX.name(), comparison.maximalSimilarity());
Map<String, Double> metricMap = new HashMap<>();
for (SimilarityMetric metric : SimilarityMetric.values()) {
metricMap.put(metric.name(), metric.applyAsDouble(comparison));
}
return metricMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import de.jplag.JPlagComparison;
import de.jplag.JPlagResult;
import de.jplag.Match;
import de.jplag.Submission;
import de.jplag.options.JPlagOptions;
import de.jplag.options.SimilarityMetric;
Expand Down Expand Up @@ -48,14 +49,18 @@ public void test_getDistributions() {
public void test_getTopComparisons() {
// given
JPlagResult jPlagResult = createJPlagResult(distribution(EXPECTED_AVG_DISTRIBUTION), distribution(EXPECTED_MAX_DISTRIBUTION),
comparison(submission("1"), submission("2"), .7, .8), comparison(submission("3"), submission("4"), .3, .9));
comparison(submission("1", 22), submission("2", 30), .7, .8, .5, .5, new int[] {9, 3, 1}),
comparison(submission("3", 202), submission("4", 134), .3, .9, .01, .25, new int[] {1, 15, 23, 3}));

// when
List<TopComparison> result = metricMapper.getTopComparisons(jPlagResult);

// then
Assertions.assertEquals(
List.of(new TopComparison("1", "2", Map.of("AVG", .7, "MAX", .8)), new TopComparison("3", "4", Map.of("AVG", .3, "MAX", .9))),
Assertions.assertEquals(List.of(
new TopComparison("1", "2",
Map.of("AVG", .7, "MAX", .8, "MIN", .5, "LONGEST_MATCH", 9.0, "INTERSECTION", 13.0, "SYMMETRIC", .5, "OVERALL", 52.0)),
new TopComparison("3", "4",
Map.of("AVG", .3, "MAX", .9, "MIN", .01, "LONGEST_MATCH", 23.0, "INTERSECTION", 42.0, "SYMMETRIC", .25, "OVERALL", 336.0))),
result);
}

Expand All @@ -64,12 +69,21 @@ private int[] distribution(List<Integer> expectedDistribution) {
return distribution.stream().mapToInt(Integer::intValue).toArray();
}

/**
 * Creates the description of a submission to be mocked by the test.
 * @param name the name the description carries
 * @param tokenCount the token count the description carries
 * @return a new {@link CreateSubmission} holding both values
 */
private CreateSubmission submission(String name, int tokenCount) {
return new CreateSubmission(name, tokenCount);
}

private CreateSubmission submission(String name) {
return new CreateSubmission(name);
return submission(name, 0);
}

/**
 * Creates the description of a comparison to be mocked by the test.
 * @param submission1 description of the first submission
 * @param submission2 description of the second submission
 * @param similarity value stored as the comparison's similarity
 * @param maxSimilarity value stored as the comparison's maximal similarity
 * @param minSimilarity value stored as the comparison's minimal similarity
 * @param symSimilarity value stored as the comparison's symmetric similarity
 * @param matchLengths lengths of the matches belonging to the comparison
 * @return a new {@link Comparison} holding all given values
 */
private Comparison comparison(CreateSubmission submission1, CreateSubmission submission2, double similarity, double maxSimilarity,
double minSimilarity, double symSimilarity, int[] matchLengths) {
return new Comparison(submission1, submission2, similarity, maxSimilarity, minSimilarity, symSimilarity, matchLengths);
}

private Comparison comparison(CreateSubmission submission1, CreateSubmission submission2, double similarity, double maxSimilarity) {
return new Comparison(submission1, submission2, similarity, maxSimilarity);
return comparison(submission1, submission2, similarity, maxSimilarity, 0, 0, new int[0]);
}

private JPlagResult createJPlagResult(int[] avgDistribution, int[] maxDistribution, Comparison... createComparisonsDto) {
Expand All @@ -85,25 +99,43 @@ private JPlagResult createJPlagResult(int[] avgDistribution, int[] maxDistributi
for (Comparison comparisonDto : createComparisonsDto) {
Submission submission1 = mock(Submission.class);
doReturn(comparisonDto.submission1.name).when(submission1).getName();
doReturn(comparisonDto.submission1.tokenCount).when(submission1).getNumberOfTokens();
Submission submission2 = mock(Submission.class);
doReturn(comparisonDto.submission2.name).when(submission2).getName();
doReturn(comparisonDto.submission2.tokenCount).when(submission2).getNumberOfTokens();

JPlagComparison mockedComparison = mock(JPlagComparison.class);
doReturn(submission1).when(mockedComparison).firstSubmission();
doReturn(submission2).when(mockedComparison).secondSubmission();
doReturn(comparisonDto.similarity).when(mockedComparison).similarity();
doReturn(comparisonDto.maxSimilarity).when(mockedComparison).maximalSimilarity();
doReturn(comparisonDto.minSimilarity).when(mockedComparison).minimalSimilarity();
doReturn(comparisonDto.symSimilarity).when(mockedComparison).symmetricSimilarity();
List<Match> matches = createMockMatchList(comparisonDto.matchLengths);
doReturn(matches).when(mockedComparison).matches();
doReturn(matches.stream().mapToInt(Match::length).sum()).when(mockedComparison).getNumberOfMatchedTokens();
comparisonList.add(mockedComparison);
}

doReturn(comparisonList).when(jPlagResult).getComparisons(anyInt());
return jPlagResult;
}

private record Comparison(CreateSubmission submission1, CreateSubmission submission2, double similarity, double maxSimilarity) {
/**
 * Creates one mocked {@link Match} per entry of the given array; each mock reports that entry as its length.
 * @param matchLengths the lengths the mocked matches should report
 * @return the mocked matches, in the same order as the given lengths
 */
private List<Match> createMockMatchList(int[] matchLengths) {
    List<Match> mockedMatches = new ArrayList<>();
    for (int matchLength : matchLengths) {
        Match mockedMatch = mock(Match.class);
        doReturn(matchLength).when(mockedMatch).length();
        mockedMatches.add(mockedMatch);
    }
    return mockedMatches;
}

/**
 * Test data describing one comparison that is turned into a mocked {@code JPlagComparison}.
 * @param submission1 description of the first submission
 * @param submission2 description of the second submission
 * @param similarity value the mock returns from {@code similarity()}
 * @param maxSimilarity value the mock returns from {@code maximalSimilarity()}
 * @param minSimilarity value the mock returns from {@code minimalSimilarity()}
 * @param symSimilarity value the mock returns from {@code symmetricSimilarity()}
 * @param matchLengths lengths used to create the mocked matches of the comparison
 */
private record Comparison(CreateSubmission submission1, CreateSubmission submission2, double similarity, double maxSimilarity,
double minSimilarity, double symSimilarity, int[] matchLengths) {
}

private record CreateSubmission(String name) {
/**
 * Test data describing one submission that is turned into a mocked {@code Submission}.
 * @param name value the mock returns from {@code getName()}
 * @param tokenCount value the mock returns from {@code getNumberOfTokens()}
 */
private record CreateSubmission(String name, int tokenCount) {
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public double getSimilarityForMetric(SimilarityMetric metric) {
case MIN -> resultSimilarityMinimum();
case MAX -> resultSimilarityMaximum();
case INTERSECTION -> resultMatchedTokenNumber();
default -> throw new IllegalArgumentException(String.format("Similarity metric %s not supported for end to end tests", metric.name()));
};
}

Expand Down
36 changes: 29 additions & 7 deletions report-viewer/src/components/ComparisonTableFilter.vue
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,19 @@
</ButtonComponent>
</div>
<OptionsSelector
title="Sort By:"
title="Sorting Metric:"
:defaultSelected="getSortingMetric()"
:labels="tableSortingOptions"
@selection-changed="(index: number) => changeSortingMetric(index)"
/>
<MetricSelector
title="Secondary Metric:"
:defaultSelected="store().uiState.comparisonTableSecondaryMetric"
@selection-changed="
(metric: MetricJsonIdentifier) => (store().uiState.comparisonTableSecondaryMetric = metric)
"
:metrics="secondaryMetricOptions"
/>
</div>
</template>

Expand All @@ -45,8 +53,9 @@ import ToolTipComponent from './ToolTipComponent.vue'
import ButtonComponent from './ButtonComponent.vue'
import OptionsSelector from './optionsSelectors/OptionsSelectorComponent.vue'
import { store } from '@/stores/store'
import { MetricType, metricToolTips } from '@/model/MetricType'
import { MetricJsonIdentifier, MetricTypes } from '@/model/MetricType'
import type { ToolTipLabel } from '@/model/ui/ToolTip'
import MetricSelector from './optionsSelectors/MetricSelector.vue'

const props = defineProps({
searchString: {
Expand Down Expand Up @@ -93,23 +102,27 @@ const searchStringValue = computed({

function changeSortingMetric(index: number) {
store().uiState.comparisonTableSortingMetric =
index < tableSortingMetricOptions.length ? tableSortingMetricOptions[index] : MetricType.AVERAGE
index < tableSortingMetricOptions.length
? tableSortingMetricOptions[index].identifier
: MetricJsonIdentifier.AVERAGE_SIMILARITY
store().uiState.comparisonTableClusterSorting = tableSortingOptions.value[index] == 'Cluster'
}

function getSortingMetric() {
if (store().uiState.comparisonTableClusterSorting && props.enableClusterSorting) {
return tableSortingOptions.value.indexOf('Cluster')
}
return tableSortingMetricOptions.indexOf(store().uiState.comparisonTableSortingMetric)
return tableSortingMetricOptions.findIndex(
(m) => m.identifier == store().uiState.comparisonTableSortingMetric
)
}

const tableSortingMetricOptions = [MetricType.AVERAGE, MetricType.MAXIMUM]
const tableSortingMetricOptions = MetricTypes.METRIC_LIST
const tableSortingOptions = computed(() => {
const options: (ToolTipLabel | string)[] = tableSortingMetricOptions.map((metric) => {
return {
displayValue: metricToolTips[metric].longName,
tooltip: metricToolTips[metric].tooltip
displayValue: metric.longName,
tooltip: metric.tooltip
}
})
if (props.enableClusterSorting) {
Expand All @@ -118,6 +131,15 @@ const tableSortingOptions = computed(() => {
return options
})

// Metrics offered by the "Secondary Metric" selector, in display order.
// The average similarity is not listed here.
// NOTE(review): presumably because the comparison table always shows the average in its own column - confirm.
const secondaryMetricOptions = [
MetricJsonIdentifier.MAXIMUM_SIMILARITY,
MetricJsonIdentifier.MINIMUM_SIMILARITY,
MetricJsonIdentifier.SYMMETRIC,
MetricJsonIdentifier.INTERSECTION,
MetricJsonIdentifier.LONGEST_MATCH,
MetricJsonIdentifier.OVERALL
]

/**
* Sets the anonymous set to empty if it is full or adds all submission ids to it if it is not full
*/
Expand Down
53 changes: 35 additions & 18 deletions report-viewer/src/components/ComparisonsTable.vue
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,30 @@
<ToolTipComponent class="flex-1" :direction="displayClusters ? 'top' : 'left'">
<template #default>
<p class="w-full text-center">
{{ metricToolTips[MetricType.AVERAGE].shortName }}
{{ MetricTypes.AVERAGE_SIMILARITY.shortName }}
</p>
</template>
<template #tooltip>
<p class="whitespace-pre text-sm">
{{ metricToolTips[MetricType.AVERAGE].tooltip }}
{{ MetricTypes.AVERAGE_SIMILARITY.tooltip }}
</p>
</template>
</ToolTipComponent>

<ToolTipComponent class="flex-1" :direction="displayClusters ? 'top' : 'left'">
<template #default>
<p class="w-full text-center">
{{ metricToolTips[MetricType.MAXIMUM].shortName }}
{{
MetricTypes.METRIC_MAP[store().uiState.comparisonTableSecondaryMetric]
.shortName
}}
</p>
</template>
<template #tooltip>
<p class="whitespace-pre text-sm">
{{ metricToolTips[MetricType.MAXIMUM].tooltip }}
{{
MetricTypes.METRIC_MAP[store().uiState.comparisonTableSecondaryMetric].tooltip
}}
</p>
</template>
</ToolTipComponent>
Expand Down Expand Up @@ -102,10 +107,18 @@
<!-- Similarities -->
<div class="tableCellSimilarity">
<div class="w-1/2">
{{ (item.similarities[MetricType.AVERAGE] * 100).toFixed(2) }}%
{{
MetricTypes.AVERAGE_SIMILARITY.format(
item.similarities[MetricTypes.AVERAGE_SIMILARITY.identifier]
)
}}
</div>
<div class="w-1/2">
{{ (item.similarities[MetricType.MAXIMUM] * 100).toFixed(2) }}%
{{
MetricTypes.METRIC_MAP[
store().uiState.comparisonTableSecondaryMetric
].format(item.similarities[store().uiState.comparisonTableSecondaryMetric])
}}
</div>
</div>
</RouterLink>
Expand Down Expand Up @@ -175,7 +188,7 @@ import { library } from '@fortawesome/fontawesome-svg-core'
import { faUserGroup } from '@fortawesome/free-solid-svg-icons'
import { generateHues } from '@/utils/ColorUtils'
import ToolTipComponent from './ToolTipComponent.vue'
import { MetricType, metricToolTips } from '@/model/MetricType'
import { MetricJsonIdentifier, MetricTypes } from '@/model/MetricType'
import NameElement from './NameElement.vue'
import ComparisonTableFilter from './ComparisonTableFilter.vue'

Expand Down Expand Up @@ -251,28 +264,32 @@ function getFilteredComparisons(comparisons: ComparisonListElement[]) {
}

// metric search
const searchPerMetric: Record<MetricType, string[]> = {
[MetricType.AVERAGE]: [],
[MetricType.MAXIMUM]: []
}
const searchPerMetric: Record<MetricJsonIdentifier, string[]> = {} as Record<
MetricJsonIdentifier,
string[]
>
MetricTypes.METRIC_JSON_IDENTIFIERS.forEach((m) => {
searchPerMetric[m] = []
})
metricSearches.forEach((s) => {
const regexResult = /^(?:(avg|max):)([<>]=?[0-9]+%?$)/.exec(s)
if (regexResult) {
const metricName = regexResult[1]
let metric = MetricType.AVERAGE
for (const m of [MetricType.AVERAGE, MetricType.MAXIMUM]) {
if (metricToolTips[m].shortName.toLowerCase() == metricName) {
let metric = MetricTypes.AVERAGE_SIMILARITY
for (const m of MetricTypes.METRIC_LIST) {
if (m.shortName.toLowerCase() == metricName) {
metric = m
break
}
}
searchPerMetric[metric].push(regexResult[2])
searchPerMetric[metric.identifier].push(regexResult[2])
} else {
searchPerMetric[MetricType.AVERAGE].push(s)
searchPerMetric[MetricType.MAXIMUM].push(s)
MetricTypes.METRIC_JSON_IDENTIFIERS.forEach((m) => {
searchPerMetric[m].push(s)
})
}
})
for (const metric of [MetricType.AVERAGE, MetricType.MAXIMUM]) {
for (const metric of MetricTypes.METRIC_JSON_IDENTIFIERS) {
for (const search of searchPerMetric[metric]) {
const regexResult = /([<>]=?)([0-9]+)%?/.exec(search)!
const operator = regexResult[1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ import { Chart, registerables } from 'chart.js'
import ChartDataLabels from 'chartjs-plugin-datalabels'
import { graphColors } from '@/utils/ColorUtils'
import type { Distribution } from '@/model/Distribution'
import { MetricType } from '@/model/MetricType'
import { store } from '@/stores/store'
import DistributionDiagramOptions from './DistributionDiagramOptions.vue'
import type { MetricJsonIdentifier } from '@/model/MetricType'

Chart.register(...registerables)
Chart.register(ChartDataLabels)

const props = defineProps({
distributions: {
type: Object as PropType<Record<MetricType, Distribution>>,
type: Object as PropType<Record<MetricJsonIdentifier, Distribution>>,
required: true
},
xScale: {
Expand Down
Loading
Loading