diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index 9507a9a804..3874cc3d55 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -171,7 +171,8 @@ public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws Cl JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories, oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName, JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions, - this.options.advanced.debug, mergingOptions); + this.options.advanced.debug, mergingOptions, JPlagOptions.DEFAULT_PRE_PARSE_HOOK, JPlagOptions.DEFAULT_PARSE_HOOK, + JPlagOptions.DEFAULT_PRE_COMPARE_HOOK, JPlagOptions.DEFAULT_COMPARE_HOOK); String baseCodePath = this.options.baseCode; File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath); diff --git a/core/src/main/java/de/jplag/SubmissionSetBuilder.java b/core/src/main/java/de/jplag/SubmissionSetBuilder.java index 4d93c0d443..4cb74b8038 100644 --- a/core/src/main/java/de/jplag/SubmissionSetBuilder.java +++ b/core/src/main/java/de/jplag/SubmissionSetBuilder.java @@ -233,18 +233,26 @@ private Submission processSubmission(String submissionName, File submissionFile, */ private void processRootDirectoryEntries(File rootDirectory, boolean multipleRoots, Map foundSubmissions, boolean isNew) throws ExitException { - for (String fileName : listSubmissionFiles(rootDirectory)) { - File submissionFile = new File(rootDirectory, fileName); - - String errorMessage = isExcludedEntry(submissionFile); - if (errorMessage == null) { - String rootDirectoryPrefix = multipleRoots ? (rootDirectory.getName() + File.separator) : ""; - String submissionName = rootDirectoryPrefix + fileName; - Submission submission = processSubmission(submissionName, submissionFile, isNew); - foundSubmissions.put(submission.getRoot(), submission); - } else { - logger.error(errorMessage); - } + List submissionFiles = Arrays.stream(listSubmissionFiles(rootDirectory)).map(fileName -> new File(rootDirectory, fileName)) + .filter(submissionFile -> { + String errorMessage = isExcludedEntry(submissionFile); + if (errorMessage == null) { + return true; + } else { + logger.error(errorMessage); + return false; + } + }).toList(); + + this.options.preParseHook().callback(submissionFiles); + + for (File submissionFile : submissionFiles) { + String rootDirectoryPrefix = multipleRoots ? (rootDirectory.getName() + File.separator) : ""; + String submissionName = rootDirectoryPrefix + submissionFile.getName(); + Submission submission = processSubmission(submissionName, submissionFile, isNew); + foundSubmissions.put(submission.getRoot(), submission); + + this.options.parseHook().callback(submission); } } diff --git a/core/src/main/java/de/jplag/options/JPlagOptions.java b/core/src/main/java/de/jplag/options/JPlagOptions.java index 44eea1d2f9..f08d2271f3 100644 --- a/core/src/main/java/de/jplag/options/JPlagOptions.java +++ b/core/src/main/java/de/jplag/options/JPlagOptions.java @@ -17,9 +17,11 @@ import de.jplag.JPlag; import de.jplag.Language; +import de.jplag.Submission; import de.jplag.clustering.ClusteringOptions; import de.jplag.exceptions.BasecodeException; import de.jplag.merging.MergingOptions; +import de.jplag.strategy.SubmissionTuple; import de.jplag.util.FileUtils; /** @@ -48,7 +50,8 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories, File baseCodeSubmissionDirectory, String subdirectoryName, List fileSuffixes, String exclusionFileName, SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions, - boolean debugParser, MergingOptions mergingOptions) { + boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook, + CompareHook compareHook) { public static final double DEFAULT_SIMILARITY_THRESHOLD = 0; public static final int DEFAULT_SHOWN_COMPARISONS = 100; @@ -61,13 +64,15 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories) { this(language, null, submissionDirectories, oldSubmissionDirectories, null, null, null, null, DEFAULT_SIMILARITY_METRIC, - DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions()); + DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions(), + DEFAULT_PRE_PARSE_HOOK, DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK); } public JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories, File baseCodeSubmissionDirectory, String subdirectoryName, List fileSuffixes, String exclusionFileName, SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions, - boolean debugParser, MergingOptions mergingOptions) { + boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook, + CompareHook compareHook) { this.language = language; this.debugParser = debugParser; this.fileSuffixes = fileSuffixes == null || fileSuffixes.isEmpty() ? null : Collections.unmodifiableList(fileSuffixes); @@ -82,90 +87,118 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, Set subm this.subdirectoryName = subdirectoryName; this.clusteringOptions = clusteringOptions; this.mergingOptions = mergingOptions; + this.preParseHook = preParseHook; + this.parseHook = parseHook; + this.preCompareHook = preCompareHook; + this.compareHook = compareHook; } public JPlagOptions withLanguageOption(Language language) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withDebugParser(boolean debugParser) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withFileSuffixes(List fileSuffixes) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSimilarityThreshold(double similarityThreshold) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMaximumNumberOfComparisons(int maximumNumberOfComparisons) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSimilarityMetric(SimilarityMetric similarityMetric) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMinimumTokenMatch(Integer minimumTokenMatch) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withExclusionFileName(String exclusionFileName) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSubmissionDirectories(Set submissionDirectories) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withOldSubmissionDirectories(Set oldSubmissionDirectories) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withBaseCodeSubmissionDirectory(File baseCodeSubmissionDirectory) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSubdirectoryName(String subdirectoryName) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withClusteringOptions(ClusteringOptions clusteringOptions) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMergingOptions(MergingOptions mergingOptions) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withPreParseHook(PreParseHook preParseHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withParseHook(ParseHook parseHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withPreCompareHook(PreCompareHook preCompareHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withCompareHook(CompareHook compareHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public boolean hasBaseCode() { @@ -225,6 +258,46 @@ private Integer normalizeMinimumTokenMatch(Integer minimumTokenMatch) { return (minimumTokenMatch != null && minimumTokenMatch < 1) ? Integer.valueOf(1) : minimumTokenMatch; } + public interface PreParseHook { + void callback(List submissions); + } + + public static final PreParseHook DEFAULT_PRE_PARSE_HOOK = new PreParseHook() { + @Override + public void callback(List submissions) { + } + }; + + public interface ParseHook { + void callback(Submission submission); + } + + public static final ParseHook DEFAULT_PARSE_HOOK = new ParseHook() { + @Override + public void callback(Submission submission) { + } + }; + + public interface PreCompareHook { + void callback(List tuples); + } + + public static final PreCompareHook DEFAULT_PRE_COMPARE_HOOK = new PreCompareHook() { + @Override + public void callback(List tuples) { + } + }; + + public interface CompareHook { + void callback(SubmissionTuple tuple); + } + + public static final CompareHook DEFAULT_COMPARE_HOOK = new CompareHook() { + @Override + public void callback(SubmissionTuple tuple) { + } + }; + /** * Creates new options to configure {@link JPlag}. * @param language Language to use when parsing the submissions. @@ -257,7 +330,8 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, File submissio boolean debugParser, MergingOptions mergingOptions) throws BasecodeException { this(language, minimumTokenMatch, Set.of(submissionDirectory), oldSubmissionDirectories, convertLegacyBaseCodeToFile(baseCodeSubmissionName, submissionDirectory), subdirectoryName, fileSuffixes, exclusionFileName, - similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions); + similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions, + DEFAULT_PRE_PARSE_HOOK, DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK); } /** diff --git a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java index 19822ef412..efb35f8ae0 100644 --- a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java +++ b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java @@ -45,8 +45,16 @@ protected void compareSubmissionsToBaseCode(SubmissionSet submissionSet) { * Compares two submissions and optionally returns the results if similarity is high enough. */ protected Optional compareSubmissions(Submission first, Submission second) { + return compareSubmissions(new SubmissionTuple(first, second)); + } + + protected Optional compareSubmissions(SubmissionTuple tuple) { + Submission first = tuple.left(); + Submission second = tuple.right(); + JPlagComparison comparison = greedyStringTiling.compare(first, second); logger.info("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity()); + this.options.compareHook().callback(tuple); if (options.similarityMetric().isAboveThreshold(comparison, options.similarityThreshold())) { return Optional.of(comparison); @@ -57,7 +65,7 @@ protected Optional compareSubmissions(Submission first, Submiss /** * @return a list of all submission tuples to be processed. */ - protected static List buildComparisonTuples(List submissions) { + protected List buildComparisonTuples(List submissions) { List tuples = new ArrayList<>(); List validSubmissions = submissions.stream().filter(s -> s.getTokenList() != null).toList(); @@ -70,6 +78,7 @@ protected static List buildComparisonTuples(List su } } } + this.options.preCompareHook().callback(tuples); return tuples; } } diff --git a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java index fd94b9293b..1a7fca01b8 100644 --- a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java +++ b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java @@ -28,8 +28,7 @@ public JPlagResult compareSubmissions(SubmissionSet submissionSet) { } List tuples = buildComparisonTuples(submissionSet.getSubmissions()); - List comparisons = tuples.stream().parallel().map(tuple -> compareSubmissions(tuple.left(), tuple.right())) - .flatMap(Optional::stream).toList(); + List comparisons = tuples.stream().parallel().map(this::compareSubmissions).flatMap(Optional::stream).toList(); long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis; return new JPlagResult(comparisons, submissionSet, durationInMillis, options);