From 48ecef4fa9def1456e6ce9ef96353559a70612cd Mon Sep 17 00:00:00 2001 From: Olmo Kramer Date: Mon, 4 Sep 2023 14:39:42 +0200 Subject: [PATCH] Add parse and compare hooks This adds 4 hooks that can be set on the `JPlagOptions` class: - A pre-parsing stage hook, which is called before all submissions are parsed. It is called with a list of all submission paths eligible for comparison, i.e. all paths that are not filtered out by `SubmissionSetBuilder.isExcludedEntry`. - A post-parsing hook per submission, which is called after the submission has been parsed. It is called with the `Submission` object that has been parsed. - A pre-comparing stage hook, which is called before the comparisons are started. It is called with a list of all comparison tuples. - A post-comparing hook, which is called after each comparison. It is called with the `SubmissionTuple` that has just been compared. --- cli/src/main/java/de/jplag/cli/CLI.java | 3 +- .../java/de/jplag/SubmissionSetBuilder.java | 32 +++-- .../java/de/jplag/options/JPlagOptions.java | 110 +++++++++++++++--- .../strategy/AbstractComparisonStrategy.java | 11 +- .../strategy/ParallelComparisonStrategy.java | 3 +- 5 files changed, 125 insertions(+), 34 deletions(-) diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index 9507a9a804..3874cc3d55 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -171,7 +171,8 @@ public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws Cl JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories, oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName, JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions, - this.options.advanced.debug, mergingOptions); + 
this.options.advanced.debug, mergingOptions, JPlagOptions.DEFAULT_PRE_PARSE_HOOK, JPlagOptions.DEFAULT_PARSE_HOOK, + JPlagOptions.DEFAULT_PRE_COMPARE_HOOK, JPlagOptions.DEFAULT_COMPARE_HOOK); String baseCodePath = this.options.baseCode; File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath); diff --git a/core/src/main/java/de/jplag/SubmissionSetBuilder.java b/core/src/main/java/de/jplag/SubmissionSetBuilder.java index 4d93c0d443..4cb74b8038 100644 --- a/core/src/main/java/de/jplag/SubmissionSetBuilder.java +++ b/core/src/main/java/de/jplag/SubmissionSetBuilder.java @@ -233,18 +233,26 @@ private Submission processSubmission(String submissionName, File submissionFile, */ private void processRootDirectoryEntries(File rootDirectory, boolean multipleRoots, Map foundSubmissions, boolean isNew) throws ExitException { - for (String fileName : listSubmissionFiles(rootDirectory)) { - File submissionFile = new File(rootDirectory, fileName); - - String errorMessage = isExcludedEntry(submissionFile); - if (errorMessage == null) { - String rootDirectoryPrefix = multipleRoots ? (rootDirectory.getName() + File.separator) : ""; - String submissionName = rootDirectoryPrefix + fileName; - Submission submission = processSubmission(submissionName, submissionFile, isNew); - foundSubmissions.put(submission.getRoot(), submission); - } else { - logger.error(errorMessage); - } + /* Resolve and filter all entries up front so the complete list of eligible files exists before any parsing starts; excluded entries are still reported through logger.error, as before. */ List submissionFiles = Arrays.stream(listSubmissionFiles(rootDirectory)).map(fileName -> new File(rootDirectory, fileName)) + .filter(submissionFile -> { + String errorMessage = isExcludedEntry(submissionFile); + if (errorMessage == null) { + return true; + } else { + logger.error(errorMessage); + return false; + } + }).toList(); + + /* Pre-parse hook: receives every file that passed isExcludedEntry, before any of them reaches processSubmission. NOTE(review): this method handles a single rootDirectory, so with several roots the hook presumably fires once per root rather than once overall; confirm against the caller. */ this.options.preParseHook().callback(submissionFiles); + + for (File submissionFile : submissionFiles) { + String rootDirectoryPrefix = multipleRoots ? 
(rootDirectory.getName() + File.separator) : ""; + String submissionName = rootDirectoryPrefix + submissionFile.getName(); + Submission submission = processSubmission(submissionName, submissionFile, isNew); + foundSubmissions.put(submission.getRoot(), submission); + + this.options.parseHook().callback(submission); } } diff --git a/core/src/main/java/de/jplag/options/JPlagOptions.java b/core/src/main/java/de/jplag/options/JPlagOptions.java index 44eea1d2f9..f08d2271f3 100644 --- a/core/src/main/java/de/jplag/options/JPlagOptions.java +++ b/core/src/main/java/de/jplag/options/JPlagOptions.java @@ -17,9 +17,11 @@ import de.jplag.JPlag; import de.jplag.Language; +import de.jplag.Submission; import de.jplag.clustering.ClusteringOptions; import de.jplag.exceptions.BasecodeException; import de.jplag.merging.MergingOptions; +import de.jplag.strategy.SubmissionTuple; import de.jplag.util.FileUtils; /** @@ -48,7 +50,8 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories, File baseCodeSubmissionDirectory, String subdirectoryName, List fileSuffixes, String exclusionFileName, SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions, - boolean debugParser, MergingOptions mergingOptions) { + boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook, + CompareHook compareHook) { public static final double DEFAULT_SIMILARITY_THRESHOLD = 0; public static final int DEFAULT_SHOWN_COMPARISONS = 100; @@ -61,13 +64,15 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories) { this(language, null, submissionDirectories, oldSubmissionDirectories, null, null, null, null, DEFAULT_SIMILARITY_METRIC, - DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new 
MergingOptions()); + DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions(), + DEFAULT_PRE_PARSE_HOOK, DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK); } public JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories, File baseCodeSubmissionDirectory, String subdirectoryName, List fileSuffixes, String exclusionFileName, SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions, - boolean debugParser, MergingOptions mergingOptions) { + boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook, + CompareHook compareHook) { this.language = language; this.debugParser = debugParser; this.fileSuffixes = fileSuffixes == null || fileSuffixes.isEmpty() ? null : Collections.unmodifiableList(fileSuffixes); @@ -82,90 +87,118 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, Set subm this.subdirectoryName = subdirectoryName; this.clusteringOptions = clusteringOptions; this.mergingOptions = mergingOptions; + this.preParseHook = preParseHook; + this.parseHook = parseHook; + this.preCompareHook = preCompareHook; + this.compareHook = compareHook; } public JPlagOptions withLanguageOption(Language language) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withDebugParser(boolean debugParser) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, 
baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withFileSuffixes(List fileSuffixes) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSimilarityThreshold(double similarityThreshold) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMaximumNumberOfComparisons(int maximumNumberOfComparisons) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSimilarityMetric(SimilarityMetric similarityMetric) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, 
exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMinimumTokenMatch(Integer minimumTokenMatch) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withExclusionFileName(String exclusionFileName) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSubmissionDirectories(Set submissionDirectories) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withOldSubmissionDirectories(Set oldSubmissionDirectories) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, 
maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withBaseCodeSubmissionDirectory(File baseCodeSubmissionDirectory) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withSubdirectoryName(String subdirectoryName) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withClusteringOptions(ClusteringOptions clusteringOptions) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public JPlagOptions withMergingOptions(MergingOptions mergingOptions) { return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, - clusteringOptions, debugParser, 
mergingOptions); + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withPreParseHook(PreParseHook preParseHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withParseHook(ParseHook parseHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withPreCompareHook(PreCompareHook preCompareHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); + } + + public JPlagOptions withCompareHook(CompareHook compareHook) { + return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory, + subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons, + clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook); } public boolean hasBaseCode() { @@ -225,6 +258,46 @@ private Integer normalizeMinimumTokenMatch(Integer minimumTokenMatch) { return (minimumTokenMatch != 
null && minimumTokenMatch < 1) ? Integer.valueOf(1) : minimumTokenMatch; } + /** Callback that SubmissionSetBuilder.processRootDirectoryEntries invokes with the list of non-excluded submission files before parsing them. NOTE(review): the canonical constructor stores all four hooks without a null check, so a null hook would only fail at the call site. */ public interface PreParseHook { + void callback(List submissions); + } + + /** No-op default for the pre-parse hook. */ public static final PreParseHook DEFAULT_PRE_PARSE_HOOK = new PreParseHook() { + @Override + public void callback(List submissions) { + } + }; + + /** Callback invoked with each Submission right after it has been processed and added to the found submissions. */ public interface ParseHook { + void callback(Submission submission); + } + + /** No-op default for the parse hook. */ public static final ParseHook DEFAULT_PARSE_HOOK = new ParseHook() { + @Override + public void callback(Submission submission) { + } + }; + + /** Callback that AbstractComparisonStrategy.buildComparisonTuples invokes with the tuple list just before returning it; the hook sees the same list instance that is returned. */ public interface PreCompareHook { + void callback(List tuples); + } + + /** No-op default for the pre-compare hook. */ public static final PreCompareHook DEFAULT_PRE_COMPARE_HOOK = new PreCompareHook() { + @Override + public void callback(List tuples) { + } + }; + + /** Callback that AbstractComparisonStrategy.compareSubmissions invokes with the tuple after the comparison has been computed and logged, regardless of the similarity threshold. ParallelComparisonStrategy calls that method from a parallel stream, so implementations may be invoked concurrently. */ public interface CompareHook { + void callback(SubmissionTuple tuple); + } + + /** No-op default for the compare hook. */ public static final CompareHook DEFAULT_COMPARE_HOOK = new CompareHook() { + @Override + public void callback(SubmissionTuple tuple) { + } + }; + /** * Creates new options to configure {@link JPlag}. * @param language Language to use when parsing the submissions. 
@@ -257,7 +330,8 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, File submissio boolean debugParser, MergingOptions mergingOptions) throws BasecodeException { this(language, minimumTokenMatch, Set.of(submissionDirectory), oldSubmissionDirectories, convertLegacyBaseCodeToFile(baseCodeSubmissionName, submissionDirectory), subdirectoryName, fileSuffixes, exclusionFileName, - similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions); + similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions, + DEFAULT_PRE_PARSE_HOOK, DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK); } /** diff --git a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java index 19822ef412..efb35f8ae0 100644 --- a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java +++ b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java @@ -45,8 +45,16 @@ protected void compareSubmissionsToBaseCode(SubmissionSet submissionSet) { * Compares two submissions and optionally returns the results if similarity is high enough. 
*/ protected Optional compareSubmissions(Submission first, Submission second) { + return compareSubmissions(new SubmissionTuple(first, second)); + } + + /** Compares the two submissions of the given tuple, notifies the compare hook, and returns the comparison only if it is above the configured similarity threshold. */ protected Optional compareSubmissions(SubmissionTuple tuple) { + Submission first = tuple.left(); + Submission second = tuple.right(); + JPlagComparison comparison = greedyStringTiling.compare(first, second); logger.info("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity()); + /* Fires for every compared pair, before the threshold check; runs on whichever thread performs the comparison, which may be a worker thread of a parallel stream. */ this.options.compareHook().callback(tuple); if (options.similarityMetric().isAboveThreshold(comparison, options.similarityThreshold())) { return Optional.of(comparison); @@ -57,7 +65,7 @@ protected Optional compareSubmissions(Submission first, Submiss /** * @return a list of all submission tuples to be processed. */ - protected static List buildComparisonTuples(List submissions) { + protected List buildComparisonTuples(List submissions) { List tuples = new ArrayList<>(); List validSubmissions = submissions.stream().filter(s -> s.getTokenList() != null).toList(); @@ -70,6 +78,7 @@ protected static List buildComparisonTuples(List su } } } + /* The hook receives the same mutable list that is returned below, so changes made by the hook are visible to the caller. */ this.options.preCompareHook().callback(tuples); return tuples; } } diff --git a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java index fd94b9293b..1a7fca01b8 100644 --- a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java +++ b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java @@ -28,8 +28,7 @@ public JPlagResult compareSubmissions(SubmissionSet submissionSet) { } List tuples = buildComparisonTuples(submissionSet.getSubmissions()); - List comparisons = tuples.stream().parallel().map(tuple -> compareSubmissions(tuple.left(), tuple.right())) - .flatMap(Optional::stream).toList(); + List comparisons = tuples.stream().parallel().map(this::compareSubmissions).flatMap(Optional::stream).toList(); long durationInMillis = System.currentTimeMillis() - 
timeBeforeStartInMillis; return new JPlagResult(comparisons, submissionSet, durationInMillis, options);