Skip to content

Commit

Permalink
Add parse and compare hooks
Browse files Browse the repository at this point in the history
This adds 4 hooks that can be set on the `JPlagOptions` class:

- A pre-parsing stage hook, which is called once before any submission
  is parsed. It is called with the list of all submissions eligible
  for the comparison, i.e. all submissions whose paths are not filtered
  out by `SubmissionSetBuilder.isExcludedEntry`.
- A post-parsing hook per submission, which is called after the
  submission has been parsed. It is called with the `Submission`
  object that has been parsed.
- A pre-comparing stage hook, which is called before the comparisons
  are started. It is called with a list of all comparison tuples.
- A post-comparing hook, which is called after each comparison. It
  is called with the `JPlagComparison` result of the pair of
  submissions that has just been compared.
  • Loading branch information
olmokramer committed Oct 3, 2023
1 parent 9149f42 commit a6f810b
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 22 deletions.
3 changes: 2 additions & 1 deletion cli/src/main/java/de/jplag/cli/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws Cl
JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories,
oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName,
JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions,
this.options.advanced.debug, mergingOptions);
this.options.advanced.debug, mergingOptions, JPlagOptions.DEFAULT_PRE_PARSE_HOOK, JPlagOptions.DEFAULT_PARSE_HOOK,
JPlagOptions.DEFAULT_PRE_COMPARE_HOOK, JPlagOptions.DEFAULT_COMPARE_HOOK);

String baseCodePath = this.options.baseCode;
File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath);
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ private void parseBaseCodeSubmission(Submission baseCode) throws BasecodeExcepti
* Parse all given submissions.
*/
private void parseSubmissions(List<Submission> submissions) {
this.options.preParseHook().accept(submissions);

if (submissions.isEmpty()) {
logger.warn("No submissions to parse!");
return;
Expand Down Expand Up @@ -167,6 +169,8 @@ private void parseSubmissions(List<Submission> submissions) {
} else {
logger.error("ERROR -> Submission {} removed", currentSubmissionName);
}

this.options.parseHook().accept(submission);
}

int validSubmissions = submissions.size() - errors - tooShort;
Expand Down
98 changes: 78 additions & 20 deletions core/src/main/java/de/jplag/options/JPlagOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.JPlag;
import de.jplag.JPlagComparison;
import de.jplag.Language;
import de.jplag.Submission;
import de.jplag.clustering.ClusteringOptions;
import de.jplag.exceptions.BasecodeException;
import de.jplag.merging.MergingOptions;
import de.jplag.strategy.SubmissionTuple;
import de.jplag.util.FileUtils;

/**
Expand All @@ -44,11 +48,21 @@
* set to {@link #SHOW_ALL_COMPARISONS} all comparisons will be shown.
* @param clusteringOptions Clustering options
* @param debugParser If true, submissions that cannot be parsed will be stored in a separate directory.
* @param mergingOptions Parameters for match merging.
* @param preParseHook Hook to be executed before any submission is parsed. The hook is called with the list of all
* {@link Submission} instances that will be parsed. The default hook does nothing.
* @param parseHook Hook to be executed after parsing a single submission. The hook is called with the {@link Submission}
* that has just been parsed. The default hook does nothing.
* @param preCompareHook Hook to be executed directly before performing submission comparisons. The hook is called with
* the list of all {@link SubmissionTuple} instances that will be compared. The default hook does nothing.
* @param compareHook Hook to be executed after comparing two submissions. The hook is called with the
* {@link JPlagComparison} result. The default hook does nothing.
*/
public record JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories,
File baseCodeSubmissionDirectory, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
boolean debugParser, MergingOptions mergingOptions) {
boolean debugParser, MergingOptions mergingOptions, Consumer<List<Submission>> preParseHook, Consumer<Submission> parseHook,
Consumer<List<SubmissionTuple>> preCompareHook, Consumer<JPlagComparison> compareHook) {

public static final double DEFAULT_SIMILARITY_THRESHOLD = 0;
public static final int DEFAULT_SHOWN_COMPARISONS = 100;
Expand All @@ -59,15 +73,29 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set<Fil

private static final Logger logger = LoggerFactory.getLogger(JPlagOptions.class);

/** No-op pre-parse hook: accepts the list of submissions and does nothing with it. */
public static final Consumer<List<Submission>> DEFAULT_PRE_PARSE_HOOK = ignoredSubmissions -> {
    // intentionally empty
};

/** No-op parse hook: accepts a single submission and does nothing with it. */
public static final Consumer<Submission> DEFAULT_PARSE_HOOK = ignoredSubmission -> {
    // intentionally empty
};

/** No-op pre-compare hook: accepts the list of submission tuples and does nothing with it. */
public static final Consumer<List<SubmissionTuple>> DEFAULT_PRE_COMPARE_HOOK = ignoredTuples -> {
    // intentionally empty
};

/** No-op compare hook: accepts a single comparison result and does nothing with it. */
public static final Consumer<JPlagComparison> DEFAULT_COMPARE_HOOK = ignoredComparison -> {
    // intentionally empty
};

public JPlagOptions(Language language, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories) {
this(language, null, submissionDirectories, oldSubmissionDirectories, null, null, null, null, DEFAULT_SIMILARITY_METRIC,
DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions());
DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions(), DEFAULT_PRE_PARSE_HOOK,
DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK);
}

public JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories,
File baseCodeSubmissionDirectory, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
boolean debugParser, MergingOptions mergingOptions) {
boolean debugParser, MergingOptions mergingOptions, Consumer<List<Submission>> preParseHook, Consumer<Submission> parseHook,
Consumer<List<SubmissionTuple>> preCompareHook, Consumer<JPlagComparison> compareHook) {
this.language = language;
this.debugParser = debugParser;
this.fileSuffixes = fileSuffixes == null || fileSuffixes.isEmpty() ? null : Collections.unmodifiableList(fileSuffixes);
Expand All @@ -82,90 +110,118 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> subm
this.subdirectoryName = subdirectoryName;
this.clusteringOptions = clusteringOptions;
this.mergingOptions = mergingOptions;
this.preParseHook = preParseHook;
this.parseHook = parseHook;
this.preCompareHook = preCompareHook;
this.compareHook = compareHook;
}

public JPlagOptions withLanguageOption(Language language) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withDebugParser(boolean debugParser) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withFileSuffixes(List<String> fileSuffixes) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSimilarityThreshold(double similarityThreshold) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMaximumNumberOfComparisons(int maximumNumberOfComparisons) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSimilarityMetric(SimilarityMetric similarityMetric) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMinimumTokenMatch(Integer minimumTokenMatch) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withExclusionFileName(String exclusionFileName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSubmissionDirectories(Set<File> submissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withOldSubmissionDirectories(Set<File> oldSubmissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withBaseCodeSubmissionDirectory(File baseCodeSubmissionDirectory) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSubdirectoryName(String subdirectoryName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withClusteringOptions(ClusteringOptions clusteringOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMergingOptions(MergingOptions mergingOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options that uses the given pre-parse hook; every other setting is carried over unchanged.
 * @param preParseHook Hook that is called with the list of submissions before they are parsed. If {@code null}, the
 * no-op {@link #DEFAULT_PRE_PARSE_HOOK} is used instead.
 * @return a new {@link JPlagOptions} instance with the pre-parse hook replaced.
 */
public JPlagOptions withPreParseHook(Consumer<List<Submission>> preParseHook) {
    // The hook is invoked unconditionally before parsing, so a stored null would only surface there as a
    // NullPointerException. Fall back to the no-op default instead.
    Consumer<List<Submission>> effectiveHook = preParseHook == null ? DEFAULT_PRE_PARSE_HOOK : preParseHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, effectiveHook, parseHook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options that uses the given parse hook; every other setting is carried over unchanged.
 * @param parseHook Hook that is called with each {@link Submission} once it has been processed by the parsing stage. If
 * {@code null}, the no-op {@link #DEFAULT_PARSE_HOOK} is used instead.
 * @return a new {@link JPlagOptions} instance with the parse hook replaced.
 */
public JPlagOptions withParseHook(Consumer<Submission> parseHook) {
    // The hook is invoked unconditionally for each submission, so a stored null would only surface there as a
    // NullPointerException. Fall back to the no-op default instead.
    Consumer<Submission> effectiveHook = parseHook == null ? DEFAULT_PARSE_HOOK : parseHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, effectiveHook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options that uses the given pre-compare hook; every other setting is carried over unchanged.
 * @param preCompareHook Hook that is called with the list of {@link SubmissionTuple} instances that will be compared. If
 * {@code null}, the no-op {@link #DEFAULT_PRE_COMPARE_HOOK} is used instead.
 * @return a new {@link JPlagOptions} instance with the pre-compare hook replaced.
 */
public JPlagOptions withPreCompareHook(Consumer<List<SubmissionTuple>> preCompareHook) {
    // The hook is invoked unconditionally once the comparison tuples are built, so a stored null would only
    // surface there as a NullPointerException. Fall back to the no-op default instead.
    Consumer<List<SubmissionTuple>> effectiveHook = preCompareHook == null ? DEFAULT_PRE_COMPARE_HOOK : preCompareHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, effectiveHook, compareHook);
}

/**
 * Creates a copy of these options that uses the given compare hook; every other setting is carried over unchanged.
 * @param compareHook Hook that is called with the {@link JPlagComparison} produced for each compared pair of
 * submissions. If {@code null}, the no-op {@link #DEFAULT_COMPARE_HOOK} is used instead.
 * @return a new {@link JPlagOptions} instance with the compare hook replaced.
 */
public JPlagOptions withCompareHook(Consumer<JPlagComparison> compareHook) {
    // The hook is invoked unconditionally after every comparison, so a stored null would only surface there as a
    // NullPointerException. Fall back to the no-op default instead.
    Consumer<JPlagComparison> effectiveHook = compareHook == null ? DEFAULT_COMPARE_HOOK : compareHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, effectiveHook);
}

public boolean hasBaseCode() {
Expand Down Expand Up @@ -254,10 +310,12 @@ private Integer normalizeMinimumTokenMatch(Integer minimumTokenMatch) {
public JPlagOptions(Language language, Integer minimumTokenMatch, File submissionDirectory, Set<File> oldSubmissionDirectories,
String baseCodeSubmissionName, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
boolean debugParser, MergingOptions mergingOptions) throws BasecodeException {
boolean debugParser, MergingOptions mergingOptions, Consumer<List<Submission>> preParseHook, Consumer<Submission> parseHook,
Consumer<List<SubmissionTuple>> preCompareHook, Consumer<JPlagComparison> compareHook) throws BasecodeException {
this(language, minimumTokenMatch, Set.of(submissionDirectory), oldSubmissionDirectories,
convertLegacyBaseCodeToFile(baseCodeSubmissionName, submissionDirectory), subdirectoryName, fileSuffixes, exclusionFileName,
similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions);
similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions, preParseHook,
parseHook, preCompareHook, compareHook);
}

/**
Expand All @@ -280,7 +338,7 @@ public JPlagOptions withBaseCodeSubmissionName(String baseCodeSubmissionName) {
try {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectory, oldSubmissionDirectories, baseCodeSubmissionName,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
} catch (BasecodeException e) {
throw new IllegalArgumentException(e.getMessage(), e.getCause());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ protected void compareSubmissionsToBaseCode(SubmissionSet submissionSet) {
protected Optional<JPlagComparison> compareSubmissions(Submission first, Submission second) {
JPlagComparison comparison = greedyStringTiling.compare(first, second);
logger.info("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity());
this.options.compareHook().accept(comparison);

if (options.similarityMetric().isAboveThreshold(comparison, options.similarityThreshold())) {
return Optional.of(comparison);
Expand All @@ -57,7 +58,7 @@ protected Optional<JPlagComparison> compareSubmissions(Submission first, Submiss
/**
* @return a list of all submission tuples to be processed.
*/
protected static List<SubmissionTuple> buildComparisonTuples(List<Submission> submissions) {
protected List<SubmissionTuple> buildComparisonTuples(List<Submission> submissions) {
List<SubmissionTuple> tuples = new ArrayList<>();
List<Submission> validSubmissions = submissions.stream().filter(s -> s.getTokenList() != null).toList();

Expand All @@ -70,6 +71,7 @@ protected static List<SubmissionTuple> buildComparisonTuples(List<Submission> su
}
}
}
this.options.preCompareHook().accept(tuples);
return tuples;
}
}

0 comments on commit a6f810b

Please sign in to comment.