diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 7d451d706..b4ea875e3 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -13,7 +13,7 @@ jobs:
java-version: '21'
distribution: 'temurin'
- name: Set maven settings.xml
- uses: whelk-io/maven-settings-xml-action@v21
+ uses: whelk-io/maven-settings-xml-action@v22
with:
servers: '[{ "id": "ossrh", "username": "jplag", "password": "${{ secrets.OSSRH_TOKEN }}" }]'
- name: Import GPG key
diff --git a/README.md b/README.md
index 0bbf4ce1d..7ca077d13 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ In the following, a list of all supported languages with their supported languag
| Language | Version | CLI Argument Name | [state](https://github.com/jplag/JPlag/wiki/2.-Supported-Languages) | parser |
|--------------------------------------------------------|---------------------------------------------------------------------------------------:|-------------------|:-------------------------------------------------------------------:|:---------:|
| [Java](https://www.java.com) | 21 | java | mature | JavaC |
-| [C/C++](https://isocpp.org) | 11 | cpp | legacy | JavaCC |
-| [C/C++](https://isocpp.org) | 14 | cpp2 | beta | ANTLR 4 |
+| [C](https://isocpp.org) | 11 | c | legacy | JavaCC |
+| [C++](https://isocpp.org) | 14 | cpp | beta | ANTLR 4 |
| [C#](https://docs.microsoft.com/en-us/dotnet/csharp/) | 6 | csharp | beta | ANTLR 4 |
| [Go](https://go.dev) | 1.17 | golang | beta | ANTLR 4 |
| [Kotlin](https://kotlinlang.org) | 1.3 | kotlin | beta | ANTLR 4 |
@@ -147,8 +147,8 @@ Clustering
--cluster-skip Skips the clustering (default: false)
Commands:
+ c
cpp
- cpp2
csharp
emf
emf-model
diff --git a/cli/pom.xml b/cli/pom.xml
index 317110a3b..459daf3eb 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -45,12 +45,12 @@
de.jplag
- cpp
+ c
${revision}
de.jplag
- cpp2
+ cpp
${revision}
@@ -129,6 +129,12 @@
picocli
4.7.5
+
+
+ me.tongfei
+ progressbar
+ 0.10.0
+
diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java
index 959675e70..06388e8a6 100644
--- a/cli/src/main/java/de/jplag/cli/CLI.java
+++ b/cli/src/main/java/de/jplag/cli/CLI.java
@@ -26,9 +26,11 @@
import de.jplag.Language;
import de.jplag.cli.logger.CollectedLoggerFactory;
import de.jplag.cli.server.ReportViewer;
+import de.jplag.cli.logger.TongfeiProgressBarProvider;
import de.jplag.clustering.ClusteringOptions;
import de.jplag.clustering.Preprocessing;
import de.jplag.exceptions.ExitException;
+import de.jplag.logging.ProgressBarLogger;
import de.jplag.merging.MergingOptions;
import de.jplag.options.JPlagOptions;
import de.jplag.options.LanguageOption;
@@ -68,6 +70,8 @@ public final class CLI {
private static final String DESCRIPTION_PATTERN = "%nJPlag - %s%n%s%n%n";
+ private static final String DEFAULT_FILE_ENDING = ".zip";
+
/**
* Main class for using JPlag via the CLI.
* @param args are the CLI arguments that will be passed to JPlag.
@@ -81,14 +85,20 @@ public static void main(String[] args) {
ParseResult parseResult = cli.parseOptions(args);
if (!parseResult.isUsageHelpRequested() && !(parseResult.subcommand() != null && parseResult.subcommand().isUsageHelpRequested())) {
+ ProgressBarLogger.setProgressBarProvider(new TongfeiProgressBarProvider());
switch (cli.options.mode) {
case RUN -> cli.runJPlag(parseResult);
case VIEW -> cli.runViewer(null);
case RUN_AND_VIEW -> cli.runViewer(cli.runJPlag(parseResult));
}
}
- } catch (ExitException | IOException exception) {
- logger.error(exception.getMessage()); // do not pass exception here to keep log clean
+ } catch (ExitException | IOException exception) { // do not pass exceptions here to keep log clean
+ if (exception.getCause() != null) {
+ logger.error("{} - {}", exception.getMessage(), exception.getCause().getMessage());
+ } else {
+ logger.error(exception.getMessage());
+ }
+
finalizeLogger();
System.exit(1);
}
@@ -106,9 +116,8 @@ public CLI() {
this.commandLine.getHelpSectionMap().put(SECTION_KEY_OPTION_LIST, help -> help.optionList().lines().map(it -> {
if (it.startsWith(" -")) {
return " " + it;
- } else {
- return it;
}
+ return it;
}).collect(Collectors.joining(System.lineSeparator())));
buildSubcommands().forEach(commandLine::addSubcommand);
@@ -121,10 +130,10 @@ public CLI() {
public File runJPlag(ParseResult parseResult) throws ExitException, FileNotFoundException {
JPlagOptions jplagOptions = buildOptionsFromArguments(parseResult);
JPlagResult result = JPlag.run(jplagOptions);
- File target = new File(getResultFolder() + ".zip");
+ File target = new File(getResultFilePath());
ReportObjectFactory reportObjectFactory = new ReportObjectFactory(target);
reportObjectFactory.createAndSaveReport(result);
- OutputFileGenerator.generateCsvOutput(result, new File(getResultFolder()), this.options);
+ OutputFileGenerator.generateCsvOutput(result, new File(getResultFileBaseName()), this.options);
return target;
}
@@ -168,7 +177,7 @@ public ParseResult parseOptions(String... args) throws CliException {
}
return result;
} catch (CommandLine.ParameterException e) {
- if (e.getArgSpec().isOption() && Arrays.asList(((OptionSpec) e.getArgSpec()).names()).contains("-l")) {
+ if (e.getArgSpec() != null && e.getArgSpec().isOption() && Arrays.asList(((OptionSpec) e.getArgSpec()).names()).contains("-l")) {
throw new CliException(String.format(UNKOWN_LANGAUGE_EXCEPTION, e.getValue(),
String.join(", ", LanguageLoader.getAllAvailableLanguageIdentifiers())));
}
@@ -206,33 +215,31 @@ public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws Cl
JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories,
oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName,
JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions,
- this.options.advanced.debug, mergingOptions);
+ this.options.advanced.debug, mergingOptions, this.options.normalize);
String baseCodePath = this.options.baseCode;
File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath);
if (baseCodeDirectory == null || baseCodeDirectory.exists()) {
return jPlagOptions.withBaseCodeSubmissionDirectory(baseCodeDirectory);
- } else {
- logger.warn("Using legacy partial base code API. Please migrate to new full path base code API.");
- return jPlagOptions.withBaseCodeSubmissionName(baseCodePath);
}
+ logger.warn("Using legacy partial base code API. Please migrate to new full path base code API.");
+ return jPlagOptions.withBaseCodeSubmissionName(baseCodePath);
}
private Language loadLanguage(ParseResult result) throws CliException {
- if (result.subcommand() != null) {
- ParseResult subcommandResult = result.subcommand();
- Language language = LanguageLoader.getLanguage(subcommandResult.commandSpec().name())
- .orElseThrow(() -> new CliException(IMPOSSIBLE_EXCEPTION));
- LanguageOptions languageOptions = language.getOptions();
- languageOptions.getOptionsAsList().forEach(option -> {
- if (subcommandResult.hasMatchedOption(option.getNameAsUnixParameter())) {
- option.setValue(subcommandResult.matchedOptionValue(option.getNameAsUnixParameter(), null));
- }
- });
- return language;
- } else {
+ if (result.subcommand() == null) {
return this.options.language;
}
+ ParseResult subcommandResult = result.subcommand();
+ Language language = LanguageLoader.getLanguage(subcommandResult.commandSpec().name())
+ .orElseThrow(() -> new CliException(IMPOSSIBLE_EXCEPTION));
+ LanguageOptions languageOptions = language.getOptions();
+ languageOptions.getOptionsAsList().forEach(option -> {
+ if (subcommandResult.hasMatchedOption(option.getNameAsUnixParameter())) {
+ option.setValue(subcommandResult.matchedOptionValue(option.getNameAsUnixParameter(), null));
+ }
+ });
+ return language;
}
private static ClusteringOptions getClusteringOptions(CliOptions options) {
@@ -274,7 +281,17 @@ private String generateDescription() {
return String.format(DESCRIPTION_PATTERN, randomDescription, CREDITS);
}
- public String getResultFolder() {
- return this.options.resultFolder;
+    /**
+     * Determines the path of the result file from the configured result file option. The default file ending is
+     * appended unless the configured value already ends with it.
+     * @return the path of the result file, always ending with the default file ending.
+     */
+    private String getResultFilePath() {
+        String configuredPath = this.options.resultFile;
+        return configuredPath.endsWith(DEFAULT_FILE_ENDING) ? configuredPath : configuredPath + DEFAULT_FILE_ENDING;
+    }
+
+    /**
+     * Determines the result file path without the default file ending.
+     * @return the result file path with the trailing default file ending cut off.
+     */
+    private String getResultFileBaseName() {
+        String resultFilePath = getResultFilePath();
+        int baseNameLength = resultFilePath.length() - DEFAULT_FILE_ENDING.length();
+        return resultFilePath.substring(0, baseNameLength);
     }
}
diff --git a/cli/src/main/java/de/jplag/cli/CliOptions.java b/cli/src/main/java/de/jplag/cli/CliOptions.java
index 962be5bda..1a915f660 100644
--- a/cli/src/main/java/de/jplag/cli/CliOptions.java
+++ b/cli/src/main/java/de/jplag/cli/CliOptions.java
@@ -7,6 +7,7 @@
import de.jplag.clustering.ClusteringOptions;
import de.jplag.clustering.algorithm.InterClusterSimilarity;
import de.jplag.java.JavaLanguage;
+import de.jplag.merging.MergingOptions;
import de.jplag.options.JPlagOptions;
import de.jplag.options.SimilarityMetric;
@@ -50,8 +51,8 @@ public class CliOptions implements Runnable {
public int shownComparisons = JPlagOptions.DEFAULT_SHOWN_COMPARISONS;
@Option(names = {"-r",
- "--result-directory"}, description = "Name of the directory in which the comparison results will be stored (default: ${DEFAULT-VALUE})%n")
- public String resultFolder = "results";
+ "--result-file"}, description = "Name of the file in which the comparison results will be stored (default: ${DEFAULT-VALUE}). Missing .zip endings will be automatically added.%n")
+ public String resultFile = "results";
@Option(names = {"--mode"}, description = "The mode to run jplag in")
public JPlagMode mode = JPlagMode.RUN;
@@ -65,6 +66,9 @@ public class CliOptions implements Runnable {
@ArgGroup(validate = false, heading = "Merging of neighboring matches to increase the similarity of concealed plagiarism:%n")
public Merging merging = new Merging();
+ @Option(names = {"--normalize"}, description = "Activate the normalization of tokens. Supported for languages: Java, C++.")
+ public boolean normalize = false;
+
/**
* Empty run method, so picocli prints help automatically
*/
@@ -122,14 +126,15 @@ public static class ClusteringEnabled {
}
public static class Merging {
- @Option(names = {"--match-merging"}, description = "Enables match merging (default: false)%n")
- public boolean enabled;
+ @Option(names = {"--match-merging"}, description = "Enables match merging (default: ${DEFAULT-VALUE})%n")
+ public boolean enabled = MergingOptions.DEFAULT_ENABLED;
- @Option(names = {"--neighbor-length"}, description = "Defines how short a match can be, to be considered (default: 2)%n")
- public int minimumNeighborLength;
+ @Option(names = {"--neighbor-length"}, description = "Defines how short a match can be, to be considered (default: ${DEFAULT-VALUE})%n")
+ public int minimumNeighborLength = MergingOptions.DEFAULT_NEIGHBOR_LENGTH;
- @Option(names = {"--gap-size"}, description = "Defines how many token there can be between two neighboring matches (default: 6)%n")
- public int maximumGapSize;
+ @Option(names = {
+ "--gap-size"}, description = "Defines how many token there can be between two neighboring matches (default: ${DEFAULT-VALUE})%n")
+ public int maximumGapSize = MergingOptions.DEFAULT_GAP_SIZE;
}
diff --git a/cli/src/main/java/de/jplag/cli/LanguageLoader.java b/cli/src/main/java/de/jplag/cli/LanguageLoader.java
index 77c2de673..9c9fd0417 100644
--- a/cli/src/main/java/de/jplag/cli/LanguageLoader.java
+++ b/cli/src/main/java/de/jplag/cli/LanguageLoader.java
@@ -32,8 +32,9 @@ private LanguageLoader() {
* @return the languages as unmodifiable map from identifier to language instance.
*/
public static synchronized Map getAllAvailableLanguages() {
- if (cachedLanguageInstances != null)
+ if (cachedLanguageInstances != null) {
return cachedLanguageInstances;
+ }
Map languages = new TreeMap<>();
@@ -61,8 +62,9 @@ public static synchronized Map getAllAvailableLanguages() {
*/
public static Optional getLanguage(String identifier) {
var language = getAllAvailableLanguages().get(identifier);
- if (language == null)
+ if (language == null) {
logger.warn("Attempt to load Language {} was not successful", identifier);
+ }
return Optional.ofNullable(language);
}
diff --git a/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBar.java b/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBar.java
new file mode 100644
index 000000000..4305a497e
--- /dev/null
+++ b/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBar.java
@@ -0,0 +1,24 @@
+package de.jplag.cli.logger;
+
+import de.jplag.logging.ProgressBar;
+
+/**
+ * Adapter that exposes a progress bar of the tongfei progress bar library through JPlag's {@link ProgressBar}
+ * interface, so that progress bars can be shown on the CLI.
+ */
+public class TongfeiProgressBar implements ProgressBar {
+    private final me.tongfei.progressbar.ProgressBar delegate;
+
+    /**
+     * Wraps the given library progress bar.
+     * @param progressBar is the tongfei progress bar that all calls are forwarded to.
+     */
+    public TongfeiProgressBar(me.tongfei.progressbar.ProgressBar progressBar) {
+        delegate = progressBar;
+    }
+
+    /**
+     * Forwards the given number of steps to the wrapped progress bar.
+     */
+    @Override
+    public void step(int number) {
+        delegate.stepBy(number);
+    }
+
+    /**
+     * Closes the wrapped progress bar.
+     */
+    @Override
+    public void dispose() {
+        delegate.close();
+    }
+}
diff --git a/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java b/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java
new file mode 100644
index 000000000..f36d6b7b9
--- /dev/null
+++ b/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java
@@ -0,0 +1,20 @@
+package de.jplag.cli.logger;
+
+import de.jplag.logging.ProgressBar;
+import de.jplag.logging.ProgressBarProvider;
+import de.jplag.logging.ProgressBarType;
+
+import me.tongfei.progressbar.ProgressBarBuilder;
+import me.tongfei.progressbar.ProgressBarStyle;
+
+/**
+ * A {@link ProgressBarProvider} that uses the tongfei progress bar library underneath to show progress bars on the
+ * CLI.
+ */
+public class TongfeiProgressBarProvider implements ProgressBarProvider {
+    /**
+     * Builds a tongfei progress bar in the unicode block style, named after the default text of the given type and
+     * sized to the given number of steps, and wraps it in a {@link TongfeiProgressBar}.
+     */
+    @Override
+    public ProgressBar initProgressBar(ProgressBarType type, int totalSteps) {
+        ProgressBarBuilder builder = new ProgressBarBuilder();
+        builder.setTaskName(type.getDefaultText());
+        builder.setInitialMax(totalSteps);
+        builder.setStyle(ProgressBarStyle.UNICODE_BLOCK);
+        return new TongfeiProgressBar(builder.build());
+    }
+}
diff --git a/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java b/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java
new file mode 100644
index 000000000..bf2b642c7
--- /dev/null
+++ b/cli/src/test/java/de/jplag/cli/MergingOptionsTest.java
@@ -0,0 +1,25 @@
+package de.jplag.cli;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+import de.jplag.merging.MergingOptions;
+
+/**
+ * Tests how the options of the match merging mechanism are derived from the CLI arguments.
+ */
+class MergingOptionsTest extends CommandLineInterfaceTest {
+
+    @Test
+    @DisplayName("Test if default values are used when creating merging options from CLI")
+    void testMergingDefault() throws CliException {
+        buildOptionsFromCLI(defaultArguments());
+
+        // Read the merging options once and compare each value against the library defaults.
+        MergingOptions mergingOptions = options.mergingOptions();
+        assertNotNull(mergingOptions);
+        assertEquals(MergingOptions.DEFAULT_ENABLED, mergingOptions.enabled());
+        assertEquals(MergingOptions.DEFAULT_NEIGHBOR_LENGTH, mergingOptions.minimumNeighborLength());
+        assertEquals(MergingOptions.DEFAULT_GAP_SIZE, mergingOptions.maximumGapSize());
+    }
+}
diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java
index 822dc959c..25573ee05 100644
--- a/core/src/main/java/de/jplag/JPlag.java
+++ b/core/src/main/java/de/jplag/JPlag.java
@@ -71,6 +71,9 @@ public static JPlagResult run(JPlagOptions options) throws ExitException {
// Parse and validate submissions.
SubmissionSetBuilder builder = new SubmissionSetBuilder(options);
SubmissionSet submissionSet = builder.buildSubmissionSet();
+ if (options.normalize() && options.language().supportsNormalization() && options.language().requiresCoreNormalization()) {
+ submissionSet.normalizeSubmissions();
+ }
int submissionCount = submissionSet.numberOfSubmissions();
if (submissionCount < 2)
throw new SubmissionException("Not enough valid submissions! (found " + submissionCount + " valid submissions)");
@@ -103,6 +106,10 @@ private static void logSkippedSubmissions(SubmissionSet submissionSet, JPlagOpti
}
private static void checkForConfigurationConsistency(JPlagOptions options) throws RootDirectoryException {
+ if (options.normalize() && !options.language().supportsNormalization()) {
+ logger.error(String.format("The language %s cannot be used with normalization.", options.language().getName()));
+ }
+
List duplicateNames = getDuplicateSubmissionFolderNames(options);
if (duplicateNames.size() > 0) {
throw new RootDirectoryException(String.format("Duplicate root directory names found: %s", String.join(", ", duplicateNames)));
diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java
index c6b4f92cf..075cc2e2f 100644
--- a/core/src/main/java/de/jplag/Submission.java
+++ b/core/src/main/java/de/jplag/Submission.java
@@ -239,9 +239,11 @@ private static File createErrorDirectory(String... subdirectoryNames) {
/**
* Parse files of the submission.
+ * @param debugParser specifies if the submission should be copied upon parsing errors.
+ * @param normalize specifies if the token sequences should be normalized.
* @return Whether parsing was successful.
*/
- /* package-private */ boolean parse(boolean debugParser) {
+ /* package-private */ boolean parse(boolean debugParser, boolean normalize) {
if (files == null || files.isEmpty()) {
logger.error("ERROR: nothing to parse for submission \"{}\"", name);
tokenList = null;
@@ -250,7 +252,7 @@ private static File createErrorDirectory(String... subdirectoryNames) {
}
try {
- tokenList = language.parse(new HashSet<>(files));
+ tokenList = language.parse(new HashSet<>(files), normalize);
if (logger.isDebugEnabled()) {
for (Token token : tokenList) {
logger.debug(String.join(" | ", token.getType().toString(), Integer.toString(token.getLine()), token.getSemantics().toString()));
@@ -276,7 +278,7 @@ private static File createErrorDirectory(String... subdirectoryNames) {
}
/**
- * Perform token string normalization, which makes the token string invariant to dead code insertion and independent
+ * Perform token sequence normalization, which makes the token sequence invariant to dead code insertion and independent
* statement reordering.
*/
void normalize() {
diff --git a/core/src/main/java/de/jplag/SubmissionFileData.java b/core/src/main/java/de/jplag/SubmissionFileData.java
new file mode 100644
index 000000000..91eb3edda
--- /dev/null
+++ b/core/src/main/java/de/jplag/SubmissionFileData.java
@@ -0,0 +1,13 @@
+package de.jplag;
+
+import java.io.File;
+
+/**
+ * Contains the information about a single file in a submission. For single file submissions the submission file is the
+ * same as the root.
+ * @param submissionFile The file that is part of a submission
+ * @param root The root of the submission; the submission set builder passes the root directory in which the file was
+ * listed
+ * @param isNew Indicates whether this follows the new or the old syntax; the submission set builder passes
+ * {@code true} for entries of the submission directories and {@code false} for entries of the old submission
+ * directories
+ */
+public record SubmissionFileData(File submissionFile, File root, boolean isNew) {
+}
diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java
index 166a151fb..1b3ab67cd 100644
--- a/core/src/main/java/de/jplag/SubmissionSet.java
+++ b/core/src/main/java/de/jplag/SubmissionSet.java
@@ -10,6 +10,9 @@
import de.jplag.exceptions.BasecodeException;
import de.jplag.exceptions.ExitException;
import de.jplag.exceptions.SubmissionException;
+import de.jplag.logging.ProgressBar;
+import de.jplag.logging.ProgressBarLogger;
+import de.jplag.logging.ProgressBarType;
import de.jplag.options.JPlagOptions;
/**
@@ -37,6 +40,7 @@ public class SubmissionSet {
/**
* @param submissions Submissions to check for plagiarism.
* @param baseCode Base code submission if it exists or {@code null}.
+ * @param options The JPlag options
*/
public SubmissionSet(List submissions, Submission baseCode, JPlagOptions options) throws ExitException {
this.allSubmissions = submissions;
@@ -119,7 +123,7 @@ private void parseAllSubmissions() throws ExitException {
private void parseBaseCodeSubmission(Submission baseCode) throws BasecodeException {
long startTime = System.currentTimeMillis();
logger.trace("----- Parsing basecode submission: " + baseCode.getName());
- if (!baseCode.parse(options.debugParser())) {
+ if (!baseCode.parse(options.debugParser(), options.normalize())) {
throw new BasecodeException("Could not successfully parse basecode submission!");
} else if (baseCode.getNumberOfTokens() < options.minimumTokenMatch()) {
throw new BasecodeException(String.format("Basecode submission contains %d token(s), which is less than the minimum match length (%d)!",
@@ -133,6 +137,7 @@ private void parseBaseCodeSubmission(Submission baseCode) throws BasecodeExcepti
/**
* Parse all given submissions.
+ * @param submissions The list of submissions
*/
private void parseSubmissions(List submissions) {
if (submissions.isEmpty()) {
@@ -143,14 +148,14 @@ private void parseSubmissions(List submissions) {
long startTime = System.currentTimeMillis();
int tooShort = 0;
+ ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.PARSING, submissions.size());
for (Submission submission : submissions) {
- logger.info("Parsing submission {}", submission.getName());
boolean ok;
logger.trace("------ Parsing submission: " + submission.getName());
currentSubmissionName = submission.getName();
- if (!(ok = submission.parse(options.debugParser()))) {
+ if (!(ok = submission.parse(options.debugParser(), options.normalize()))) {
errors++;
}
@@ -168,7 +173,9 @@ private void parseSubmissions(List submissions) {
} else {
logger.error("ERROR -> Submission {} removed", currentSubmissionName);
}
+ progressBar.step();
}
+ progressBar.dispose();
int validSubmissions = submissions.size() - errors - tooShort;
logger.trace(validSubmissions + " submissions parsed successfully!");
diff --git a/core/src/main/java/de/jplag/SubmissionSetBuilder.java b/core/src/main/java/de/jplag/SubmissionSetBuilder.java
index 4d93c0d44..e0cf584e4 100644
--- a/core/src/main/java/de/jplag/SubmissionSetBuilder.java
+++ b/core/src/main/java/de/jplag/SubmissionSetBuilder.java
@@ -6,6 +6,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -21,6 +22,9 @@
import de.jplag.exceptions.ExitException;
import de.jplag.exceptions.RootDirectoryException;
import de.jplag.exceptions.SubmissionException;
+import de.jplag.logging.ProgressBar;
+import de.jplag.logging.ProgressBarLogger;
+import de.jplag.logging.ProgressBarType;
import de.jplag.options.JPlagOptions;
/**
@@ -35,9 +39,9 @@ public class SubmissionSetBuilder {
/**
* Creates a builder for submission sets.
- * @deprecated in favor of {@link #SubmissionSetBuilder(JPlagOptions)}.
* @param language is the language of the submissions.
* @param options are the configured options.
+ * @deprecated in favor of {@link #SubmissionSetBuilder(JPlagOptions)}.
*/
@Deprecated(since = "4.3.0")
public SubmissionSetBuilder(Language language, JPlagOptions options) {
@@ -67,14 +71,21 @@ public SubmissionSet buildSubmissionSet() throws ExitException {
int numberOfRootDirectories = submissionDirectories.size() + oldSubmissionDirectories.size();
boolean multipleRoots = (numberOfRootDirectories > 1);
- // Collect valid looking entries from the root directories.
- Map foundSubmissions = new HashMap<>();
- for (File directory : submissionDirectories) {
- processRootDirectoryEntries(directory, multipleRoots, foundSubmissions, true);
+ List submissionFiles = new ArrayList<>();
+ for (File submissionDirectory : submissionDirectories) {
+ submissionFiles.addAll(listSubmissionFiles(submissionDirectory, true));
}
- for (File oldDirectory : oldSubmissionDirectories) {
- processRootDirectoryEntries(oldDirectory, multipleRoots, foundSubmissions, false);
+ for (File submissionDirectory : oldSubmissionDirectories) {
+ submissionFiles.addAll(listSubmissionFiles(submissionDirectory, false));
+ }
+
+ ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.LOADING, submissionFiles.size());
+ Map foundSubmissions = new HashMap<>();
+ for (SubmissionFileData submissionFile : submissionFiles) {
+ processSubmissionFile(submissionFile, multipleRoots, foundSubmissions);
+ progressBar.step();
}
+ progressBar.dispose();
Optional baseCodeSubmission = loadBaseCode();
baseCodeSubmission.ifPresent(baseSubmission -> foundSubmissions.remove(baseSubmission.getRoot()));
@@ -84,7 +95,7 @@ public SubmissionSet buildSubmissionSet() throws ExitException {
// Some languages expect a certain order, which is ensured here:
if (options.language().expectsSubmissionOrder()) {
- List rootFiles = foundSubmissions.values().stream().map(it -> it.getRoot()).toList();
+ List rootFiles = foundSubmissions.values().stream().map(Submission::getRoot).toList();
rootFiles = options.language().customizeSubmissionOrder(rootFiles);
submissions = new ArrayList<>(rootFiles.stream().map(foundSubmissions::get).toList());
}
@@ -155,31 +166,25 @@ private Optional loadBaseCode() throws ExitException {
Submission baseCodeSubmission = processSubmission(baseCodeSubmissionDirectory.getName(), baseCodeSubmissionDirectory, false);
logger.info("Basecode directory \"{}\" will be used.", baseCodeSubmission.getName());
- return Optional.ofNullable(baseCodeSubmission);
+ return Optional.of(baseCodeSubmission);
}
- /**
- * Read entries in the given root directory.
- */
- private String[] listSubmissionFiles(File rootDirectory) throws ExitException {
+ private List listSubmissionFiles(File rootDirectory, boolean isNew) throws RootDirectoryException {
if (!rootDirectory.isDirectory()) {
throw new AssertionError("Given root is not a directory.");
}
- String[] fileNames;
-
try {
- fileNames = rootDirectory.list();
+ File[] files = rootDirectory.listFiles();
+ if (files == null) {
+ throw new RootDirectoryException("Cannot list files of the root directory!");
+ }
+
+ return Arrays.stream(files).sorted(Comparator.comparing(File::getName)).map(it -> new SubmissionFileData(it, rootDirectory, isNew))
+ .toList();
} catch (SecurityException exception) {
throw new RootDirectoryException("Cannot list files of the root directory! " + exception.getMessage(), exception);
}
-
- if (fileNames == null) {
- throw new RootDirectoryException("Cannot list files of the root directory!");
- }
-
- Arrays.sort(fileNames);
- return fileNames;
}
/**
@@ -200,6 +205,7 @@ private String isExcludedEntry(File submissionEntry) {
/**
* Process the given directory entry as a submission, the path MUST not be excluded.
+ * @param submissionName The name of the submission
* @param submissionFile the file for the submission.
* @param isNew states whether submissions found in the root directory must be checked for plagiarism.
* @return The entry converted to a submission.
@@ -225,27 +231,16 @@ private Submission processSubmission(String submissionName, File submissionFile,
return new Submission(submissionName, submissionFile, isNew, parseFilesRecursively(submissionFile), options.language());
}
- /**
- * Process entries in the root directory to check whether they qualify as submissions.
- * @param rootDirectory is the root directory being examined.
- * @param foundSubmissions Submissions found so far, is updated in-place.
- * @param isNew states whether submissions found in the root directory must be checked for plagiarism.
- */
- private void processRootDirectoryEntries(File rootDirectory, boolean multipleRoots, Map foundSubmissions, boolean isNew)
- throws ExitException {
- for (String fileName : listSubmissionFiles(rootDirectory)) {
- File submissionFile = new File(rootDirectory, fileName);
-
- String errorMessage = isExcludedEntry(submissionFile);
- if (errorMessage == null) {
- String rootDirectoryPrefix = multipleRoots ? (rootDirectory.getName() + File.separator) : "";
- String submissionName = rootDirectoryPrefix + fileName;
- Submission submission = processSubmission(submissionName, submissionFile, isNew);
- foundSubmissions.put(submission.getRoot(), submission);
- } else {
- logger.error(errorMessage);
- }
+    /**
+     * Processes a single entry of a root directory and, unless the entry is excluded, registers it as a submission.
+     * @param file is the entry to process, together with its root directory and the information whether it is new.
+     * @param multipleRoots states whether more than one root directory is used; if so, the submission name is prefixed
+     * with the name of the root directory.
+     * @param foundSubmissions Submissions found so far, is updated in-place.
+     * @throws ExitException if the entry cannot be processed as a submission.
+     */
+    private void processSubmissionFile(SubmissionFileData file, boolean multipleRoots, Map foundSubmissions) throws ExitException {
+        String errorMessage = isExcludedEntry(file.submissionFile());
+        if (errorMessage != null) {
+            logger.error(errorMessage);
+            // Excluded entries must not become submissions, so stop here instead of falling through.
+            return;
         }
+
+        String rootDirectoryPrefix = multipleRoots ? (file.root().getName() + File.separator) : "";
+        String submissionName = rootDirectoryPrefix + file.submissionFile().getName();
+        Submission submission = processSubmission(submissionName, file.submissionFile(), file.isNew());
+        foundSubmissions.put(submission.getRoot(), submission);
     }
/**
@@ -311,4 +306,5 @@ private File makeCanonical(File file, Function excepti
throw exceptionWrapper.apply(exception);
}
}
+
}
diff --git a/core/src/main/java/de/jplag/exceptions/ConfigurationException.java b/core/src/main/java/de/jplag/exceptions/ConfigurationException.java
new file mode 100644
index 000000000..e29bdf9b4
--- /dev/null
+++ b/core/src/main/java/de/jplag/exceptions/ConfigurationException.java
@@ -0,0 +1,15 @@
+package de.jplag.exceptions;
+
+import java.io.Serial;
+
+/**
+ * Exception used if the configuration is wrong. As an {@link ExitException}, it can be handled wherever exit
+ * exceptions are caught.
+ */
+public class ConfigurationException extends ExitException {
+    @Serial
+    private static final long serialVersionUID = 4625302641982932127L; // generated
+
+    /**
+     * Creates a configuration exception.
+     * @param message is the message that is passed on to the {@link ExitException} constructor.
+     */
+    public ConfigurationException(String message) {
+        super(message);
+    }
+}
diff --git a/core/src/main/java/de/jplag/logging/ProgressBar.java b/core/src/main/java/de/jplag/logging/ProgressBar.java
new file mode 100644
index 000000000..04450434a
--- /dev/null
+++ b/core/src/main/java/de/jplag/logging/ProgressBar.java
@@ -0,0 +1,24 @@
+package de.jplag.logging;
+
+/**
+ * Handle for interacting with a progress bar that is currently running.
+ */
+public interface ProgressBar {
+    /**
+     * Advances the progress bar by the given number of steps.
+     * @param number The number of steps to advance by
+     */
+    void step(int number);
+
+    /**
+     * Advances the progress bar by exactly one step. The default implementation delegates to {@link #step(int)}.
+     */
+    default void step() {
+        this.step(1);
+    }
+
+    /**
+     * Closes the progress bar. Once this method has been called, the behaviour of the other methods is undefined.
+     */
+    void dispose();
+}
diff --git a/core/src/main/java/de/jplag/logging/ProgressBarLogger.java b/core/src/main/java/de/jplag/logging/ProgressBarLogger.java
new file mode 100644
index 000000000..889d391bb
--- /dev/null
+++ b/core/src/main/java/de/jplag/logging/ProgressBarLogger.java
@@ -0,0 +1,68 @@
+package de.jplag.logging;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Provides static access to the creation of progress bars.
+ */
+public class ProgressBarLogger {
+    private static ProgressBarProvider progressBarProvider = new DummyProvider();
+
+    private ProgressBarLogger() {
+        // Hides default constructor
+    }
+
+    /**
+     * Creates a new {@link ProgressBar} through the currently set {@link ProgressBarProvider}.
+     * @param type The type of the progress bar
+     * @param totalSteps The total number of steps
+     * @return The newly created progress bar
+     */
+    public static ProgressBar createProgressBar(ProgressBarType type, int totalSteps) {
+        return progressBarProvider.initProgressBar(type, totalSteps);
+    }
+
+    /**
+     * Sets the {@link ProgressBarProvider}. Should be used by the ui before calling JPlag, if progress bars should be
+     * shown. As long as no provider has been set, progress is only written to the log.
+     * @param progressBarProvider The provider
+     */
+    public static void setProgressBarProvider(ProgressBarProvider progressBarProvider) {
+        ProgressBarLogger.progressBarProvider = progressBarProvider;
+    }
+
+    /**
+     * Provider that is active until {@link #setProgressBarProvider(ProgressBarProvider)} is called. It creates bars
+     * that only write to the log.
+     */
+    private static class DummyProvider implements ProgressBarProvider {
+        @Override
+        public ProgressBar initProgressBar(ProgressBarType type, int totalSteps) {
+            return new DummyBar(type, totalSteps);
+        }
+    }
+
+    /**
+     * Progress bar replacement that logs every step instead of drawing a bar.
+     */
+    private static class DummyBar implements ProgressBar {
+        private static final Logger logger = LoggerFactory.getLogger(DummyBar.class);
+        private int currentStep; // number of completed steps; only touched inside the synchronized step()
+
+        public DummyBar(ProgressBarType type, int totalSteps) {
+            this.currentStep = 0;
+            logger.info("{} ({})", type.getDefaultText(), totalSteps);
+        }
+
+        /**
+         * Counts and logs one completed step. Synchronized, because steps may be reported from several threads at
+         * once (e.g. from a parallel stream), and an unguarded increment could lose updates.
+         */
+        @Override
+        public synchronized void step() {
+            // Increment first, so the first completed step is reported as step 1 rather than step 0.
+            logger.info("Now at step {}", ++this.currentStep);
+        }
+
+        @Override
+        public void step(int number) {
+            for (int i = 0; i < number; i++) {
+                step();
+            }
+        }
+
+        @Override
+        public void dispose() {
+            logger.info("Progress bar done.");
+        }
+    }
+}
diff --git a/core/src/main/java/de/jplag/logging/ProgressBarProvider.java b/core/src/main/java/de/jplag/logging/ProgressBarProvider.java
new file mode 100644
index 000000000..13268325b
--- /dev/null
+++ b/core/src/main/java/de/jplag/logging/ProgressBarProvider.java
@@ -0,0 +1,14 @@
+package de.jplag.logging;
+
+/**
+ * Provides the capability to create new progress bars, to allow JPlag to access the ui.
+ */
+public interface ProgressBarProvider {
+ /**
+ * Creates a new progress bar.
+ * @param type The type of progress bar. Implementations should mainly use it to determine the displayed name
+ * @param totalSteps The total number of steps the progress bar should have
+ * @return The newly created bar
+ */
+ ProgressBar initProgressBar(ProgressBarType type, int totalSteps);
+}
diff --git a/core/src/main/java/de/jplag/logging/ProgressBarType.java b/core/src/main/java/de/jplag/logging/ProgressBarType.java
new file mode 100644
index 000000000..88e520fcc
--- /dev/null
+++ b/core/src/main/java/de/jplag/logging/ProgressBarType.java
@@ -0,0 +1,23 @@
+package de.jplag.logging;
+
+/**
+ * The available processes. Used as a hint for the ui, which step JPlag is currently performing.
+ */
+public enum ProgressBarType {
+ LOADING("Loading Submissions "), // NOTE(review): the trailing space presumably pads the labels to a common width - confirm
+ PARSING("Parsing Submissions "),
+ COMPARING("Comparing Submissions");
+
+ private final String defaultText; // text returned by getDefaultText()
+
+ ProgressBarType(String defaultText) {
+ this.defaultText = defaultText;
+ }
+
+ /**
+ * Returns the text passed to the constant's constructor, exactly as written there.
+ * @return The default display text for the type
+ */
+ public String getDefaultText() {
+ return defaultText;
+ }
+}
diff --git a/core/src/main/java/de/jplag/merging/MergingOptions.java b/core/src/main/java/de/jplag/merging/MergingOptions.java
index 4c2b49e08..7d77f7f31 100644
--- a/core/src/main/java/de/jplag/merging/MergingOptions.java
+++ b/core/src/main/java/de/jplag/merging/MergingOptions.java
@@ -10,12 +10,16 @@
public record MergingOptions(@JsonProperty("enabled") boolean enabled, @JsonProperty("min_neighbour_length") int minimumNeighborLength,
@JsonProperty("max_gap_size") int maximumGapSize) {
+ public static final boolean DEFAULT_ENABLED = false;
+ public static final int DEFAULT_NEIGHBOR_LENGTH = 2;
+ public static final int DEFAULT_GAP_SIZE = 6;
+
/**
* The default values of MergingOptions are false for the enable-switch, which deactivate MatchMerging, while
* minimumNeighborLength and maximumGapSize default to (2,6), which in testing yielded the best results.
*/
public MergingOptions() {
- this(false, 2, 6);
+ this(DEFAULT_ENABLED, DEFAULT_NEIGHBOR_LENGTH, DEFAULT_GAP_SIZE);
}
/**
diff --git a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java
index c58a9188e..5ece0ff1f 100644
--- a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java
+++ b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java
@@ -13,7 +13,7 @@
import de.jplag.Token;
/**
- * Performs token string normalization.
+ * Performs token sequence normalization.
*/
public class TokenStringNormalizer {
@@ -21,11 +21,11 @@ private TokenStringNormalizer() {
}
/**
- * Performs token string normalization. Tokens representing dead code have been eliminated and tokens representing
+ * Performs token sequence normalization. Tokens representing dead code have been eliminated and tokens representing
* subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph
- * and then turning it back into a token string.
- * @param tokens The original token string, remains unaltered.
- * @return The normalized token string.
+ * and then turning it back into a token sequence.
+ * @param tokens The original token sequence, remains unaltered.
+ * @return The normalized token sequence.
*/
public static List normalize(List tokens) {
SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get();
diff --git a/core/src/main/java/de/jplag/options/JPlagOptions.java b/core/src/main/java/de/jplag/options/JPlagOptions.java
index f2e876f43..ec62b9d9c 100644
--- a/core/src/main/java/de/jplag/options/JPlagOptions.java
+++ b/core/src/main/java/de/jplag/options/JPlagOptions.java
@@ -55,7 +55,8 @@ public record JPlagOptions(@JsonSerialize(using = LanguageSerializer.class) Lang
@JsonProperty("subdirectory_name") String subdirectoryName, @JsonProperty("file_suffixes") List fileSuffixes,
@JsonProperty("exclusion_file_name") String exclusionFileName, @JsonProperty("similarity_metric") SimilarityMetric similarityMetric,
@JsonProperty("similarity_threshold") double similarityThreshold, @JsonProperty("max_comparisons") int maximumNumberOfComparisons,
- @JsonProperty("cluster") ClusteringOptions clusteringOptions, boolean debugParser, @JsonProperty("merging") MergingOptions mergingOptions) {
+ @JsonProperty("cluster") ClusteringOptions clusteringOptions, boolean debugParser, @JsonProperty("merging") MergingOptions mergingOptions,
+ @JsonProperty("normalize") boolean normalize) {
public static final double DEFAULT_SIMILARITY_THRESHOLD = 0;
public static final int DEFAULT_SHOWN_COMPARISONS = 500;
@@ -68,13 +69,13 @@ public record JPlagOptions(@JsonSerialize(using = LanguageSerializer.class) Lang
public JPlagOptions(Language language, Set submissionDirectories, Set oldSubmissionDirectories) {
this(language, null, submissionDirectories, oldSubmissionDirectories, null, null, null, null, DEFAULT_SIMILARITY_METRIC,
- DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions());
+ DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions(), false);
}
public JPlagOptions(Language language, Integer minimumTokenMatch, Set submissionDirectories, Set oldSubmissionDirectories,
File baseCodeSubmissionDirectory, String subdirectoryName, List fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
- boolean debugParser, MergingOptions mergingOptions) {
+ boolean debugParser, MergingOptions mergingOptions, boolean normalize) {
this.language = language;
this.debugParser = debugParser;
this.fileSuffixes = fileSuffixes == null || fileSuffixes.isEmpty() ? null : Collections.unmodifiableList(fileSuffixes);
@@ -89,90 +90,97 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, Set subm
this.subdirectoryName = subdirectoryName;
this.clusteringOptions = clusteringOptions;
this.mergingOptions = mergingOptions;
+ this.normalize = normalize;
}
public JPlagOptions withLanguageOption(Language language) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withDebugParser(boolean debugParser) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withFileSuffixes(List fileSuffixes) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withSimilarityThreshold(double similarityThreshold) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withMaximumNumberOfComparisons(int maximumNumberOfComparisons) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withSimilarityMetric(SimilarityMetric similarityMetric) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withMinimumTokenMatch(Integer minimumTokenMatch) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withExclusionFileName(String exclusionFileName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withSubmissionDirectories(Set submissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withOldSubmissionDirectories(Set oldSubmissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withBaseCodeSubmissionDirectory(File baseCodeSubmissionDirectory) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withSubdirectoryName(String subdirectoryName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withClusteringOptions(ClusteringOptions clusteringOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
}
public JPlagOptions withMergingOptions(MergingOptions mergingOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
- clusteringOptions, debugParser, mergingOptions);
+ clusteringOptions, debugParser, mergingOptions, normalize);
+ }
+
+ public JPlagOptions withNormalize(boolean normalize) {
+ return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
+ subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
+ clusteringOptions, debugParser, mergingOptions, normalize); // only normalize comes from the parameter, the rest is copied from this instance
}
public boolean hasBaseCode() {
@@ -264,7 +272,7 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, File submissio
boolean debugParser, MergingOptions mergingOptions) throws BasecodeException {
this(language, minimumTokenMatch, Set.of(submissionDirectory), oldSubmissionDirectories,
convertLegacyBaseCodeToFile(baseCodeSubmissionName, submissionDirectory), subdirectoryName, fileSuffixes, exclusionFileName,
- similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions);
+ similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions, false);
}
/**
diff --git a/core/src/main/java/de/jplag/reporting/FilePathUtil.java b/core/src/main/java/de/jplag/reporting/FilePathUtil.java
index d2db74ad7..51aefdd36 100644
--- a/core/src/main/java/de/jplag/reporting/FilePathUtil.java
+++ b/core/src/main/java/de/jplag/reporting/FilePathUtil.java
@@ -7,6 +7,7 @@
import de.jplag.Submission;
public final class FilePathUtil {
+ private static final String ZIP_PATH_SEPARATOR = "/"; // Paths in zip files are always separated by a slash
private FilePathUtil() {
// private constructor to prevent instantiation
@@ -26,4 +27,23 @@ public static String getRelativeSubmissionPath(File file, Submission submission,
return Path.of(submissionToIdFunction.apply(submission), submission.getRoot().toPath().relativize(file.toPath()).toString()).toString();
}
+ /**
+ * Joins logical paths using a slash. This method ensures that no duplicate slashes are created in between.
+ * @param left The left path segment
+ * @param right The right path segment
+ * @return The joined paths
+ */
+    public static String joinZipPathSegments(String left, String right) {
+        // Find the first position in the right segment that is not part of a leading separator run.
+        int rightStart = 0;
+        while (right.startsWith(ZIP_PATH_SEPARATOR, rightStart)) {
+            rightStart += ZIP_PATH_SEPARATOR.length();
+        }
+
+        // Find the end of the left segment once all trailing separators are cut off.
+        // startsWith returns false for a negative offset, so an empty or fully consumed segment ends the loop.
+        int leftEnd = left.length();
+        while (left.startsWith(ZIP_PATH_SEPARATOR, leftEnd - ZIP_PATH_SEPARATOR.length())) {
+            leftEnd -= ZIP_PATH_SEPARATOR.length();
+        }
+
+        return left.substring(0, leftEnd) + ZIP_PATH_SEPARATOR + right.substring(rightStart);
+    }
}
diff --git a/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java b/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java
index 224b277e7..29f93744c 100644
--- a/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java
+++ b/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java
@@ -3,7 +3,6 @@
import java.util.Comparator;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
@@ -105,13 +104,9 @@ private Match convertMatchToReportMatch(JPlagComparison comparison, de.jplag.Mat
Token startOfSecond = tokensSecond.stream().min(lineComparator).orElseThrow();
Token endOfSecond = tokensSecond.stream().max(lineComparator).orElseThrow();
- List firstTotalTokens = tokensFirst.stream().filter(x -> Objects.equals(x.getFile(), startOfFirst.getFile())).toList();
- List secondTotalTokens = tokensSecond.stream().filter(x -> Objects.equals(x.getFile(), startOfSecond.getFile())).toList();
-
return new Match(FilePathUtil.getRelativeSubmissionPath(startOfFirst.getFile(), comparison.firstSubmission(), submissionToIdFunction),
FilePathUtil.getRelativeSubmissionPath(startOfSecond.getFile(), comparison.secondSubmission(), submissionToIdFunction),
- startOfFirst.getLine(), endOfFirst.getLine(), startOfSecond.getLine(), endOfSecond.getLine(), match.length(), firstTotalTokens.size(),
- secondTotalTokens.size());
+ startOfFirst.getLine(), endOfFirst.getLine(), startOfSecond.getLine(), endOfSecond.getLine(), match.length());
}
}
diff --git a/core/src/main/java/de/jplag/reporting/jsonfactory/serializer/LanguageSerializer.java b/core/src/main/java/de/jplag/reporting/jsonfactory/serializer/LanguageSerializer.java
index 22e961105..c521507c7 100644
--- a/core/src/main/java/de/jplag/reporting/jsonfactory/serializer/LanguageSerializer.java
+++ b/core/src/main/java/de/jplag/reporting/jsonfactory/serializer/LanguageSerializer.java
@@ -1,6 +1,7 @@
package de.jplag.reporting.jsonfactory.serializer;
import java.io.IOException;
+import java.io.Serial;
import de.jplag.Language;
@@ -10,6 +11,9 @@
public class LanguageSerializer extends StdSerializer {
+ @Serial
+ private static final long serialVersionUID = 5944655736767387268L; // generated
+
/**
* Constructor used by the fasterxml.jackson
*/
diff --git a/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java b/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java
index 2354b254a..d569a5252 100644
--- a/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java
+++ b/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java
@@ -112,7 +112,7 @@ private void copySubmissionFilesToReport(JPlagResult result) {
if (relativeFilePath.isEmpty()) {
relativeFilePath = file.getName();
}
- String zipPath = submissionRootPath + relativeFilePath;
+ String zipPath = FilePathUtil.joinZipPathSegments(submissionRootPath, relativeFilePath);
File fileToCopy = getFileToCopy(language, file);
this.resultWriter.addFileContentEntry(zipPath, fileToCopy);
diff --git a/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java b/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java
index 3f708dc1b..8af13af20 100644
--- a/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java
+++ b/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java
@@ -4,6 +4,5 @@
public record Match(@JsonProperty("file1") String firstFileName, @JsonProperty("file2") String secondFileName,
@JsonProperty("start1") int startInFirst, @JsonProperty("end1") int endInFirst, @JsonProperty("start2") int startInSecond,
- @JsonProperty("end2") int endInSecond, @JsonProperty("tokens") int tokens, @JsonProperty("file1Tokens") long file1Tokens,
- @JsonProperty("file2Tokens") long file2Tokens) {
+ @JsonProperty("end2") int endInSecond, @JsonProperty("tokens") int tokens) {
}
diff --git a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java
index 19822ef41..09c8717cb 100644
--- a/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java
+++ b/core/src/main/java/de/jplag/strategy/AbstractComparisonStrategy.java
@@ -3,14 +3,19 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
+import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.jplag.GreedyStringTiling;
import de.jplag.JPlagComparison;
+import de.jplag.JPlagResult;
import de.jplag.Submission;
import de.jplag.SubmissionSet;
+import de.jplag.logging.ProgressBar;
+import de.jplag.logging.ProgressBarLogger;
+import de.jplag.logging.ProgressBarType;
import de.jplag.options.JPlagOptions;
public abstract class AbstractComparisonStrategy implements ComparisonStrategy {
@@ -46,7 +51,7 @@ protected void compareSubmissionsToBaseCode(SubmissionSet submissionSet) {
*/
protected Optional compareSubmissions(Submission first, Submission second) {
JPlagComparison comparison = greedyStringTiling.compare(first, second);
- logger.info("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity());
+ logger.trace("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity());
if (options.similarityMetric().isAboveThreshold(comparison, options.similarityThreshold())) {
return Optional.of(comparison);
@@ -57,7 +62,7 @@ protected Optional compareSubmissions(Submission first, Submiss
/**
* @return a list of all submission tuples to be processed.
*/
- protected static List buildComparisonTuples(List submissions) {
+ protected List buildComparisonTuples(List submissions) {
List tuples = new ArrayList<>();
List validSubmissions = submissions.stream().filter(s -> s.getTokenList() != null).toList();
@@ -72,4 +77,44 @@ protected static List buildComparisonTuples(List su
}
return tuples;
}
+
+    /**
+     * Compares all submissions of the given set with each other and reports the progress through a
+     * {@link ProgressBar} of type {@link ProgressBarType#COMPARING}. The base code is handled first by
+     * {@link #handleBaseCode(SubmissionSet)}.
+     * @param submissionSet The submissions to compare
+     * @return The result containing the comparisons that {@link #compareTuple(SubmissionTuple)} returned
+     */
+    @Override
+    public JPlagResult compareSubmissions(SubmissionSet submissionSet) {
+        long timeBeforeStartInMillis = System.currentTimeMillis();
+
+        handleBaseCode(submissionSet);
+
+        List<SubmissionTuple> tuples = buildComparisonTuples(submissionSet.getSubmissions());
+        ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.COMPARING, tuples.size());
+        List<JPlagComparison> comparisons;
+        try {
+            comparisons = prepareStream(tuples).flatMap(tuple -> {
+                Optional<JPlagComparison> result = compareTuple(tuple);
+                progressBar.step();
+                return result.stream();
+            }).toList();
+        } finally {
+            // Close the bar even if a comparison fails, so it is never left open.
+            progressBar.dispose();
+        }
+
+        long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
+
+        return new JPlagResult(comparisons, submissionSet, durationInMillis, options);
+    }
+
+ /**
+ * Handles the base code. Called once, before the submissions are compared with each other.
+ * @param submissionSet The submission set, which may contain base code
+ */
+ protected abstract void handleBaseCode(SubmissionSet submissionSet);
+
+ /**
+ * Prepares the stream on which the tuples are compared. Here you can modify the tuples or the stream as necessary,
+ * e.g. by making the stream parallel.
+ * @param tuples The tuples to stream
+ * @return The Stream of tuples
+ */
+ protected abstract Stream prepareStream(List tuples);
+
+ /**
+ * Compares a single tuple. Returns an empty {@link Optional}, if the similarity is not high enough.
+ * @param tuple The Tuple to compare
+ * @return The comparison, if there is one to report
+ */
+ protected abstract Optional compareTuple(SubmissionTuple tuple);
}
diff --git a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java
index fd94b9293..43cc66ae6 100644
--- a/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java
+++ b/core/src/main/java/de/jplag/strategy/ParallelComparisonStrategy.java
@@ -2,10 +2,10 @@
import java.util.List;
import java.util.Optional;
+import java.util.stream.Stream;
import de.jplag.GreedyStringTiling;
import de.jplag.JPlagComparison;
-import de.jplag.JPlagResult;
import de.jplag.SubmissionSet;
import de.jplag.options.JPlagOptions;
@@ -19,19 +19,20 @@ public ParallelComparisonStrategy(JPlagOptions options, GreedyStringTiling greed
}
@Override
- public JPlagResult compareSubmissions(SubmissionSet submissionSet) {
- // Initialize:
- long timeBeforeStartInMillis = System.currentTimeMillis();
+ protected void handleBaseCode(SubmissionSet submissionSet) {
boolean withBaseCode = submissionSet.hasBaseCode();
if (withBaseCode) {
compareSubmissionsToBaseCode(submissionSet);
}
+ }
- List tuples = buildComparisonTuples(submissionSet.getSubmissions());
- List comparisons = tuples.stream().parallel().map(tuple -> compareSubmissions(tuple.left(), tuple.right()))
- .flatMap(Optional::stream).toList();
+ @Override
+ protected Stream prepareStream(List tuples) {
+ return tuples.stream().parallel();
+ }
- long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
- return new JPlagResult(comparisons, submissionSet, durationInMillis, options);
+ @Override
+ protected Optional compareTuple(SubmissionTuple tuple) {
+ return compareSubmissions(tuple.left(), tuple.right());
}
}
diff --git a/core/src/test/java/de/jplag/NormalizationTest.java b/core/src/test/java/de/jplag/NormalizationTest.java
index f2e447b1c..c6a9db9ed 100644
--- a/core/src/test/java/de/jplag/NormalizationTest.java
+++ b/core/src/test/java/de/jplag/NormalizationTest.java
@@ -12,8 +12,8 @@
import de.jplag.options.JPlagOptions;
class NormalizationTest extends TestBase {
- private Map> tokenStringMap;
- private List originalTokenString;
+ private final Map> tokenStringMap;
+ private final List originalTokenString;
NormalizationTest() throws ExitException {
JPlagOptions options = getDefaultOptions("normalization");
diff --git a/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java b/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java
new file mode 100644
index 000000000..91dd56462
--- /dev/null
+++ b/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java
@@ -0,0 +1,26 @@
+package de.jplag.reporting;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for {@link FilePathUtil#joinZipPathSegments(String, String)}.
+ */
+class FilePathUtilTest {
+    private static final String JOINED = "left/right";
+    private static final String LEFT = "left";
+    private static final String RIGHT = "right";
+
+    @Test
+    void testJoinPath() {
+        assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT, RIGHT));
+    }
+
+    @Test
+    void testJoinPathWithLeftSlashSuffix() {
+        assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT + "/", RIGHT));
+    }
+
+    // The slash is a prefix of the right segment, so the test is named accordingly.
+    @Test
+    void testJoinPathWithRightSlashPrefix() {
+        assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT, "/" + RIGHT));
+    }
+
+    @Test
+    void testJoinPathWithSlashesOnBothSides() {
+        assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT + "/", "/" + RIGHT));
+    }
+
+    // Covers the loops that strip more than one separator per side.
+    @Test
+    void testJoinPathWithRepeatedSlashes() {
+        assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT + "//", "//" + RIGHT));
+    }
+}
\ No newline at end of file
diff --git a/coverage-report/pom.xml b/coverage-report/pom.xml
index 3d918dd10..71f01179a 100644
--- a/coverage-report/pom.xml
+++ b/coverage-report/pom.xml
@@ -63,12 +63,12 @@
de.jplag
- cpp
+ c
${revision}
de.jplag
- cpp2
+ cpp
${revision}
diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md
index 18babafee..a68e3c43b 100644
--- a/docs/1.-How-to-Use-JPlag.md
+++ b/docs/1.-How-to-Use-JPlag.md
@@ -84,8 +84,8 @@ Clustering
--cluster-skip Skips the clustering (default: false)
Commands:
+ c
cpp
- cpp2
csharp
emf
emf-model
diff --git a/docs/2.-Supported-Languages.md b/docs/2.-Supported-Languages.md
index b3b54345a..c89bb8ee3 100644
--- a/docs/2.-Supported-Languages.md
+++ b/docs/2.-Supported-Languages.md
@@ -1,4 +1,4 @@
-JPlag currently supports Java, C/C++, C#, Go, Kotlin, Python, R, Rust, Scala, Swift, and Scheme. Additionally, it has primitive support for text and prototypical support for EMF metamodels. A detailed list, including the supported language versions can be found in the [project readme](https://github.com/jplag/JPlag/blob/main/README.md#supported-languages).
+JPlag currently supports Java, C, C++, C#, Go, Kotlin, Python, R, Rust, Scala, Swift, and Scheme. Additionally, it has primitive support for text and prototypical support for EMF metamodels. A detailed list, including the supported language versions can be found in the [project readme](https://github.com/jplag/JPlag/blob/main/README.md#supported-languages).
The language modules differ in their maturity due to their age and different usage frequencies.
Thus, each frontend has a state label:
diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractAntlrLanguage.java b/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractAntlrLanguage.java
index 93f6ace30..1d75847bd 100644
--- a/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractAntlrLanguage.java
+++ b/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractAntlrLanguage.java
@@ -34,9 +34,9 @@ protected AbstractAntlrLanguage() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
if (this.parser == null) {
- this.parser = this.initializeParser();
+ this.parser = this.initializeParser(normalize);
}
return this.parser.parse(files);
@@ -46,7 +46,7 @@ public List parse(Set files) throws ParsingException {
* Lazily creates the parser. Has to be implemented, if no parser is passed in the constructor.
* @return The newly initialized parser
*/
- protected AbstractAntlrParserAdapter> initializeParser() {
+ protected AbstractAntlrParserAdapter> initializeParser(boolean normalize) {
throw new UnsupportedOperationException(
String.format("The initializeParser method needs to be implemented for %s", this.getClass().getName()));
}
diff --git a/language-antlr-utils/src/test/java/de/jplag/antlr/LanguageTest.java b/language-antlr-utils/src/test/java/de/jplag/antlr/LanguageTest.java
index 81941ac8d..b8fe50f51 100644
--- a/language-antlr-utils/src/test/java/de/jplag/antlr/LanguageTest.java
+++ b/language-antlr-utils/src/test/java/de/jplag/antlr/LanguageTest.java
@@ -20,19 +20,19 @@ class LanguageTest {
void testExceptionForNoDefinedParser() {
LanguageWithoutParser lang = new LanguageWithoutParser();
Set emptySet = Set.of();
- assertThrows(UnsupportedOperationException.class, () -> lang.parse(emptySet));
+ assertThrows(UnsupportedOperationException.class, () -> lang.parse(emptySet, false));
}
@Test
void testLanguageWithStaticParser() throws ParsingException {
TestLanguage lang = new TestLanguage();
- Assertions.assertEquals(0, lang.parse(Set.of()).size());
+ Assertions.assertEquals(0, lang.parse(Set.of(), false).size());
}
@Test
void testLanguageWithLazyParser() throws ParsingException {
LanguageWithLazyParser lang = new LanguageWithLazyParser();
- Assertions.assertEquals(0, lang.parse(Set.of()).size());
+ Assertions.assertEquals(0, lang.parse(Set.of(), false).size());
}
private static class LanguageWithoutParser extends AbstractAntlrLanguage {
@@ -59,7 +59,7 @@ public int minimumTokenMatch() {
private static class LanguageWithLazyParser extends LanguageWithoutParser {
@Override
- protected AbstractAntlrParserAdapter> initializeParser() {
+ protected AbstractAntlrParserAdapter> initializeParser(boolean normalize) {
return new TestParserAdapter();
}
}
diff --git a/language-api/src/main/java/de/jplag/Language.java b/language-api/src/main/java/de/jplag/Language.java
index c7e199667..ffe6aa9a6 100644
--- a/language-api/src/main/java/de/jplag/Language.java
+++ b/language-api/src/main/java/de/jplag/Language.java
@@ -32,12 +32,25 @@ public interface Language {
int minimumTokenMatch();
/**
- * Parses a set of files.
+ * Parses a set of files. Override this method, if you don't require normalization.
* @param files are the files to parse.
* @return the list of parsed JPlag tokens.
* @throws ParsingException if an error during parsing the files occurred.
+ * @deprecated Replaced by {@link #parse(Set, boolean)}
*/
- List parse(Set files) throws ParsingException;
+ @Deprecated(forRemoval = true)
+ default List parse(Set files) throws ParsingException {
+ return parse(files, false);
+ }
+
+ /**
+ * Parses a set of files. Override this method, if you require normalization within the language module.
+ * @param files are the files to parse.
+ * @param normalize True, if the tokens should be normalized
+ * @return the list of parsed JPlag tokens.
+ * @throws ParsingException if an error during parsing the files occurred.
+ */
+ List parse(Set files, boolean normalize) throws ParsingException;
/**
* Indicates whether the tokens returned by parse have semantic information added to them, i.e. whether the token
@@ -93,4 +106,20 @@ default boolean expectsSubmissionOrder() {
default List customizeSubmissionOrder(List submissions) {
return submissions;
}
+
+ /**
+ * @return True, if this language supports token sequence normalization. This does not include other normalization
+ * mechanisms that might be part of the language modules.
+ */
+ default boolean supportsNormalization() {
+ return false;
+ }
+
+ /**
+ * Override this method, if you need normalization within the language module, but not in the core module.
+ * @return True, if the core normalization should be used.
+ */
+ default boolean requiresCoreNormalization() {
+ return true;
+ }
}
diff --git a/languages/c/README.md b/languages/c/README.md
new file mode 100644
index 000000000..735466e44
--- /dev/null
+++ b/languages/c/README.md
@@ -0,0 +1,12 @@
+# JPlag C language module
+
+This module allows the use of JPlag with submissions in C.
+
+## Usage
+
+To parse C submissions, run JPlag with the `c` subcommand or use the `-l c` option.
+To use the module from the API configure your `JPlagOption` object with `new CLanguage()` as 'Language' as described in the usage information in the [readme](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
+
+## C++
+
+This module might work with C++ submissions. However, you should use the [cpp module](https://github.com/jplag/JPlag/tree/main/languages/cpp) for that.
\ No newline at end of file
diff --git a/languages/c/pom.xml b/languages/c/pom.xml
new file mode 100644
index 000000000..71d9cdb8e
--- /dev/null
+++ b/languages/c/pom.xml
@@ -0,0 +1,35 @@
+
+
+ 4.0.0
+
+ de.jplag
+ languages
+ ${revision}
+
+ c
+
+
+
+
+ com.helger.maven
+ ph-javacc-maven-plugin
+
+
+ javacc-gen
+
+ javacc
+
+ generate-sources
+
+ 21
+ true
+ de.jplag.c
+ src/main/javacc
+ ${project.build.directory}/generated-sources/javacc
+
+
+
+
+
+
+
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/Language.java b/languages/c/src/main/java/de/jplag/c/CLanguage.java
similarity index 67%
rename from languages/cpp/src/main/java/de/jplag/cpp/Language.java
rename to languages/c/src/main/java/de/jplag/c/CLanguage.java
index dcc69edd7..ba99dbf49 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/Language.java
+++ b/languages/c/src/main/java/de/jplag/c/CLanguage.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp;
+package de.jplag.c;
import java.io.File;
import java.util.List;
@@ -6,16 +6,17 @@
import org.kohsuke.MetaInfServices;
+import de.jplag.Language;
import de.jplag.ParsingException;
import de.jplag.Token;
@MetaInfServices(de.jplag.Language.class)
-public class Language implements de.jplag.Language {
- private static final String IDENTIFIER = "cpp";
+public class CLanguage implements Language {
+ private static final String IDENTIFIER = "c";
- private final Scanner scanner; // cpp code is scanned not parsed
+ private final Scanner scanner; // c code is scanned not parsed
- public Language() {
+ public CLanguage() {
scanner = new Scanner();
}
@@ -26,7 +27,7 @@ public String[] suffixes() {
@Override
public String getName() {
- return "C/C++ Scanner [basic markup]";
+ return "C Scanner";
}
@Override
@@ -40,7 +41,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return this.scanner.scan(files);
}
}
diff --git a/languages/c/src/main/java/de/jplag/c/CTokenType.java b/languages/c/src/main/java/de/jplag/c/CTokenType.java
new file mode 100644
index 000000000..185daa3e1
--- /dev/null
+++ b/languages/c/src/main/java/de/jplag/c/CTokenType.java
@@ -0,0 +1,77 @@
+package de.jplag.c;
+
+import de.jplag.TokenType;
+
+public enum CTokenType implements TokenType {
+ C_BLOCK_BEGIN("BLOCK{"),
+ C_BLOCK_END("}BLOCK"),
+ C_QUESTIONMARK("COND"),
+ C_ELLIPSIS("..."),
+ C_ASSIGN("ASSIGN"),
+ C_DOT("DOT"),
+ C_ARROW("ARROW"),
+ C_ARROWSTAR("ARROWSTAR"),
+ C_AUTO("AUTO"),
+ C_BREAK("BREAK"),
+ C_CASE("CASE"),
+ C_CATCH("CATCH"),
+ C_CHAR("CHAR"),
+ C_CONST("CONST"),
+ C_CONTINUE("CONTINUE"),
+ C_DEFAULT("DEFAULT"),
+ C_DELETE("DELETE"),
+ C_DO("DO"),
+ C_DOUBLE("DOUBLE"),
+ C_ELSE("ELSE"),
+ C_ENUM("ENUM"),
+ C_EXTERN("EXTERN"),
+ C_FLOAT("FLOAT"),
+ C_FOR("FOR"),
+ C_FRIEND("FRIEND"),
+ C_GOTO("GOTO"),
+ C_IF("IF"),
+ C_INLINE("INLINE"),
+ C_INT("INT"),
+ C_LONG("LONG"),
+ C_NEW("NEW"),
+ C_PRIVATE("PRIVATE"),
+ C_PROTECTED("PROTECTED"),
+ C_PUBLIC("PUBLIC"),
+ C_REDECLARED("REDECLARED"),
+ C_REGISTER("REGISTER"),
+ C_RETURN("RETURN"),
+ C_SHORT("SHORT"),
+ C_SIGNED("SIGNED"),
+ C_SIZEOF("SIZEOF"),
+ C_STATIC("STATIC"),
+ C_STRUCT("STRUCT"),
+ C_CLASS("CLASS"),
+ C_SWITCH("SWITCH"),
+ C_TEMPLATE("TEMPLATE"),
+ C_THIS("THIS"),
+ C_TRY("TRY"),
+ C_TYPEDEF("TYPEDEF"),
+ C_UNION("UNION"),
+ C_UNSIGNED("UNSIGNED"),
+ C_VIRTUAL("VIRTUAL"),
+ C_VOID("VOID"),
+ C_VOLATILE("VOLATILE"),
+ C_WHILE("WHILE"),
+ C_OPERATOR("OPERATOR"),
+ C_THROW("THROW"),
+ C_ID("ID"),
+ C_FUN("FUN"),
+ C_DOTSTAR("DOTSTAR"),
+ C_NULL("NULL");
+
+ private final String description;
+
+ @Override
+ public String getDescription() {
+ return this.description;
+ }
+
+ CTokenType(String description) {
+ this.description = description;
+ }
+}
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/NewlineStream.java b/languages/c/src/main/java/de/jplag/c/NewlineStream.java
similarity index 97%
rename from languages/cpp/src/main/java/de/jplag/cpp/NewlineStream.java
rename to languages/c/src/main/java/de/jplag/c/NewlineStream.java
index 9cc21a3d8..f45c9af61 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/NewlineStream.java
+++ b/languages/c/src/main/java/de/jplag/c/NewlineStream.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp;
+package de.jplag.c;
import java.io.IOException;
import java.io.InputStream;
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/Scanner.java b/languages/c/src/main/java/de/jplag/c/Scanner.java
similarity index 91%
rename from languages/cpp/src/main/java/de/jplag/cpp/Scanner.java
rename to languages/c/src/main/java/de/jplag/c/Scanner.java
index e7f25476d..c3292814e 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/Scanner.java
+++ b/languages/c/src/main/java/de/jplag/c/Scanner.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp;
+package de.jplag.c;
import java.io.File;
import java.util.ArrayList;
@@ -32,7 +32,7 @@ public List scan(Set files) throws ParsingException {
return tokens;
}
- public void add(CPPTokenType type, de.jplag.cpp.Token token) {
+ public void add(CTokenType type, de.jplag.c.Token token) {
int length = token.endColumn - token.beginColumn + 1;
tokens.add(new Token(type, currentFile, token.beginLine, token.beginColumn, length));
}
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/experimental/GCCSourceAnalysis.java b/languages/c/src/main/java/de/jplag/c/experimental/GCCSourceAnalysis.java
similarity index 95%
rename from languages/cpp/src/main/java/de/jplag/cpp/experimental/GCCSourceAnalysis.java
rename to languages/c/src/main/java/de/jplag/c/experimental/GCCSourceAnalysis.java
index 52b7bdfcd..85d94d514 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/experimental/GCCSourceAnalysis.java
+++ b/languages/c/src/main/java/de/jplag/c/experimental/GCCSourceAnalysis.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp.experimental;
+package de.jplag.c.experimental;
import java.io.BufferedReader;
import java.io.File;
@@ -25,7 +25,7 @@ public GCCSourceAnalysis() {
}
@Override
- public boolean isTokenIgnored(de.jplag.cpp.Token token, File file) {
+ public boolean isTokenIgnored(de.jplag.c.Token token, File file) {
String fileName = file.getName();
if (linesToDelete.containsKey(fileName)) {
var ignoredLineNumbers = linesToDelete.get(fileName);
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/experimental/SourceAnalysis.java b/languages/c/src/main/java/de/jplag/c/experimental/SourceAnalysis.java
similarity index 90%
rename from languages/cpp/src/main/java/de/jplag/cpp/experimental/SourceAnalysis.java
rename to languages/c/src/main/java/de/jplag/c/experimental/SourceAnalysis.java
index 0d846370e..f92986d1d 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/experimental/SourceAnalysis.java
+++ b/languages/c/src/main/java/de/jplag/c/experimental/SourceAnalysis.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp.experimental;
+package de.jplag.c.experimental;
import java.io.File;
import java.util.Set;
@@ -16,7 +16,7 @@ public interface SourceAnalysis {
* @param file The file the token was scanned in
* @return True, if the token should not be added to a TokenList, false if it should
*/
- boolean isTokenIgnored(de.jplag.cpp.Token token, File file);
+ boolean isTokenIgnored(de.jplag.c.Token token, File file);
/**
* Executes the source analysis on the files of a submission.
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/experimental/UnreachableCodeFilter.java b/languages/c/src/main/java/de/jplag/c/experimental/UnreachableCodeFilter.java
similarity index 98%
rename from languages/cpp/src/main/java/de/jplag/cpp/experimental/UnreachableCodeFilter.java
rename to languages/c/src/main/java/de/jplag/c/experimental/UnreachableCodeFilter.java
index e795c6976..59b8df85b 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/experimental/UnreachableCodeFilter.java
+++ b/languages/c/src/main/java/de/jplag/c/experimental/UnreachableCodeFilter.java
@@ -1,7 +1,7 @@
-package de.jplag.cpp.experimental;
+package de.jplag.c.experimental;
import static de.jplag.SharedTokenType.FILE_END;
-import static de.jplag.cpp.CPPTokenType.*;
+import static de.jplag.c.CTokenType.*;
import java.util.List;
import java.util.ListIterator;
diff --git a/languages/cpp/src/main/javacc/CPP.jj b/languages/c/src/main/javacc/CPP.jj
similarity index 99%
rename from languages/cpp/src/main/javacc/CPP.jj
rename to languages/c/src/main/javacc/CPP.jj
index cc1689fbe..ee4ca34d8 100644
--- a/languages/cpp/src/main/javacc/CPP.jj
+++ b/languages/c/src/main/javacc/CPP.jj
@@ -14,7 +14,7 @@ options
}
PARSER_BEGIN(CPPScanner)
-package de.jplag.cpp;
+package de.jplag.c;
import java.io.File;
import java.io.FileInputStream;
@@ -23,7 +23,7 @@ import java.io.InputStream;
import de.jplag.ParsingException;
-import static de.jplag.cpp.CPPTokenType.*;
+import static de.jplag.c.CTokenType.*;
public class CPPScanner {
private Scanner delegatingScanner;
diff --git a/languages/cpp2/README.md b/languages/cpp/README.md
similarity index 59%
rename from languages/cpp2/README.md
rename to languages/cpp/README.md
index a8046b20f..9eb9b09aa 100644
--- a/languages/cpp2/README.md
+++ b/languages/cpp/README.md
@@ -1,11 +1,8 @@
# JPlag C++ language module
-**Note**: This language module is meant to replace the existing C++ language module in the future.
-While the old language module is based on lexer tokens, this language module uses a parse tree for token extraction.
-The base package name of this language module and its identifier are `cpp2` currently, but this might change if the old
-language module gets replaced.
+**Note**: This module replaces the old cpp module. The old module is now the `c` module and is meant only for C, because it handles C better than this module does.
-The JPlag C++ frontend allows the use of JPlag with submissions in C/C++.
+The JPlag C++ frontend allows the use of JPlag with submissions in C++.
It is based on the [C++ ANTLR4 grammar](https://github.com/antlr/grammars-v4/tree/master/cpp), licensed under MIT.
### C++ specification compatibility
@@ -21,11 +18,11 @@ While the Java language module is based on an AST, this language module uses a p
There are differences, including:
- `import` is extracted in Java, while `using` is not extracted due to the fact that it can be placed freely in the code.
-More syntactic elements of C/C++ may turn out to be helpful to include in the future, especially those that are newly introduced.
+More syntactic elements of C++ may turn out to be helpful to include in the future, especially those that are newly introduced.
### Usage
-To use the C++ frontend, add the `-l cpp2` flag in the CLI, or use a `JPlagOption` object with `new de.jplag.cpp2.CPPLanguage()` as `Language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
+To use the C++ frontend, add the `-l cpp` flag in the CLI, or use a `JPlagOption` object with `new de.jplag.cpp.CPPLanguage()` as `Language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
### Changes to the Grammar
diff --git a/languages/cpp/pom.xml b/languages/cpp/pom.xml
index 734f52d92..d3cfb4881 100644
--- a/languages/cpp/pom.xml
+++ b/languages/cpp/pom.xml
@@ -8,25 +8,28 @@
cpp
+
+
+ org.antlr
+ antlr4-runtime
+
+
+ de.jplag
+ language-antlr-utils
+ ${revision}
+
+
+
- com.helger.maven
- ph-javacc-maven-plugin
+ org.antlr
+ antlr4-maven-plugin
- javacc-gen
- javacc
+ antlr4
- generate-sources
-
- 21
- true
- de.jplag.cpp
- src/main/javacc
- ${project.build.directory}/generated-sources/javacc
-
diff --git a/languages/cpp2/src/main/antlr4/de/jplag/cpp2/grammar/CPP14Lexer.g4 b/languages/cpp/src/main/antlr4/de/jplag/cpp/grammar/CPP14Lexer.g4
similarity index 100%
rename from languages/cpp2/src/main/antlr4/de/jplag/cpp2/grammar/CPP14Lexer.g4
rename to languages/cpp/src/main/antlr4/de/jplag/cpp/grammar/CPP14Lexer.g4
diff --git a/languages/cpp2/src/main/antlr4/de/jplag/cpp2/grammar/CPP14Parser.g4 b/languages/cpp/src/main/antlr4/de/jplag/cpp/grammar/CPP14Parser.g4
similarity index 100%
rename from languages/cpp2/src/main/antlr4/de/jplag/cpp2/grammar/CPP14Parser.g4
rename to languages/cpp/src/main/antlr4/de/jplag/cpp/grammar/CPP14Parser.g4
diff --git a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPLanguage.java b/languages/cpp/src/main/java/de/jplag/cpp/CPPLanguage.java
similarity index 81%
rename from languages/cpp2/src/main/java/de/jplag/cpp2/CPPLanguage.java
rename to languages/cpp/src/main/java/de/jplag/cpp/CPPLanguage.java
index 990ab8c9f..c08e53dce 100644
--- a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPLanguage.java
+++ b/languages/cpp/src/main/java/de/jplag/cpp/CPPLanguage.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp2;
+package de.jplag.cpp;
import org.kohsuke.MetaInfServices;
@@ -10,7 +10,7 @@
*/
@MetaInfServices(Language.class)
public class CPPLanguage extends AbstractAntlrLanguage {
- private static final String IDENTIFIER = "cpp2";
+ private static final String IDENTIFIER = "cpp";
public CPPLanguage() {
super(new CPPParserAdapter());
@@ -23,7 +23,7 @@ public String[] suffixes() {
@Override
public String getName() {
- return "C/C++ Parser";
+ return "C++ Parser";
}
@Override
@@ -40,4 +40,9 @@ public int minimumTokenMatch() {
public boolean tokensHaveSemantics() {
return true;
}
+
+ @Override
+ public boolean supportsNormalization() {
+ return true;
+ }
}
diff --git a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPListener.java b/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java
similarity index 95%
rename from languages/cpp2/src/main/java/de/jplag/cpp2/CPPListener.java
rename to languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java
index 115ec54de..19db6dea7 100644
--- a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPListener.java
+++ b/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java
@@ -1,6 +1,6 @@
-package de.jplag.cpp2;
+package de.jplag.cpp;
-import static de.jplag.cpp2.CPPTokenType.*;
+import static de.jplag.cpp.CPPTokenType.*;
import java.util.function.Function;
@@ -10,8 +10,8 @@
import de.jplag.TokenType;
import de.jplag.antlr.AbstractAntlrListener;
import de.jplag.antlr.ContextVisitor;
-import de.jplag.cpp2.grammar.CPP14Parser;
-import de.jplag.cpp2.grammar.CPP14Parser.*;
+import de.jplag.cpp.grammar.CPP14Parser;
+import de.jplag.cpp.grammar.CPP14Parser.*;
import de.jplag.semantics.CodeSemantics;
import de.jplag.semantics.VariableAccessType;
import de.jplag.semantics.VariableRegistry;
@@ -33,29 +33,11 @@ class CPPListener extends AbstractAntlrListener {
visit(FunctionDefinitionContext.class).map(FUNCTION_BEGIN, FUNCTION_END).addLocalScope().withSemantics(CodeSemantics::createControl);
- visit(IterationStatementContext.class, rule -> rule.Do() != null).map(DO_BEGIN, DO_END).addLocalScope().withLoopSemantics();
- visit(IterationStatementContext.class, rule -> rule.For() != null).map(FOR_BEGIN, FOR_END).addLocalScope().withLoopSemantics();
- visit(IterationStatementContext.class, rule -> rule.While() != null && rule.Do() == null).map(WHILE_BEGIN, WHILE_END).addLocalScope()
- .withLoopSemantics();
-
- visit(SelectionStatementContext.class, rule -> rule.Switch() != null).map(SWITCH_BEGIN, SWITCH_END).addLocalScope()
- .withSemantics(CodeSemantics::createControl);
- visit(SelectionStatementContext.class, rule -> rule.If() != null).map(IF_BEGIN, IF_END).addLocalScope()
- .withSemantics(CodeSemantics::createControl);
- // possible problem: variable from if visible in else, but in reality is not -- doesn't really matter
- visit(CPP14Parser.Else).map(ELSE).withSemantics(CodeSemantics::createControl);
-
- visit(LabeledStatementContext.class, rule -> rule.Case() != null).map(CASE).withSemantics(CodeSemantics::createControl);
- visit(LabeledStatementContext.class, rule -> rule.Default() != null).map(DEFAULT).withSemantics(CodeSemantics::createControl);
+ statementRules();
visit(TryBlockContext.class).map(TRY_BEGIN, TRY_END).addLocalScope().withSemantics(CodeSemantics::createControl);
visit(HandlerContext.class).map(CATCH_BEGIN, CATCH_END).addLocalScope().withSemantics(CodeSemantics::createControl);
- visit(JumpStatementContext.class, rule -> rule.Break() != null).map(BREAK).withSemantics(CodeSemantics::createControl);
- visit(JumpStatementContext.class, rule -> rule.Continue() != null).map(CONTINUE).withSemantics(CodeSemantics::createControl);
- visit(JumpStatementContext.class, rule -> rule.Goto() != null).map(GOTO).withSemantics(CodeSemantics::createControl);
- visit(JumpStatementContext.class, rule -> rule.Return() != null).map(RETURN).withSemantics(CodeSemantics::createControl);
-
visit(ThrowExpressionContext.class).map(THROW).withSemantics(CodeSemantics::createControl);
visit(NewExpressionContext.class, rule -> rule.newInitializer() != null).map(NEWCLASS).withSemantics(CodeSemantics::new);
@@ -73,6 +55,35 @@ class CPPListener extends AbstractAntlrListener {
.onEnter((rule, varReg) -> varReg.setNextVariableAccessType(VariableAccessType.WRITE));
visit(BracedInitListContext.class).map(BRACED_INIT_BEGIN, BRACED_INIT_END).withSemantics(CodeSemantics::new);
+ typeSpecifierRule();
+ declarationRules();
+ expressionRules();
+ idRules();
+ }
+
+ private void statementRules() {
+ visit(IterationStatementContext.class, rule -> rule.Do() != null).map(DO_BEGIN, DO_END).addLocalScope().withLoopSemantics();
+ visit(IterationStatementContext.class, rule -> rule.For() != null).map(FOR_BEGIN, FOR_END).addLocalScope().withLoopSemantics();
+ visit(IterationStatementContext.class, rule -> rule.While() != null && rule.Do() == null).map(WHILE_BEGIN, WHILE_END).addLocalScope()
+ .withLoopSemantics();
+
+ visit(SelectionStatementContext.class, rule -> rule.Switch() != null).map(SWITCH_BEGIN, SWITCH_END).addLocalScope()
+ .withSemantics(CodeSemantics::createControl);
+ visit(SelectionStatementContext.class, rule -> rule.If() != null).map(IF_BEGIN, IF_END).addLocalScope()
+ .withSemantics(CodeSemantics::createControl);
+ // possible problem: variable from if visible in else, but in reality is not -- doesn't really matter
+ visit(CPP14Parser.Else).map(ELSE).withSemantics(CodeSemantics::createControl);
+
+ visit(LabeledStatementContext.class, rule -> rule.Case() != null).map(CASE).withSemantics(CodeSemantics::createControl);
+ visit(LabeledStatementContext.class, rule -> rule.Default() != null).map(DEFAULT).withSemantics(CodeSemantics::createControl);
+
+ visit(JumpStatementContext.class, rule -> rule.Break() != null).map(BREAK).withSemantics(CodeSemantics::createControl);
+ visit(JumpStatementContext.class, rule -> rule.Continue() != null).map(CONTINUE).withSemantics(CodeSemantics::createControl);
+ visit(JumpStatementContext.class, rule -> rule.Goto() != null).map(GOTO).withSemantics(CodeSemantics::createControl);
+ visit(JumpStatementContext.class, rule -> rule.Return() != null).map(RETURN).withSemantics(CodeSemantics::createControl);
+ }
+
+ private void typeSpecifierRule() {
visit(SimpleTypeSpecifierContext.class, rule -> {
if (hasAncestor(rule, MemberdeclarationContext.class, FunctionDefinitionContext.class)) {
return true;
@@ -99,7 +110,9 @@ class CPPListener extends AbstractAntlrListener {
variableRegistry.registerVariable(name, scope, true);
}
});
+ }
+ private void declarationRules() {
mapApply(visit(SimpleDeclarationContext.class, rule -> {
if (!hasAncestor(rule, FunctionBodyContext.class)) {
return false;
@@ -125,12 +138,17 @@ class CPPListener extends AbstractAntlrListener {
varReg.setNextVariableAccessType(VariableAccessType.WRITE);
}
});
+ }
+
+ private void expressionRules() {
visit(ConditionalExpressionContext.class, rule -> rule.Question() != null).map(QUESTIONMARK).withSemantics(CodeSemantics::new);
mapApply(visit(PostfixExpressionContext.class, rule -> rule.LeftParen() != null));
visit(PostfixExpressionContext.class, rule -> rule.PlusPlus() != null || rule.MinusMinus() != null).map(ASSIGN)
.withSemantics(CodeSemantics::new).onEnter((rule, varReg) -> varReg.setNextVariableAccessType(VariableAccessType.READ_WRITE));
+ }
+ private void idRules() {
visit(UnqualifiedIdContext.class).onEnter((ctx, varReg) -> {
ParserRuleContext parentCtx = ctx.getParent().getParent();
if (!parentCtx.getParent().getParent().getText().contains("(")) {
diff --git a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java b/languages/cpp/src/main/java/de/jplag/cpp/CPPParserAdapter.java
similarity index 91%
rename from languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java
rename to languages/cpp/src/main/java/de/jplag/cpp/CPPParserAdapter.java
index 925406360..d1bc12a6b 100644
--- a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java
+++ b/languages/cpp/src/main/java/de/jplag/cpp/CPPParserAdapter.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp2;
+package de.jplag.cpp;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenStream;
@@ -8,8 +8,8 @@
import de.jplag.AbstractParser;
import de.jplag.antlr.AbstractAntlrListener;
import de.jplag.antlr.AbstractAntlrParserAdapter;
-import de.jplag.cpp2.grammar.CPP14Lexer;
-import de.jplag.cpp2.grammar.CPP14Parser;
+import de.jplag.cpp.grammar.CPP14Lexer;
+import de.jplag.cpp.grammar.CPP14Parser;
/**
* The adapter between {@link AbstractParser} and the ANTLR based parser of this language module.
diff --git a/languages/cpp/src/main/java/de/jplag/cpp/CPPTokenType.java b/languages/cpp/src/main/java/de/jplag/cpp/CPPTokenType.java
index e8143b98f..0bf830cd6 100644
--- a/languages/cpp/src/main/java/de/jplag/cpp/CPPTokenType.java
+++ b/languages/cpp/src/main/java/de/jplag/cpp/CPPTokenType.java
@@ -2,67 +2,52 @@
import de.jplag.TokenType;
+/**
+ * C++ token types extracted by this language module.
+ */
public enum CPPTokenType implements TokenType {
- C_BLOCK_BEGIN("BLOCK{"),
- C_BLOCK_END("}BLOCK"),
- C_QUESTIONMARK("COND"),
- C_ELLIPSIS("..."),
- C_ASSIGN("ASSIGN"),
- C_DOT("DOT"),
- C_ARROW("ARROW"),
- C_ARROWSTAR("ARROWSTAR"),
- C_AUTO("AUTO"),
- C_BREAK("BREAK"),
- C_CASE("CASE"),
- C_CATCH("CATCH"),
- C_CHAR("CHAR"),
- C_CONST("CONST"),
- C_CONTINUE("CONTINUE"),
- C_DEFAULT("DEFAULT"),
- C_DELETE("DELETE"),
- C_DO("DO"),
- C_DOUBLE("DOUBLE"),
- C_ELSE("ELSE"),
- C_ENUM("ENUM"),
- C_EXTERN("EXTERN"),
- C_FLOAT("FLOAT"),
- C_FOR("FOR"),
- C_FRIEND("FRIEND"),
- C_GOTO("GOTO"),
- C_IF("IF"),
- C_INLINE("INLINE"),
- C_INT("INT"),
- C_LONG("LONG"),
- C_NEW("NEW"),
- C_PRIVATE("PRIVATE"),
- C_PROTECTED("PROTECTED"),
- C_PUBLIC("PUBLIC"),
- C_REDECLARED("REDECLARED"),
- C_REGISTER("REGISTER"),
- C_RETURN("RETURN"),
- C_SHORT("SHORT"),
- C_SIGNED("SIGNED"),
- C_SIZEOF("SIZEOF"),
- C_STATIC("STATIC"),
- C_STRUCT("STRUCT"),
- C_CLASS("CLASS"),
- C_SWITCH("SWITCH"),
- C_TEMPLATE("TEMPLATE"),
- C_THIS("THIS"),
- C_TRY("TRY"),
- C_TYPEDEF("TYPEDEF"),
- C_UNION("UNION"),
- C_UNSIGNED("UNSIGNED"),
- C_VIRTUAL("VIRTUAL"),
- C_VOID("VOID"),
- C_VOLATILE("VOLATILE"),
- C_WHILE("WHILE"),
- C_OPERATOR("OPERATOR"),
- C_THROW("THROW"),
- C_ID("ID"),
- C_FUN("FUN"),
- C_DOTSTAR("DOTSTAR"),
- C_NULL("NULL");
+ CLASS_BEGIN("CLASS{"),
+ CLASS_END("}CLASS"),
+ STRUCT_BEGIN("STRUCT{"),
+ STRUCT_END("}STRUCT"),
+ ENUM_BEGIN("ENUM{"),
+ ENUM_END("}ENUM"),
+ UNION_BEGIN("UNION{"),
+ UNION_END("}UNION"),
+ FUNCTION_BEGIN("FUNCTION{"),
+ FUNCTION_END("}FUNCTION"),
+ DO_BEGIN("DO{"),
+ DO_END("}DO"),
+ WHILE_BEGIN("WHILE{"),
+ WHILE_END("}WHILE"),
+ FOR_BEGIN("FOR{"),
+ FOR_END("}FOR"),
+ SWITCH_BEGIN("SWITCH{"),
+ SWITCH_END("}SWITCH"),
+ CASE("CASE"),
+ TRY_BEGIN("TRY{"),
+ TRY_END("}TRY"),
+ CATCH_BEGIN("CATCH{"),
+ CATCH_END("}CATCH"),
+ IF_BEGIN("IF{"),
+ IF_END("}IF"),
+ ELSE("ELSE"),
+ BREAK("BREAK"),
+ CONTINUE("CONTINUE"),
+ GOTO("GOTO"),
+ RETURN("RETURN"),
+ THROW("THROW"),
+ NEWCLASS("NEWCLASS"),
+ GENERIC("GENERIC"),
+ NEWARRAY("NEWARRAY"),
+ BRACED_INIT_BEGIN("BRACED_INIT{"),
+ BRACED_INIT_END("}BRACED_INIT"),
+ ASSIGN("ASSIGN"),
+ STATIC_ASSERT("STATIC_ASSERT"),
+ VARDEF("VARDEF"),
+ QUESTIONMARK("COND"),
+ DEFAULT("DEFAULT"),
+ APPLY("APPLY");
private final String description;
diff --git a/languages/cpp2/src/test/java/de/jplag/cpp2/CppLanguageTest.java b/languages/cpp/src/test/java/de/jplag/cpp/CppLanguageTest.java
similarity index 99%
rename from languages/cpp2/src/test/java/de/jplag/cpp2/CppLanguageTest.java
rename to languages/cpp/src/test/java/de/jplag/cpp/CppLanguageTest.java
index 52d4342d9..c885c4562 100644
--- a/languages/cpp2/src/test/java/de/jplag/cpp2/CppLanguageTest.java
+++ b/languages/cpp/src/test/java/de/jplag/cpp/CppLanguageTest.java
@@ -1,4 +1,4 @@
-package de.jplag.cpp2;
+package de.jplag.cpp;
import java.util.Arrays;
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/CallOutsideMethodInClass.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/CallOutsideMethodInClass.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/CallOutsideMethodInClass.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/CallOutsideMethodInClass.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/FunctionCall.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/FunctionCall.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/FunctionCall.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/FunctionCall.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/IfElse.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/IfElse.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/IfElse.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/IfElse.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/IntArray.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/IntArray.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/IntArray.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/IntArray.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/Loop.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/Loop.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/Loop.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/Loop.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/Union.cpp b/languages/cpp/src/test/resources/de/jplag/cpp/Union.cpp
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/Union.cpp
rename to languages/cpp/src/test/resources/de/jplag/cpp/Union.cpp
diff --git a/languages/cpp2/src/test/resources/de/jplag/cpp2/bc6h_enc.h b/languages/cpp/src/test/resources/de/jplag/cpp/bc6h_enc.h
similarity index 100%
rename from languages/cpp2/src/test/resources/de/jplag/cpp2/bc6h_enc.h
rename to languages/cpp/src/test/resources/de/jplag/cpp/bc6h_enc.h
diff --git a/languages/cpp2/pom.xml b/languages/cpp2/pom.xml
deleted file mode 100644
index cfd8366cc..000000000
--- a/languages/cpp2/pom.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
- 4.0.0
-
- de.jplag
- languages
- ${revision}
-
- cpp2
-
-
-
- org.antlr
- antlr4-runtime
-
-
- de.jplag
- language-antlr-utils
- ${revision}
-
-
-
-
-
-
- org.antlr
- antlr4-maven-plugin
-
-
-
- antlr4
-
-
-
-
-
-
-
diff --git a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPTokenType.java b/languages/cpp2/src/main/java/de/jplag/cpp2/CPPTokenType.java
deleted file mode 100644
index a87a43630..000000000
--- a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPTokenType.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package de.jplag.cpp2;
-
-import de.jplag.TokenType;
-
-/**
- * C++ token types extracted by this language module.
- */
-public enum CPPTokenType implements TokenType {
- CLASS_BEGIN("CLASS{"),
- CLASS_END("}CLASS"),
- STRUCT_BEGIN("STRUCT{"),
- STRUCT_END("}STRUCT"),
- ENUM_BEGIN("ENUM{"),
- ENUM_END("}ENUM"),
- UNION_BEGIN("UNION{"),
- UNION_END("}UNION"),
- FUNCTION_BEGIN("FUNCTION{"),
- FUNCTION_END("}FUNCTION"),
- DO_BEGIN("DO{"),
- DO_END("}DO"),
- WHILE_BEGIN("WHILE{"),
- WHILE_END("}WHILE"),
- FOR_BEGIN("FOR{"),
- FOR_END("}FOR"),
- SWITCH_BEGIN("SWITCH{"),
- SWITCH_END("}SWITCH"),
- CASE("CASE"),
- TRY_BEGIN("TRY{"),
- TRY_END("}TRY"),
- CATCH_BEGIN("CATCH{"),
- CATCH_END("}CATCH"),
- IF_BEGIN("IF{"),
- IF_END("}IF"),
- ELSE("ELSE"),
- BREAK("BREAK"),
- CONTINUE("CONTINUE"),
- GOTO("GOTO"),
- RETURN("RETURN"),
- THROW("THROW"),
- NEWCLASS("NEWCLASS"),
- GENERIC("GENERIC"),
- NEWARRAY("NEWARRAY"),
- BRACED_INIT_BEGIN("BRACED_INIT{"),
- BRACED_INIT_END("}BRACED_INIT"),
- ASSIGN("ASSIGN"),
- STATIC_ASSERT("STATIC_ASSERT"),
- VARDEF("VARDEF"),
- QUESTIONMARK("COND"),
- DEFAULT("DEFAULT"),
- APPLY("APPLY");
-
- private final String description;
-
- @Override
- public String getDescription() {
- return this.description;
- }
-
- CPPTokenType(String description) {
- this.description = description;
- }
-}
diff --git a/languages/emf-metamodel-dynamic/README.md b/languages/emf-metamodel-dynamic/README.md
index a0ff03849..0502cfac5 100644
--- a/languages/emf-metamodel-dynamic/README.md
+++ b/languages/emf-metamodel-dynamic/README.md
@@ -1,5 +1,5 @@
# Dynamic EMF metamodel language module
-The dynamic EMF metamodel language module allows the use of JPlag with metamodel submissions.
+The dynamic EMF metamodel language module allows the use of JPlag with EMF metamodel submissions.
It is based on the EMF API.
### EMF specification compatibility
@@ -9,8 +9,14 @@ This module is based on the EMF dependencies available on maven central. These m
For the token extraction, we visit the containment tree of the metamodel and extract tokens for all metamodel elements based on their concrete metaclass. In this module, we thus extract tokens based on a dynamic token set.
### Usage
-To use this module, add the `-l emf-metamodel-dynamic` flag in the CLI, or use a `JPlagOption` object with `new DynamicEmfLanguage()` as `language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
+Note that this language module is currently not offered via the CLI.
+Use the non-dynamic version instead (`-l emf`).
-### More Info
-More information can be found in the paper [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508).
-A short summary can be found on [Kudos](https://www.growkudos.com/publications/10.1145%25252F3550356.3556508/reader).
+### Report Viewer
+In the report viewer, Emfatic is used as a textual model view.
+
+### Literature
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508).
+* Its [Kudos Summary](https://www.growkudos.com/publications/10.1145%25252F3550356.3556508/reader).
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*]
+* *"Automated Detection of AI-Obfuscated Plagiarism in Modeling Assignments" (ICSE-SEET'24)*
diff --git a/languages/emf-metamodel-dynamic/src/main/java/de/jplag/emf/dynamic/parser/DynamicElementTokenizer.java b/languages/emf-metamodel-dynamic/src/main/java/de/jplag/emf/dynamic/parser/DynamicElementTokenizer.java
index 346771411..e3ee69097 100644
--- a/languages/emf-metamodel-dynamic/src/main/java/de/jplag/emf/dynamic/parser/DynamicElementTokenizer.java
+++ b/languages/emf-metamodel-dynamic/src/main/java/de/jplag/emf/dynamic/parser/DynamicElementTokenizer.java
@@ -1,7 +1,7 @@
package de.jplag.emf.dynamic.parser;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.LinkedHashSet;
+import java.util.SequencedSet;
import org.eclipse.emf.ecore.EClass;
import org.eclipse.emf.ecore.EObject;
@@ -15,14 +15,7 @@
*/
public class DynamicElementTokenizer implements ModelingElementTokenizer {
- private final Set knownTokenTypes;
-
- /**
- * Creates the tokenizer, initially with an empty token set.
- */
- public DynamicElementTokenizer() {
- knownTokenTypes = new HashSet<>();
- }
+ private static final SequencedSet knownTokenTypes = new LinkedHashSet<>();
@Override
public TokenType element2Token(EObject modelElement) {
@@ -32,7 +25,7 @@ public TokenType element2Token(EObject modelElement) {
}
@Override
- public Set allTokenTypes() {
- return Set.copyOf(knownTokenTypes);
+ public SequencedSet allTokenTypes() {
+ return new LinkedHashSet<>(knownTokenTypes);
}
}
diff --git a/languages/emf-metamodel-dynamic/src/test/java/de/jplag/emf/dynamic/MinimalDynamicMetamodelTest.java b/languages/emf-metamodel-dynamic/src/test/java/de/jplag/emf/dynamic/MinimalDynamicMetamodelTest.java
index 0911fd455..a437be6e2 100644
--- a/languages/emf-metamodel-dynamic/src/test/java/de/jplag/emf/dynamic/MinimalDynamicMetamodelTest.java
+++ b/languages/emf-metamodel-dynamic/src/test/java/de/jplag/emf/dynamic/MinimalDynamicMetamodelTest.java
@@ -47,7 +47,7 @@ public void setUp() {
@DisplayName("Test tokens generated from example metamodels")
void testBookstoreMetamodels() throws ParsingException {
List testFiles = Arrays.stream(TEST_SUBJECTS).map(path -> new File(BASE_PATH.toFile(), path)).toList();
- List result = language.parse(new HashSet<>(testFiles));
+ List result = language.parse(new HashSet<>(testFiles), true);
List tokenTypes = result.stream().map(Token::getType).toList();
logger.debug(TokenPrinter.printTokens(result, baseDirectory, Optional.of(EmfLanguage.VIEW_FILE_SUFFIX)));
logger.info("parsed token types: " + tokenTypes.stream().map(TokenType::getDescription).toList());
diff --git a/languages/emf-metamodel/README.md b/languages/emf-metamodel/README.md
index fc203805d..071bfbcaf 100644
--- a/languages/emf-metamodel/README.md
+++ b/languages/emf-metamodel/README.md
@@ -1,5 +1,5 @@
# EMF metamodel language module
-The EMF metamodel language module allows the use of JPlag with metamodel submissions.
+The EMF metamodel language module allows the use of JPlag with EMF metamodel submissions.
It is based on the EMF API.
### EMF specification compatibility
@@ -9,8 +9,14 @@ This module is based on the EMF dependencies available on maven central. These m
For the token extraction, we visit the containment tree of the metamodel and extract tokens for certain metamodel elements based on their metaclass. In this module, we extract tokens based on a [handcrafted token set](https://github.com/jplag/JPlag/blob/master/languages/emf-metamodel/src/main/java/de/jplag/emf/MetamodelTokenType.java). Note that not for all concrete metaclasses tokens are extracted. `EFactory`, `EGenericType`, and `EObject` are ignored. Moreover, for some metaclasses, multiple token types are extracted. Finally, some references are also used for token extraction.
### Usage
-To use this module, add the `-l emf-metamodel` flag in the CLI, or use a `JPlagOption` object with `new EmfLanguage()` as `language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
+The input for this module is a set of EMF metamodels (`.ecore` files).
+To use this module, add the `-l emf` flag in the CLI, or use a `JPlagOption` object with `new EmfLanguage()` as `language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
-### More Info
-More information can be found in the paper [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508).
-A short summary can be found on [Kudos](https://www.growkudos.com/publications/10.1145%25252F3550356.3556508/reader).
+### Report Viewer
+In the report viewer, Emfatic is used as a textual model view.
+
+### Literature
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508).
+* Its [Kudos Summary](https://www.growkudos.com/publications/10.1145%25252F3550356.3556508/reader).
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*]
+* *"Automated Detection of AI-Obfuscated Plagiarism in Modeling Assignments" (ICSE-SEET'24)*
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/EmfLanguage.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/EmfLanguage.java
index ba3c967ac..fac48399f 100644
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/EmfLanguage.java
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/EmfLanguage.java
@@ -55,8 +55,8 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
- return parser.parse(files);
+ public List parse(Set files, boolean normalize) throws ParsingException {
+ return parser.parse(files, normalize);
}
@Override
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/ContainmentOrderNormalizer.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/ContainmentOrderNormalizer.java
index f8dc57934..0e40f976d 100644
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/ContainmentOrderNormalizer.java
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/ContainmentOrderNormalizer.java
@@ -45,9 +45,11 @@ public int compare(EObject first, EObject second) {
// 0. comparison if token types are absent for one or more elements.
if (firstType == null && secondType == null) {
return 0;
- } else if (firstType == null) {
+ }
+ if (firstType == null) {
return -1;
- } else if (secondType == null) {
+ }
+ if (secondType == null) {
return 1;
}
@@ -91,7 +93,7 @@ private List calculatePath(TokenType type) {
List elements = modelElementsToSort.stream().filter(it -> type.equals(tokenizer.element2Token(it))).toList();
// Generate token type distributions for the subtrees of the elements to sort:
- Map> subtreeVectors = new HashMap<>();
+ Map subtreeVectors = new HashMap<>();
elements.forEach(it -> subtreeVectors.put(it, tokenVectorGenerator.generateOccurenceVector(it.eAllContents())));
// Calculate distance matrix:
@@ -118,10 +120,11 @@ private int countSubtreeTokens(EObject modelElement) {
return count;
}
- private static double euclideanDistance(List first, List second) {
- if (first.size() != second.size()) {
- throw new IllegalArgumentException("Lists must have the same size");
- }
+ /**
+ * Calculates the euclidean distance for two token occurrence vectors. As they are zero-padded, they are virtually of
+ * the same length.
+ */
+ private static double euclideanDistance(TokenOccurenceVector first, TokenOccurenceVector second) {
double sum = 0;
for (int i = 0; i < first.size(); i++) {
double diff = first.get(i) - second.get(i);
@@ -129,4 +132,5 @@ private static double euclideanDistance(List first, List second)
}
return Math.sqrt(sum);
}
+
}
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenOccurenceVector.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenOccurenceVector.java
new file mode 100644
index 000000000..47c2c0dbb
--- /dev/null
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenOccurenceVector.java
@@ -0,0 +1,39 @@
+package de.jplag.emf.normalization;
+
+import java.util.List;
+
+/**
+ * A vector for the occurrence frequency of different token types. The vector is padded with zeroes beyond its original
+ * size. The vector content cannot be changed after its creation.
+ */
+public class TokenOccurenceVector {
+ private final List originalVector;
+
+ /**
+ * Creates a zero-padded token occurrence vector.
+ * @param originalVector specifies the occurrence frequency values for the vector.
+ */
+ public TokenOccurenceVector(List originalVector) {
+ this.originalVector = originalVector;
+ }
+
+ /**
+ * Returns the occurrence frequency value of the vector at the specified index.
+ * @param index is the specified index.
+ * @return the occurrence frequency value or zero if the index is beyond the size of the vector.
+ */
+ public double get(int index) {
+ if (index >= originalVector.size()) {
+ return 0.0;
+ }
+ return originalVector.get(index);
+ }
+
+ /**
+ * The original size of the vector, without padding.
+ * @return the size.
+ */
+ public int size() {
+ return originalVector.size();
+ }
+}
\ No newline at end of file
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenVectorGenerator.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenVectorGenerator.java
index 3eb4ce5ea..734b1d081 100644
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenVectorGenerator.java
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/normalization/TokenVectorGenerator.java
@@ -10,7 +10,6 @@
import org.eclipse.emf.ecore.EObject;
import de.jplag.TokenType;
-import de.jplag.emf.MetamodelTokenType;
import de.jplag.emf.parser.ModelingElementTokenizer;
/**
@@ -27,10 +26,10 @@ public TokenVectorGenerator(ModelingElementTokenizer tokenizer) {
/**
* Generate a token occurrence vector for a subtree of a model.
* @param modelElements is a visitor for the subtree.
- * @return a list, where each entry represents the number of tokens in the subtree. The order is determined by
- * {@link MetamodelTokenType}.
+ * @return a zero-padded token occurrence vector, where each entry represents the number of tokens in the subtree. The
+ * order is determined by {@link ModelingElementTokenizer#allTokenTypes()}.
*/
- public List generateOccurenceVector(Iterator modelElements) {
+ public TokenOccurenceVector generateOccurenceVector(Iterator modelElements) {
Map tokenTypeHistogram = new HashMap<>();
while (modelElements.hasNext()) {
@@ -40,7 +39,7 @@ public List generateOccurenceVector(Iterator modelElements) {
for (TokenType type : tokenizer.allTokenTypes()) {
occurenceVector.add(tokenTypeHistogram.getOrDefault(type, 0));
}
- return normalize(occurenceVector);
+ return new TokenOccurenceVector(normalize(occurenceVector));
}
public static List normalize(List vector) {
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/EcoreParser.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/EcoreParser.java
index 924d3d544..5ea3adf11 100644
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/EcoreParser.java
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/EcoreParser.java
@@ -41,10 +41,10 @@ public EcoreParser() {
* @param files is the set of files.
* @return the list of parsed tokens.
*/
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
tokens = new ArrayList<>();
for (File file : files) {
- parseModelFile(file);
+ parseModelFile(file, normalize);
}
return tokens;
}
@@ -53,21 +53,22 @@ public List parse(Set files) throws ParsingException {
* Loads a metamodel from a file and parses it.
* @param file is the metamodel file.
*/
- protected void parseModelFile(File file) throws ParsingException {
+ protected void parseModelFile(File file, boolean normalize) throws ParsingException {
currentFile = file;
Resource model = EMFUtil.loadModelResource(file);
if (model == null) {
throw new ParsingException(file, "failed to load model");
- } else {
+ }
+ if (normalize) {
normalizeOrder(model);
- treeView = createView(file, model);
- visitor = createMetamodelVisitor();
- for (EObject root : model.getContents()) {
- visitor.visit(root);
- }
- tokens.add(Token.fileEnd(currentFile));
- treeView.writeToFile(getCorrespondingViewFileSuffix());
}
+ treeView = createView(file, model);
+ visitor = createMetamodelVisitor();
+ for (EObject root : model.getContents()) {
+ visitor.visit(root);
+ }
+ tokens.add(Token.fileEnd(currentFile));
+ treeView.writeToFile(getCorrespondingViewFileSuffix());
}
/**
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/MetamodelElementTokenizer.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/MetamodelElementTokenizer.java
index f5f7c66c1..1d2b83c09 100644
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/MetamodelElementTokenizer.java
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/parser/MetamodelElementTokenizer.java
@@ -38,20 +38,19 @@ public MetamodelTokenType caseEAnnotation(EAnnotation eAnnotation) {
public MetamodelTokenType caseEAttribute(EAttribute eAttribute) {
if (eAttribute.isID()) {
return MetamodelTokenType.ID_ATTRIBUTE;
- } else {
- return MetamodelTokenType.ATTRIBUTE;
}
+ return MetamodelTokenType.ATTRIBUTE;
}
@Override
public MetamodelTokenType caseEClass(EClass eClass) {
if (eClass.isInterface()) {
return MetamodelTokenType.INTERFACE;
- } else if (eClass.isAbstract()) {
+ }
+ if (eClass.isAbstract()) {
return MetamodelTokenType.ABSTRACT_CLASS;
- } else {
- return MetamodelTokenType.CLASS;
}
+ return MetamodelTokenType.CLASS;
}
@Override
@@ -94,16 +93,13 @@ public MetamodelTokenType caseEReference(EReference eReference) {
if (eReference.isContainment()) {
if (eReference.getUpperBound() == 1) {
return MetamodelTokenType.CONTAINMENT;
- } else {
- return MetamodelTokenType.CONTAINMENT_MULT;
- }
- } else {
- if (eReference.getUpperBound() == 1) {
- return MetamodelTokenType.REFERENCE;
- } else {
- return MetamodelTokenType.REFERENCE_MULT;
}
+ return MetamodelTokenType.CONTAINMENT_MULT;
+ }
+ if (eReference.getUpperBound() == 1) {
+ return MetamodelTokenType.REFERENCE;
}
+ return MetamodelTokenType.REFERENCE_MULT;
}
@Override
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/util/GenericEmfTreeView.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/util/GenericEmfTreeView.java
new file mode 100644
index 000000000..f68e08620
--- /dev/null
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/util/GenericEmfTreeView.java
@@ -0,0 +1,153 @@
+package de.jplag.emf.util;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.StringJoiner;
+
+import org.eclipse.emf.ecore.ENamedElement;
+import org.eclipse.emf.ecore.EObject;
+import org.eclipse.emf.ecore.EStructuralFeature;
+import org.eclipse.emf.ecore.resource.Resource;
+
+import de.jplag.TokenTrace;
+import de.jplag.emf.MetamodelToken;
+
+/**
+ * Very basic tree view representation of an EMF metamodel or model.
+ */
+public class GenericEmfTreeView extends AbstractModelView {
+ private final List lines;
+ private final Map objectToLine;
+ private final ModelingElementIdentifierManager identifierManager;
+
+ /**
+ * Creates a tree view for a metamodel.
+ * @param file is the path to the metamodel.
+ */
+ public GenericEmfTreeView(File file, Resource modelResource) {
+ super(file);
+ lines = new ArrayList<>();
+ objectToLine = new HashMap<>();
+ identifierManager = new ModelingElementIdentifierManager();
+ TreeViewBuilder visitor = new TreeViewBuilder();
+ modelResource.getContents().forEach(visitor::visit);
+ }
+
+ /**
+ * Adds a token to the view, thus adding the index information to the token. Returns a new token enriched with the index
+ * metadata.
+ * @param token is the token to add.
+ */
+ @Override
+ public MetamodelToken convertToMetadataEnrichedToken(MetamodelToken token) {
+ Optional optionalEObject = token.getEObject();
+ if (optionalEObject.isPresent()) {
+ EObject object = optionalEObject.get();
+ TokenTrace trace = objectToLine.get(object);
+ return new MetamodelToken(token.getType(), token.getFile(), trace, optionalEObject);
+ }
+ return new MetamodelToken(token.getType(), token.getFile());
+ }
+
+ private final class TreeViewBuilder extends AbstractMetamodelVisitor {
+ private static final String IDENTIFIER_PREFIX = " #";
+ private static final String VALUE_ASSIGNMENT = "=";
+ private static final String COLLECTION_PREFIX = "[";
+ private static final String COLLECTION_SUFFIX = "]";
+ private static final String COLLECTION_DELIMITER = ", ";
+ private static final int ABBREVIATION_LIMIT = 20;
+ private static final String ABBREVIATION_SUFFIX = "...";
+ private static final String TEXT_AFFIX = "\"";
+ private static final String IDENTIFIER_REGEX = "name|identifier";
+ private static final String INDENTATION = " ";
+
+ @Override
+ protected void visitEObject(EObject eObject) {
+ String prefix = INDENTATION.repeat(getCurrentTreeDepth());
+ StringBuilder line = new StringBuilder(prefix);
+
+ line.append(eObject.eClass().getName()); // Build element type
+ line.append(IDENTIFIER_PREFIX);
+ line.append(identifierManager.getIdentifier(eObject));
+ visitStructuralFeatures(eObject, line); // Build element features
+
+ lines.add(line.toString());
+ viewBuilder.append(line + System.lineSeparator());
+ // line and column values are one-indexed
+ TokenTrace trace = new TokenTrace(lines.size(), prefix.length() + 1, line.toString().trim().length());
+ objectToLine.put(eObject, trace);
+ }
+
+ private void visitStructuralFeatures(EObject eObject, StringBuilder line) {
+ List structuralFeatures = eObject.eClass().getEAllStructuralFeatures();
+ if (!structuralFeatures.isEmpty()) {
+ line.append(": ");
+ StringJoiner joiner = new StringJoiner(COLLECTION_DELIMITER);
+ for (EStructuralFeature feature : structuralFeatures) {
+ Object value = eObject.eGet(feature);
+ String name = featureValueToString(value);
+ if (name != null) {
+ joiner.add(feature.getName() + VALUE_ASSIGNMENT + name);
+ }
+ }
+ line.append(joiner.toString());
+
+ }
+ }
+
+ private String featureValueToString(Object value) {
+ String name = null;
+ if (value != null) {
+ if (value instanceof EObject featureValue) {
+ List valueIdentifiers = deriveNameOrIdentifers(featureValue);
+
+ if (!valueIdentifiers.isEmpty()) {
+ name = TEXT_AFFIX + valueIdentifiers.get(0) + TEXT_AFFIX;
+ } else {
+ name = featureValue.eClass().getName() + IDENTIFIER_PREFIX + identifierManager.getIdentifier(featureValue);
+ }
+ } else if (value instanceof Collection> multipleValues) {
+ name = valueListToString(multipleValues);
+ } else {
+ name = value.toString();
+ name = (name.length() > ABBREVIATION_LIMIT) ? name.substring(0, ABBREVIATION_LIMIT) + ABBREVIATION_SUFFIX : name;
+ name = TEXT_AFFIX + name + TEXT_AFFIX;
+ }
+ }
+ return name;
+ }
+
+ private String valueListToString(Collection> multipleValues) {
+ String name = null;
+ if (!multipleValues.isEmpty()) {
+ name = COLLECTION_PREFIX;
+ StringJoiner joiner = new StringJoiner(COLLECTION_DELIMITER);
+ for (Object innerValue : multipleValues) {
+ joiner.add(featureValueToString(innerValue));
+ }
+ name += joiner.toString() + COLLECTION_SUFFIX;
+ }
+ return name;
+ }
+
+ private static List deriveNameOrIdentifers(EObject eObject) {
+ List names = new ArrayList<>();
+ if (eObject instanceof ENamedElement element) {
+ names.add(element.getName());
+ } else {
+ for (EStructuralFeature feature : eObject.eClass().getEAllStructuralFeatures()) {
+ if (feature.getName().toLowerCase().matches(IDENTIFIER_REGEX) && eObject.eGet(feature) != null) {
+ names.add(eObject.eGet(feature).toString());
+ }
+ }
+ }
+ return names;
+ }
+ }
+
+}
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/util/MetamodelTreeView.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/util/MetamodelTreeView.java
deleted file mode 100644
index 55f3647bc..000000000
--- a/languages/emf-metamodel/src/main/java/de/jplag/emf/util/MetamodelTreeView.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package de.jplag.emf.util;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-
-import org.eclipse.emf.ecore.ENamedElement;
-import org.eclipse.emf.ecore.EObject;
-import org.eclipse.emf.ecore.resource.Resource;
-
-import de.jplag.TokenTrace;
-import de.jplag.emf.MetamodelToken;
-
-/**
- * Very basic tree view representation of an EMF metamodel or model.
- */
-public class MetamodelTreeView extends AbstractModelView {
- private final List lines;
- private final Map objectToLine;
-
- /**
- * Creates a tree view for a metamodel.
- * @param file is the path to the metamodel.
- */
- public MetamodelTreeView(File file, Resource modelResource) {
- super(file);
- lines = new ArrayList<>();
- objectToLine = new HashMap<>();
- TreeViewBuilder visitor = new TreeViewBuilder();
- modelResource.getContents().forEach(visitor::visit);
- }
-
- /**
- * Adds a token to the view, thus adding the index information to the token. Returns a new token enriched with the index
- * metadata.
- * @param token is the token to add.
- */
- @Override
- public MetamodelToken convertToMetadataEnrichedToken(MetamodelToken token) {
- Optional optionalEObject = token.getEObject();
- if (optionalEObject.isPresent()) {
- EObject object = optionalEObject.get();
- TokenTrace trace = objectToLine.get(object);
- return new MetamodelToken(token.getType(), token.getFile(), trace, optionalEObject);
- }
- return new MetamodelToken(token.getType(), token.getFile());
- }
-
- private final class TreeViewBuilder extends AbstractMetamodelVisitor {
- private static final String INDENTATION = " ";
- private static final String NAME_SEPARATOR = " : ";
-
- @Override
- protected void visitEObject(EObject eObject) {
- String prefix = INDENTATION.repeat(getCurrentTreeDepth());
- String line = prefix;
- if (eObject instanceof ENamedElement element) {
- line += element.getName() + NAME_SEPARATOR;
- }
- line += eObject.eClass().getName();
-
- lines.add(line);
- viewBuilder.append(line + System.lineSeparator());
- // line and column values are one-indexed
- TokenTrace trace = new TokenTrace(lines.size(), prefix.length() + 1, line.trim().length());
- objectToLine.put(eObject, trace);
- }
- }
-
-}
diff --git a/languages/emf-metamodel/src/main/java/de/jplag/emf/util/ModelingElementIdentifierManager.java b/languages/emf-metamodel/src/main/java/de/jplag/emf/util/ModelingElementIdentifierManager.java
new file mode 100644
index 000000000..895087dd6
--- /dev/null
+++ b/languages/emf-metamodel/src/main/java/de/jplag/emf/util/ModelingElementIdentifierManager.java
@@ -0,0 +1,42 @@
+package de.jplag.emf.util;
+
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.eclipse.emf.ecore.EClass;
+import org.eclipse.emf.ecore.EObject;
+
+/**
+ * This class provides type-unique identifiers for EObjects.
+ */
+public class ModelingElementIdentifierManager {
+
+ private final Map> elementToIdentifer;
+
+ /**
+ * Creates the identifier manager. Identifiers are only unique if managed by the same instance.
+ */
+ public ModelingElementIdentifierManager() {
+ elementToIdentifer = new HashMap<>();
+ }
+
+ /**
+ * Returns the type-unique identifier for any EMF modeling element.
+ * @param element is the modeling element for which the identifier is requested.
+ * @return the identifier, that is unique for all elements of the same EClass.
+ */
+ public int getIdentifier(EObject element) {
+ Set elements = elementToIdentifer.computeIfAbsent(element.eClass(), key -> new LinkedHashSet<>());
+ int index = 0;
+ for (EObject containedElement : elements) {
+ if (containedElement.equals(element)) {
+ return index;
+ }
+ ++index;
+ }
+ elements.add(element);
+ return index;
+ }
+}
diff --git a/languages/emf-metamodel/src/test/java/de/jplag/emf/MinimalMetamodelTest.java b/languages/emf-metamodel/src/test/java/de/jplag/emf/MinimalMetamodelTest.java
index 28f8f5347..c2559f7c8 100644
--- a/languages/emf-metamodel/src/test/java/de/jplag/emf/MinimalMetamodelTest.java
+++ b/languages/emf-metamodel/src/test/java/de/jplag/emf/MinimalMetamodelTest.java
@@ -28,7 +28,7 @@ class MinimalMetamodelTest extends AbstractEmfTest {
@DisplayName("Test tokens generated from example metamodels")
void testBookstoreMetamodels() throws ParsingException {
List testFiles = Arrays.stream(TEST_SUBJECTS).map(path -> new File(BASE_PATH.toFile(), path)).toList();
- List result = language.parse(new HashSet<>(testFiles));
+ List result = language.parse(new HashSet<>(testFiles), true);
logger.debug(TokenPrinter.printTokens(result, baseDirectory, Optional.of(EmfLanguage.VIEW_FILE_SUFFIX)));
List tokenTypes = result.stream().map(Token::getType).toList();
diff --git a/languages/emf-metamodel/src/test/java/de/jplag/emf/util/MetamodelTreeViewTest.java b/languages/emf-metamodel/src/test/java/de/jplag/emf/util/GenericEmfTreeViewTest.java
similarity index 85%
rename from languages/emf-metamodel/src/test/java/de/jplag/emf/util/MetamodelTreeViewTest.java
rename to languages/emf-metamodel/src/test/java/de/jplag/emf/util/GenericEmfTreeViewTest.java
index 9a2976974..bef41d57b 100644
--- a/languages/emf-metamodel/src/test/java/de/jplag/emf/util/MetamodelTreeViewTest.java
+++ b/languages/emf-metamodel/src/test/java/de/jplag/emf/util/GenericEmfTreeViewTest.java
@@ -13,7 +13,7 @@
import de.jplag.emf.AbstractEmfTest;
import de.jplag.testutils.FileUtil;
-class MetamodelTreeViewTest extends AbstractEmfTest {
+class GenericEmfTreeViewTest extends AbstractEmfTest {
private static final String VIEW_FILE_SUFFIX = ".treeview";
private static final String EXPECTED_VIEW_FOLDER = "treeview";
@@ -23,7 +23,7 @@ private static List provideModelNames() {
}
@ParameterizedTest
- @DisplayName("Test content of emfatic view files of example metamodels")
+ @DisplayName("Test content of generic EMF view files of example metamodels")
@MethodSource("provideModelNames")
void testEmfaticViewFiles(String modelName) {
// Load model:
@@ -31,7 +31,7 @@ void testEmfaticViewFiles(String modelName) {
Resource modelResource = loadAndVerifyModel(modelFile);
// Generate emfatic view:
- MetamodelTreeView view = new MetamodelTreeView(modelFile, modelResource);
+ GenericEmfTreeView view = new GenericEmfTreeView(modelFile, modelResource);
view.writeToFile(VIEW_FILE_SUFFIX);
// Compare expected vs. actual view file:
diff --git a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStore.ecore.treeview b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStore.ecore.treeview
index ce6ad5331..05c24f48e 100644
--- a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStore.ecore.treeview
+++ b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStore.ecore.treeview
@@ -1,13 +1,13 @@
-BookStorePackage : EPackage
- BookStore : EClass
- owner : EAttribute
- EGenericType
- location : EAttribute
- EGenericType
- books : EReference
- EGenericType
- Book : EClass
- name : EAttribute
- EGenericType
- isbn : EAttribute
- EGenericType
+EPackage #0: name="BookStorePackage", nsURI="http:///com.ibm.dyna...", nsPrefix="bookStore", eFactoryInstance=EFactory #0, eClassifiers=["BookStore", "Book"]
+ EClass #0: name="BookStore", ePackage="BookStorePackage", abstract="false", interface="false", eAllAttributes=["owner", "location"], eAllReferences=["books"], eReferences=["books"], eAttributes=["owner", "location"], eAllContainments=["books"], eAllStructuralFeatures=["owner", "location", "books"], eStructuralFeatures=["owner", "location", "books"]
+ EAttribute #0: name="owner", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #0, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", iD="false", eAttributeType="EString"
+ EGenericType #0: eRawType="EString", eClassifier="EString"
+ EAttribute #1: name="location", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #1, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", iD="false", eAttributeType="EString"
+ EGenericType #1: eRawType="EString", eClassifier="EString"
+ EReference #0: name="books", ordered="true", unique="true", lowerBound="0", upperBound="-1", many="true", required="false", eType="Book", eGenericType=EGenericType #2, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", containment="true", container="false", resolveProxies="true", eReferenceType="Book"
+ EGenericType #2: eRawType="Book", eClassifier="Book"
+ EClass #1: name="Book", ePackage="BookStorePackage", abstract="false", interface="false", eAllAttributes=["name", "isbn"], eAttributes=["name", "isbn"], eAllStructuralFeatures=["name", "isbn"], eIDAttribute="isbn", eStructuralFeatures=["name", "isbn"]
+ EAttribute #2: name="name", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #3, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", iD="false", eAttributeType="EString"
+ EGenericType #3: eRawType="EString", eClassifier="EString"
+ EAttribute #3: name="isbn", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EInt", eGenericType=EGenericType #4, changeable="true", volatile="false", transient="false", defaultValue="0", unsettable="false", derived="false", eContainingClass="Book", iD="true", eAttributeType="EInt"
+ EGenericType #4: eRawType="EInt", eClassifier="EInt"
diff --git a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtended.ecore.treeview b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtended.ecore.treeview
index 4fdd2c505..d70740d4f 100644
--- a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtended.ecore.treeview
+++ b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtended.ecore.treeview
@@ -1,35 +1,35 @@
-BookStorePackage : EPackage
- store : EPackage
- BookStore : EClass
- owner : EReference
- EGenericType
- name : EAttribute
- EGenericType
- location : EAttribute
- EGenericType
- books : EReference
- EGenericType
- Book : EClass
- name : EAttribute
- EGenericType
- isbn : EAttribute
- EGenericType
- author : EReference
- EGenericType
- genre : EAttribute
- EGenericType
- Genre : EEnum
- NOVEL : EEnumLiteral
- COOKBOOK : EEnumLiteral
- BIOGRAPHY : EEnumLiteral
- TEXTBOOK : EEnumLiteral
- person : EPackage
- Author : EClass
- isStageName : EAttribute
- EGenericType
- EGenericType
- Person : EClass
- firstName : EAttribute
- EGenericType
- lastName : EAttribute
- EGenericType
+EPackage #0: name="BookStorePackage", nsURI="http:///com.ibm.dyna...", nsPrefix="bookStore", eFactoryInstance=EFactory #0, eSubpackages=["store", "person"]
+ EPackage #1: name="store", nsURI="http:///com.ibm.dyna...", nsPrefix="store", eFactoryInstance=EFactory #1, eClassifiers=["BookStore", "Book", "Genre"], eSuperPackage="BookStorePackage"
+ EClass #0: name="BookStore", ePackage="store", abstract="false", interface="false", eAllAttributes=["name", "location"], eAllReferences=["owner", "books"], eReferences=["owner", "books"], eAttributes=["name", "location"], eAllContainments=["books"], eAllStructuralFeatures=["owner", "name", "location", "books"], eStructuralFeatures=["owner", "name", "location", "books"]
+ EReference #0: name="owner", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="Person", eGenericType=EGenericType #0, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", containment="false", container="false", resolveProxies="true", eReferenceType="Person"
+ EGenericType #0: eRawType="Person", eClassifier="Person"
+ EAttribute #0: name="name", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #1, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", iD="false", eAttributeType="EString"
+ EGenericType #1: eRawType="EString", eClassifier="EString"
+ EAttribute #1: name="location", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #2, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", iD="false", eAttributeType="EString"
+ EGenericType #2: eRawType="EString", eClassifier="EString"
+ EReference #1: name="books", ordered="true", unique="true", lowerBound="0", upperBound="-1", many="true", required="false", eType="Book", eGenericType=EGenericType #3, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", containment="true", container="false", resolveProxies="true", eReferenceType="Book"
+ EGenericType #3: eRawType="Book", eClassifier="Book"
+ EClass #1: name="Book", ePackage="store", abstract="false", interface="false", eAllAttributes=["name", "isbn", "genre"], eAllReferences=["author"], eReferences=["author"], eAttributes=["name", "isbn", "genre"], eAllStructuralFeatures=["name", "isbn", "author", "genre"], eIDAttribute="isbn", eStructuralFeatures=["name", "isbn", "author", "genre"]
+ EAttribute #2: name="name", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #4, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", iD="false", eAttributeType="EString"
+ EGenericType #4: eRawType="EString", eClassifier="EString"
+ EAttribute #3: name="isbn", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EInt", eGenericType=EGenericType #5, changeable="true", volatile="false", transient="false", defaultValue="0", unsettable="false", derived="false", eContainingClass="Book", iD="true", eAttributeType="EInt"
+ EGenericType #5: eRawType="EInt", eClassifier="EInt"
+ EReference #2: name="author", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="Author", eGenericType=EGenericType #6, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", containment="false", container="false", resolveProxies="true", eReferenceType="Author"
+ EGenericType #6: eRawType="Author", eClassifier="Author"
+ EAttribute #4: name="genre", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="Genre", eGenericType=EGenericType #7, changeable="true", volatile="false", transient="false", defaultValue="NOVEL", unsettable="false", derived="false", eContainingClass="Book", iD="false", eAttributeType="Genre"
+ EGenericType #7: eRawType="Genre", eClassifier="Genre"
+ EEnum #0: name="Genre", defaultValue="NOVEL", ePackage="store", serializable="true", eLiterals=["NOVEL", "COOKBOOK", "BIOGRAPHY", "TEXTBOOK"]
+ EEnumLiteral #0: name="NOVEL", value="0", instance="NOVEL", literal="NOVEL", eEnum="Genre"
+ EEnumLiteral #1: name="COOKBOOK", value="1", instance="COOKBOOK", literal="COOKBOOK", eEnum="Genre"
+ EEnumLiteral #2: name="BIOGRAPHY", value="3", instance="BIOGRAPHY", literal="BIOGRAPHY", eEnum="Genre"
+ EEnumLiteral #3: name="TEXTBOOK", value="4", instance="TEXTBOOK", literal="TEXTBOOK", eEnum="Genre"
+ EPackage #2: name="person", nsURI="http:///com.ibm.dyna...", nsPrefix="person", eFactoryInstance=EFactory #2, eClassifiers=["Author", "Person"], eSuperPackage="BookStorePackage"
+ EClass #2: name="Author", ePackage="person", abstract="false", interface="false", eSuperTypes=["Person"], eAllAttributes=["firstName", "lastName", "isStageName"], eAttributes=["isStageName"], eAllStructuralFeatures=["firstName", "lastName", "isStageName"], eAllSuperTypes=["Person"], eStructuralFeatures=["isStageName"], eGenericSuperTypes=[EGenericType #8], eAllGenericSuperTypes=[EGenericType #8]
+ EAttribute #5: name="isStageName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EBoolean", eGenericType=EGenericType #9, changeable="true", volatile="false", transient="false", defaultValue="false", unsettable="false", derived="false", eContainingClass="Author", iD="false", eAttributeType="EBoolean"
+ EGenericType #9: eRawType="EBoolean", eClassifier="EBoolean"
+ EGenericType #8: eRawType="Person", eClassifier="Person"
+ EClass #3: name="Person", ePackage="person", abstract="false", interface="false", eAllAttributes=["firstName", "lastName"], eAttributes=["firstName", "lastName"], eAllStructuralFeatures=["firstName", "lastName"], eStructuralFeatures=["firstName", "lastName"]
+ EAttribute #6: name="firstName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #10, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Person", iD="false", eAttributeType="EString"
+ EGenericType #10: eRawType="EString", eClassifier="EString"
+ EAttribute #7: name="lastName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #11, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Person", iD="false", eAttributeType="EString"
+ EGenericType #11: eRawType="EString", eClassifier="EString"
diff --git a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtendedRefactor.ecore.treeview b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtendedRefactor.ecore.treeview
index ca127fe5a..ee121f2ea 100644
--- a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtendedRefactor.ecore.treeview
+++ b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreExtendedRefactor.ecore.treeview
@@ -1,29 +1,29 @@
-BookStorePackage : EPackage
- store : EPackage
- Store : EClass
- owner : EReference
- EGenericType
- name : EAttribute
- EGenericType
- location : EAttribute
- EGenericType
- BookStore : EClass
- books : EReference
- EGenericType
- EGenericType
- Book : EClass
- title : EAttribute
- EGenericType
- isbn : EAttribute
- EGenericType
- author : EReference
- EGenericType
- category : EAttribute
- EGenericType
- Person : EClass
- firstName : EAttribute
- EGenericType
- lastName : EAttribute
- EGenericType
- isStageName : EAttribute
- EGenericType
+EPackage #0: name="BookStorePackage", nsURI="http:///com.ibm.dyna...", nsPrefix="bookStore", eFactoryInstance=EFactory #0, eSubpackages=["store"]
+ EPackage #1: name="store", nsURI="http:///com.ibm.dyna...", nsPrefix="store", eFactoryInstance=EFactory #1, eClassifiers=["Store", "BookStore", "Book", "Person"], eSuperPackage="BookStorePackage"
+ EClass #0: name="Store", ePackage="store", abstract="false", interface="false", eAllAttributes=["name", "location"], eAllReferences=["owner"], eReferences=["owner"], eAttributes=["name", "location"], eAllStructuralFeatures=["owner", "name", "location"], eStructuralFeatures=["owner", "name", "location"]
+ EReference #0: name="owner", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="Person", eGenericType=EGenericType #0, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", containment="false", container="false", resolveProxies="true", eReferenceType="Person"
+ EGenericType #0: eRawType="Person", eClassifier="Person"
+ EAttribute #0: name="name", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #1, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", iD="false", eAttributeType="EString"
+ EGenericType #1: eRawType="EString", eClassifier="EString"
+ EAttribute #1: name="location", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #2, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", iD="false", eAttributeType="EString"
+ EGenericType #2: eRawType="EString", eClassifier="EString"
+ EClass #1: name="BookStore", ePackage="store", abstract="false", interface="false", eSuperTypes=["Store"], eAllAttributes=["name", "location"], eAllReferences=["owner", "books"], eReferences=["books"], eAllContainments=["books"], eAllStructuralFeatures=["owner", "name", "location", "books"], eAllSuperTypes=["Store"], eStructuralFeatures=["books"], eGenericSuperTypes=[EGenericType #3], eAllGenericSuperTypes=[EGenericType #3]
+ EReference #1: name="books", ordered="true", unique="true", lowerBound="0", upperBound="-1", many="true", required="false", eType="Book", eGenericType=EGenericType #4, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="BookStore", containment="true", container="false", resolveProxies="true", eReferenceType="Book"
+ EGenericType #4: eRawType="Book", eClassifier="Book"
+ EGenericType #3: eRawType="Store", eClassifier="Store"
+ EClass #2: name="Book", ePackage="store", abstract="false", interface="false", eAllAttributes=["title", "isbn", "category"], eAllReferences=["author"], eReferences=["author"], eAttributes=["title", "isbn", "category"], eAllStructuralFeatures=["title", "isbn", "author", "category"], eIDAttribute="isbn", eStructuralFeatures=["title", "isbn", "author", "category"]
+ EAttribute #2: name="title", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #5, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", iD="false", eAttributeType="EString"
+ EGenericType #5: eRawType="EString", eClassifier="EString"
+ EAttribute #3: name="isbn", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EInt", eGenericType=EGenericType #6, changeable="true", volatile="false", transient="false", defaultValue="0", unsettable="false", derived="false", eContainingClass="Book", iD="true", eAttributeType="EInt"
+ EGenericType #6: eRawType="EInt", eClassifier="EInt"
+ EReference #2: name="author", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="Person", eGenericType=EGenericType #7, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", containment="false", container="false", resolveProxies="true", eReferenceType="Person"
+ EGenericType #7: eRawType="Person", eClassifier="Person"
+ EAttribute #4: name="category", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #8, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Book", iD="false", eAttributeType="EString"
+ EGenericType #8: eRawType="EString", eClassifier="EString"
+ EClass #3: name="Person", ePackage="store", abstract="false", interface="false", eAllAttributes=["firstName", "lastName", "isStageName"], eAttributes=["firstName", "lastName", "isStageName"], eAllStructuralFeatures=["firstName", "lastName", "isStageName"], eStructuralFeatures=["firstName", "lastName", "isStageName"]
+ EAttribute #5: name="firstName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #9, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Person", iD="false", eAttributeType="EString"
+ EGenericType #9: eRawType="EString", eClassifier="EString"
+ EAttribute #6: name="lastName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #10, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Person", iD="false", eAttributeType="EString"
+ EGenericType #10: eRawType="EString", eClassifier="EString"
+ EAttribute #7: name="isStageName", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EBoolean", eGenericType=EGenericType #11, changeable="true", volatile="false", transient="false", defaultValue="false", unsettable="false", derived="false", eContainingClass="Person", iD="false", eAttributeType="EBoolean"
+ EGenericType #11: eRawType="EBoolean", eClassifier="EBoolean"
diff --git a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreRenamed.ecore.treeview b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreRenamed.ecore.treeview
index 4ee5941e3..43ab1ee1a 100644
--- a/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreRenamed.ecore.treeview
+++ b/languages/emf-metamodel/src/test/resources/de/jplag/treeview/bookStoreRenamed.ecore.treeview
@@ -1,13 +1,13 @@
-BookStorePackage : EPackage
- Store : EClass
- nameOfOwner : EAttribute
- EGenericType
- city : EAttribute
- EGenericType
- soldItems : EReference
- EGenericType
- Item : EClass
- title : EAttribute
- EGenericType
- identifier : EAttribute
- EGenericType
+EPackage #0: name="BookStorePackage", nsURI="http:///com.ibm.dyna...", nsPrefix="bookStore", eFactoryInstance=EFactory #0, eClassifiers=["Store", "Item"]
+ EClass #0: name="Store", ePackage="BookStorePackage", abstract="false", interface="false", eAllAttributes=["nameOfOwner", "city"], eAllReferences=["soldItems"], eReferences=["soldItems"], eAttributes=["nameOfOwner", "city"], eAllContainments=["soldItems"], eAllStructuralFeatures=["nameOfOwner", "city", "soldItems"], eStructuralFeatures=["nameOfOwner", "city", "soldItems"]
+ EAttribute #0: name="nameOfOwner", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #0, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", iD="false", eAttributeType="EString"
+ EGenericType #0: eRawType="EString", eClassifier="EString"
+ EAttribute #1: name="city", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #1, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", iD="false", eAttributeType="EString"
+ EGenericType #1: eRawType="EString", eClassifier="EString"
+ EReference #0: name="soldItems", ordered="true", unique="true", lowerBound="0", upperBound="-1", many="true", required="false", eType="Item", eGenericType=EGenericType #2, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Store", containment="true", container="false", resolveProxies="true", eReferenceType="Item"
+ EGenericType #2: eRawType="Item", eClassifier="Item"
+ EClass #1: name="Item", ePackage="BookStorePackage", abstract="false", interface="false", eAllAttributes=["title", "identifier"], eAttributes=["title", "identifier"], eAllStructuralFeatures=["title", "identifier"], eIDAttribute="identifier", eStructuralFeatures=["title", "identifier"]
+ EAttribute #2: name="title", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EString", eGenericType=EGenericType #3, changeable="true", volatile="false", transient="false", unsettable="false", derived="false", eContainingClass="Item", iD="false", eAttributeType="EString"
+ EGenericType #3: eRawType="EString", eClassifier="EString"
+ EAttribute #3: name="identifier", ordered="true", unique="true", lowerBound="0", upperBound="1", many="false", required="false", eType="EInt", eGenericType=EGenericType #4, changeable="true", volatile="false", transient="false", defaultValue="0", unsettable="false", derived="false", eContainingClass="Item", iD="true", eAttributeType="EInt"
+ EGenericType #4: eRawType="EInt", eClassifier="EInt"
diff --git a/languages/emf-model/README.md b/languages/emf-model/README.md
new file mode 100644
index 000000000..0f63945ad
--- /dev/null
+++ b/languages/emf-model/README.md
@@ -0,0 +1,24 @@
+# Dynamic EMF model language module
+The dynamic EMF model language module allows the use of JPlag with model submissions.
+It is based on the EMF API.
+
+### EMF specification compatibility
+This module is based on the EMF dependencies available on Maven Central. These might not be the newest versions of EMF. For details, see the [JPlag aggregator pom](https://github.com/jplag/JPlag/blob/263e85e544152cc8b0caa3399127debb7a458746/pom.xml#L84-L86).
+
+### Token Extraction
+For the token extraction, we visit the containment tree of the model and extract tokens for all model elements based on their concrete metaclass. In this module, we thus extract tokens based on a dynamic token set. This works well for structural models with tree-like structures. It is less effective for models where the containment structure is not semantically relevant (e.g. state charts). These kinds of models require a dedicated language module.
+
+### Usage
+The input for this is an EMF metamodel and a set of corresponding instances.
+To ensure only the intended files are parsed, you can use `-p` to specify allowed file types: `-p ecore,xmi,mysuffix`.
+To use this module, add the `-l emf-model` flag in the CLI, or use a `JPlagOptions` object with `new EmfModelLanguage()` as `language` in the Java API as described in the usage information in the [readme of the main project](https://github.com/jplag/JPlag#usage) and [in the wiki](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag).
+
+### Report Viewer
+In the report viewer, a simple textual syntax is used to generate a tree-based model view.
+To provide a custom visualization of a specific metamodel, a custom language module is required.
+
+### Literature
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508).
+* Its [Kudos Summary](https://www.growkudos.com/publications/10.1145%25252F3550356.3556508/reader).
+* [*"Token-based Plagiarism Detection for Metamodels" (MODELS-C'22)*](https://dl.acm.org/doi/10.1145/3550356.3556508)
+* *"Automated Detection of AI-Obfuscated Plagiarism in Modeling Assignments" (ICSE-SEET'24)*
\ No newline at end of file
diff --git a/languages/emf-model/src/main/java/de/jplag/emf/model/EmfModelLanguage.java b/languages/emf-model/src/main/java/de/jplag/emf/model/EmfModelLanguage.java
index 36f328e67..e64036bfa 100644
--- a/languages/emf-model/src/main/java/de/jplag/emf/model/EmfModelLanguage.java
+++ b/languages/emf-model/src/main/java/de/jplag/emf/model/EmfModelLanguage.java
@@ -52,8 +52,6 @@ public boolean expectsSubmissionOrder() {
@Override
public List customizeSubmissionOrder(List sub) {
- Comparator fileEndingComparator = (first, second) -> Boolean.compare(second.getName().endsWith(FILE_ENDING),
- first.getName().endsWith(FILE_ENDING));
- return sub.stream().sorted(fileEndingComparator).toList();
+ return sub.stream().sorted(Comparator.comparing(file -> file.getName().endsWith(FILE_ENDING) ? 0 : 1)).toList();
}
}
diff --git a/languages/emf-model/src/main/java/de/jplag/emf/model/parser/DynamicModelParser.java b/languages/emf-model/src/main/java/de/jplag/emf/model/parser/DynamicModelParser.java
index 70d77e55e..e2d4ac3ed 100644
--- a/languages/emf-model/src/main/java/de/jplag/emf/model/parser/DynamicModelParser.java
+++ b/languages/emf-model/src/main/java/de/jplag/emf/model/parser/DynamicModelParser.java
@@ -14,7 +14,7 @@
import de.jplag.emf.model.EmfModelLanguage;
import de.jplag.emf.util.AbstractModelView;
import de.jplag.emf.util.EMFUtil;
-import de.jplag.emf.util.MetamodelTreeView;
+import de.jplag.emf.util.GenericEmfTreeView;
/**
* Parser for EMF metamodels based on dynamically created tokens.
@@ -36,7 +36,7 @@ public DynamicModelParser() {
}
@Override
- protected void parseModelFile(File file) throws ParsingException {
+ protected void parseModelFile(File file, boolean normalize) throws ParsingException {
// implicit assumption: Metamodel gets parsed first!
if (file.getName().endsWith(EmfLanguage.FILE_ENDING)) {
parseMetamodelFile(file);
@@ -46,7 +46,7 @@ protected void parseModelFile(File file) throws ParsingException {
if (metapackages.isEmpty()) {
logger.warn(METAPACKAGE_WARNING, file.getName());
}
- super.parseModelFile(file);
+ super.parseModelFile(file, normalize);
}
}
@@ -57,7 +57,7 @@ protected String getCorrespondingViewFileSuffix() {
@Override
protected AbstractModelView createView(File file, Resource modelResource) {
- return new MetamodelTreeView(file, modelResource);
+ return new GenericEmfTreeView(file, modelResource);
}
private void parseMetamodelFile(File file) throws ParsingException {
@@ -65,15 +65,14 @@ private void parseMetamodelFile(File file) throws ParsingException {
Resource modelResource = EMFUtil.loadModelResource(file);
if (modelResource == null) {
throw new ParsingException(file, METAMODEL_LOADING_ERROR);
- } else {
- for (EObject object : modelResource.getContents()) {
- if (object instanceof EPackage ePackage) {
- metapackages.add(ePackage);
- } else {
- logger.error(METAPACKAGE_ERROR, object);
- }
+ }
+ for (EObject object : modelResource.getContents()) {
+ if (object instanceof EPackage ePackage) {
+ metapackages.add(ePackage);
+ } else {
+ logger.error(METAPACKAGE_ERROR, object);
}
- EMFUtil.registerEPackageURIs(metapackages);
}
+ EMFUtil.registerEPackageURIs(metapackages);
}
}
diff --git a/languages/emf-model/src/test/java/de/jplag/emf/model/MinimalModelInstanceTest.java b/languages/emf-model/src/test/java/de/jplag/emf/model/MinimalModelInstanceTest.java
index 2c13e44a5..2ff7001ad 100644
--- a/languages/emf-model/src/test/java/de/jplag/emf/model/MinimalModelInstanceTest.java
+++ b/languages/emf-model/src/test/java/de/jplag/emf/model/MinimalModelInstanceTest.java
@@ -8,9 +8,9 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
-import java.util.TreeSet;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -22,7 +22,6 @@
import de.jplag.ParsingException;
import de.jplag.Token;
import de.jplag.TokenPrinter;
-import de.jplag.emf.EmfLanguage;
import de.jplag.testutils.FileUtil;
class MinimalModelInstanceTest {
@@ -46,11 +45,11 @@ public void setUp() {
void testBookStoreInstances() {
File baseFile = new File(BASE_PATH.toString());
List baseFiles = new ArrayList<>(Arrays.asList(baseFile.listFiles()));
- var sortedFiles = new TreeSet<>(language.customizeSubmissionOrder(baseFiles));
+ var sortedFiles = new LinkedHashSet<>(language.customizeSubmissionOrder(baseFiles));
try {
- List tokens = language.parse(sortedFiles);
+ List tokens = language.parse(sortedFiles, true);
assertNotEquals(0, tokens.size());
- logger.debug(TokenPrinter.printTokens(tokens, baseDirectory, Optional.of(EmfLanguage.VIEW_FILE_SUFFIX)));
+ logger.debug(TokenPrinter.printTokens(tokens, baseDirectory, Optional.of(EmfModelLanguage.VIEW_FILE_SUFFIX)));
logger.info("Parsed tokens: " + tokens);
assertEquals(7, tokens.size());
} catch (ParsingException e) {
diff --git a/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java b/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java
index 581a57a19..3dbd09ec4 100644
--- a/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java
+++ b/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java
@@ -43,7 +43,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parserAdapter.parse(files);
}
}
diff --git a/languages/java/src/main/java/de/jplag/java/JavaLanguage.java b/languages/java/src/main/java/de/jplag/java/JavaLanguage.java
index 14172b8c6..4db88ef01 100644
--- a/languages/java/src/main/java/de/jplag/java/JavaLanguage.java
+++ b/languages/java/src/main/java/de/jplag/java/JavaLanguage.java
@@ -44,7 +44,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return this.parser.parse(files);
}
@@ -53,6 +53,11 @@ public boolean tokensHaveSemantics() {
return true;
}
+ @Override
+ public boolean supportsNormalization() {
+ return true;
+ }
+
@Override
public String toString() {
return this.getIdentifier();
diff --git a/languages/pom.xml b/languages/pom.xml
index f2dba9a5d..819b1f491 100644
--- a/languages/pom.xml
+++ b/languages/pom.xml
@@ -9,8 +9,8 @@
languages
pom
+ c
cpp
- cpp2
csharp
emf-metamodel
emf-metamodel-dynamic
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
index 0140a37f7..b5a8fd73f 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
@@ -41,7 +41,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return this.parser.parse(files);
}
}
diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java b/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java
index 182b856d2..d09e23b72 100644
--- a/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java
+++ b/languages/rlang/src/main/java/de/jplag/rlang/RLanguage.java
@@ -46,7 +46,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parserAdapter.parse(files);
}
}
diff --git a/languages/rust/src/main/java/de/jplag/rust/RustLanguage.java b/languages/rust/src/main/java/de/jplag/rust/RustLanguage.java
index 72d8fb89f..50f0826e0 100644
--- a/languages/rust/src/main/java/de/jplag/rust/RustLanguage.java
+++ b/languages/rust/src/main/java/de/jplag/rust/RustLanguage.java
@@ -47,7 +47,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parserAdapter.parse(files);
}
}
diff --git a/languages/scala/src/main/scala/de/jplag/scala/ScalaLanguage.scala b/languages/scala/src/main/scala/de/jplag/scala/ScalaLanguage.scala
index 47988c6d9..424b0f733 100644
--- a/languages/scala/src/main/scala/de/jplag/scala/ScalaLanguage.scala
+++ b/languages/scala/src/main/scala/de/jplag/scala/ScalaLanguage.scala
@@ -20,5 +20,5 @@ class ScalaLanguage extends de.jplag.Language {
override def minimumTokenMatch = 8
- override def parse(files: util.Set[File]): java.util.List[Token] = this.parser.parse(files.asScala.toSet).asJava
+ override def parse(files: util.Set[File], normalize: Boolean): java.util.List[Token] = this.parser.parse(files.asScala.toSet).asJava
}
diff --git a/languages/scala/src/test/java/de/jplag/scala/ScalaLanguageTest.java b/languages/scala/src/test/java/de/jplag/scala/ScalaLanguageTest.java
index fc659f083..2f0660b54 100644
--- a/languages/scala/src/test/java/de/jplag/scala/ScalaLanguageTest.java
+++ b/languages/scala/src/test/java/de/jplag/scala/ScalaLanguageTest.java
@@ -19,6 +19,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import de.jplag.ParsingException;
import de.jplag.SharedTokenType;
import de.jplag.Token;
import de.jplag.TokenPrinter;
@@ -57,7 +58,7 @@ void setup() {
}
@Test
- void parseTestFiles() {
+ void parseTestFiles() throws ParsingException {
for (String fileName : testFiles) {
List tokens = language.parse(Set.of(new File(testFileLocation, fileName)));
String output = TokenPrinter.printTokens(tokens, testFileLocation);
diff --git a/languages/scheme/src/main/java/de/jplag/scheme/SchemeLanguage.java b/languages/scheme/src/main/java/de/jplag/scheme/SchemeLanguage.java
index 08dec1df8..0ebbf4ef9 100644
--- a/languages/scheme/src/main/java/de/jplag/scheme/SchemeLanguage.java
+++ b/languages/scheme/src/main/java/de/jplag/scheme/SchemeLanguage.java
@@ -40,7 +40,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return this.parser.parse(files);
}
}
diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java
index 6d83703e6..ec6316f4d 100644
--- a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java
+++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java
@@ -63,7 +63,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parser.parse(files);
}
diff --git a/languages/swift/src/main/java/de/jplag/swift/SwiftLanguage.java b/languages/swift/src/main/java/de/jplag/swift/SwiftLanguage.java
index b02aa9094..87e13269f 100644
--- a/languages/swift/src/main/java/de/jplag/swift/SwiftLanguage.java
+++ b/languages/swift/src/main/java/de/jplag/swift/SwiftLanguage.java
@@ -47,7 +47,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parserAdapter.parse(files);
}
}
diff --git a/languages/text/src/main/java/de/jplag/text/NaturalLanguage.java b/languages/text/src/main/java/de/jplag/text/NaturalLanguage.java
index a9b974840..572713009 100644
--- a/languages/text/src/main/java/de/jplag/text/NaturalLanguage.java
+++ b/languages/text/src/main/java/de/jplag/text/NaturalLanguage.java
@@ -45,7 +45,7 @@ public int minimumTokenMatch() {
}
@Override
- public List parse(Set files) throws ParsingException {
+ public List parse(Set files, boolean normalize) throws ParsingException {
return parserAdapter.parse(files);
}
}
diff --git a/languages/typescript/src/main/java/de/jplag/typescript/TypeScriptLanguage.java b/languages/typescript/src/main/java/de/jplag/typescript/TypeScriptLanguage.java
index d6f1fbb0a..9fa5ad514 100644
--- a/languages/typescript/src/main/java/de/jplag/typescript/TypeScriptLanguage.java
+++ b/languages/typescript/src/main/java/de/jplag/typescript/TypeScriptLanguage.java
@@ -39,7 +39,7 @@ public TypeScriptLanguageOptions getOptions() {
}
@Override
- protected TypeScriptParserAdapter initializeParser() {
+ protected TypeScriptParserAdapter initializeParser(boolean normalize) {
return new TypeScriptParserAdapter(getOptions().useStrictDefault());
}
}
diff --git a/report-viewer/package-lock.json b/report-viewer/package-lock.json
index 4003471c1..b272d1fce 100644
--- a/report-viewer/package-lock.json
+++ b/report-viewer/package-lock.json
@@ -30,8 +30,8 @@
"@playwright/test": "^1.40.1",
"@rushstack/eslint-patch": "^1.7.2",
"@types/jsdom": "^21.1.6",
- "@types/node": "^18.19.11",
- "@vitejs/plugin-vue": "^5.0.3",
+ "@types/node": "^18.19.15",
+ "@vitejs/plugin-vue": "^5.0.4",
"@vue/eslint-config-prettier": "^9.0.0",
"@vue/eslint-config-typescript": "^12.0.0",
"@vue/test-utils": "^2.4.3",
@@ -39,16 +39,16 @@
"autoprefixer": "^10.4.16",
"eslint": "^8.56.0",
"eslint-plugin-vue": "^9.20.1",
- "husky": "^9.0.10",
+ "husky": "^9.0.11",
"jsdom": "^24.0.0",
"lint-staged": "^15.2.2",
"npm-run-all": "^4.1.5",
- "postcss": "^8.4.34",
+ "postcss": "^8.4.35",
"prettier": "^3.2.5",
"prettier-plugin-tailwindcss": "^0.5.11",
"tailwindcss": "^3.4.1",
"typescript": "^5.3.3",
- "vite": "^5.0.12",
+ "vite": "^5.1.1",
"vitest": "^1.2.2",
"vue-tsc": "^1.8.27"
}
@@ -1053,9 +1053,9 @@
"dev": true
},
"node_modules/@types/node": {
- "version": "18.19.14",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.14.tgz",
- "integrity": "sha512-EnQ4Us2rmOS64nHDWr0XqAD8DsO6f3XR6lf9UIIrZQpUzPVdN/oPuEzfDWNHSyXLvoGgjuEm/sPwFGSSs35Wtg==",
+ "version": "18.19.15",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.15.tgz",
+ "integrity": "sha512-AMZ2UWx+woHNfM11PyAEQmfSxi05jm9OlkxczuHeEqmvwPkYj6MWv44gbzDPefYOLysTOFyI3ziiy2ONmUZfpA==",
"dev": true,
"dependencies": {
"undici-types": "~5.26.4"
@@ -1270,9 +1270,9 @@
"dev": true
},
"node_modules/@vitejs/plugin-vue": {
- "version": "5.0.3",
- "resolved": "https://registry.npmjs.org/@vitejs/plugin-vue/-/plugin-vue-5.0.3.tgz",
- "integrity": "sha512-b8S5dVS40rgHdDrw+DQi/xOM9ed+kSRZzfm1T74bMmBDCd8XO87NKlFYInzCtwvtWwXZvo1QxE2OSspTATWrbA==",
+ "version": "5.0.4",
+ "resolved": "https://registry.npmjs.org/@vitejs/plugin-vue/-/plugin-vue-5.0.4.tgz",
+ "integrity": "sha512-WS3hevEszI6CEVEx28F8RjTX97k3KsrcY6kvTg7+Whm5y3oYvcqzVeGCU3hxSAn4uY2CLCkeokkGKpoctccilQ==",
"dev": true,
"engines": {
"node": "^18.0.0 || >=20.0.0"
@@ -3538,9 +3538,9 @@
}
},
"node_modules/husky": {
- "version": "9.0.10",
- "resolved": "https://registry.npmjs.org/husky/-/husky-9.0.10.tgz",
- "integrity": "sha512-TQGNknoiy6bURzIO77pPRu+XHi6zI7T93rX+QnJsoYFf3xdjKOur+IlfqzJGMHIK/wXrLg+GsvMs8Op7vI2jVA==",
+ "version": "9.0.11",
+ "resolved": "https://registry.npmjs.org/husky/-/husky-9.0.11.tgz",
+ "integrity": "sha512-AB6lFlbwwyIqMdHYhwPe+kjOC3Oc5P3nThEoW/AaO2BX3vJDjWPFxYLxokUZOo6RNX20He3AaT8sESs9NJcmEw==",
"dev": true,
"bin": {
"husky": "bin.mjs"
@@ -5333,9 +5333,9 @@
}
},
"node_modules/postcss": {
- "version": "8.4.34",
- "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.34.tgz",
- "integrity": "sha512-4eLTO36woPSocqZ1zIrFD2K1v6wH7pY1uBh0JIM2KKfrVtGvPFiAku6aNOP0W1Wr9qwnaCsF0Z+CrVnryB2A8Q==",
+ "version": "8.4.35",
+ "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.35.tgz",
+ "integrity": "sha512-u5U8qYpBCpN13BsiEB0CbR1Hhh4Gc0zLFuedrHJKMctHCHAGrMdG0PRM/KErzAL3CU6/eckEtmHNB3x6e3c0vA==",
"funding": [
{
"type": "opencollective",
@@ -6913,13 +6913,13 @@
}
},
"node_modules/vite": {
- "version": "5.0.12",
- "resolved": "https://registry.npmjs.org/vite/-/vite-5.0.12.tgz",
- "integrity": "sha512-4hsnEkG3q0N4Tzf1+t6NdN9dg/L3BM+q8SWgbSPnJvrgH2kgdyzfVJwbR1ic69/4uMJJ/3dqDZZE5/WwqW8U1w==",
+ "version": "5.1.1",
+ "resolved": "https://registry.npmjs.org/vite/-/vite-5.1.1.tgz",
+ "integrity": "sha512-wclpAgY3F1tR7t9LL5CcHC41YPkQIpKUGeIuT8MdNwNZr6OqOTLs7JX5vIHAtzqLWXts0T+GDrh9pN2arneKqg==",
"dev": true,
"dependencies": {
"esbuild": "^0.19.3",
- "postcss": "^8.4.32",
+ "postcss": "^8.4.35",
"rollup": "^4.2.0"
},
"bin": {
diff --git a/report-viewer/package.json b/report-viewer/package.json
index fa54c9836..b1d1d8a06 100644
--- a/report-viewer/package.json
+++ b/report-viewer/package.json
@@ -41,8 +41,8 @@
"@playwright/test": "^1.40.1",
"@rushstack/eslint-patch": "^1.7.2",
"@types/jsdom": "^21.1.6",
- "@types/node": "^18.19.11",
- "@vitejs/plugin-vue": "^5.0.3",
+ "@types/node": "^18.19.15",
+ "@vitejs/plugin-vue": "^5.0.4",
"@vue/eslint-config-prettier": "^9.0.0",
"@vue/eslint-config-typescript": "^12.0.0",
"@vue/test-utils": "^2.4.3",
@@ -50,16 +50,16 @@
"autoprefixer": "^10.4.16",
"eslint": "^8.56.0",
"eslint-plugin-vue": "^9.20.1",
- "husky": "^9.0.10",
+ "husky": "^9.0.11",
"jsdom": "^24.0.0",
"lint-staged": "^15.2.2",
"npm-run-all": "^4.1.5",
- "postcss": "^8.4.34",
+ "postcss": "^8.4.35",
"prettier": "^3.2.5",
"prettier-plugin-tailwindcss": "^0.5.11",
"tailwindcss": "^3.4.1",
"typescript": "^5.3.3",
- "vite": "^5.0.12",
+ "vite": "^5.1.1",
"vitest": "^1.2.2",
"vue-tsc": "^1.8.27"
}
diff --git a/report-viewer/src/model/Language.ts b/report-viewer/src/model/Language.ts
index 0a833f3e7..1334d56e1 100644
--- a/report-viewer/src/model/Language.ts
+++ b/report-viewer/src/model/Language.ts
@@ -4,8 +4,10 @@
enum ParserLanguage {
JAVA = 'Javac based AST plugin',
PYTHON = 'Python3 Parser',
- CPP = 'C/C++ Scanner [basic markup]',
- CPP2 = 'C/C++ Parser',
+ C = 'C Scanner',
+ CPP_OLD = 'C/C++ Scanner [basic markup]',
+ CPP = 'C++ Parser',
+ CPP_2 = 'C/C++ Parser',
C_SHARP = 'C# 6 Parser',
EMF_METAMODEL_DYNAMIC = 'emf-dynamic',
EMF_METAMODEL = 'EMF metamodel',
diff --git a/report-viewer/src/utils/CodeHighlighter.ts b/report-viewer/src/utils/CodeHighlighter.ts
index 2d4b101f6..913e6d959 100644
--- a/report-viewer/src/utils/CodeHighlighter.ts
+++ b/report-viewer/src/utils/CodeHighlighter.ts
@@ -38,8 +38,11 @@ function getHighlightLanguage(lang: ParserLanguage) {
switch (lang) {
case ParserLanguage.PYTHON:
return 'python'
+ case ParserLanguage.C:
+ return 'c'
case ParserLanguage.CPP:
- case ParserLanguage.CPP2:
+ case ParserLanguage.CPP_OLD:
+ case ParserLanguage.CPP_2:
return 'cpp'
case ParserLanguage.C_SHARP:
return 'csharp'