Skip to content

Commit

Permalink
Moved progpedia data into a zip file.
Browse files Browse the repository at this point in the history
  • Loading branch information
TwoOfTwelve committed Oct 31, 2023
1 parent 659e7c7 commit 7774c0c
Show file tree
Hide file tree
Showing 211 changed files with 126 additions and 168,046 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package de.jplag.endtoend.helper;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
* Helper class to perform all necessary operations or functions on files or folders.
Expand Down Expand Up @@ -52,4 +58,25 @@ public static void createFileIfItDoesNotExist(File file) throws IOException {
private static String createNewIOExceptionStringForFileOrFOlderCreation(File file) {
// Render the location once, then embed it into the fixed message text.
String location = file.toString();
return "The file/folder at the location [" + location + "] could not be created!";
}

/**
* Extracts all entries of a zip archive into a target directory.
* <p>
* The archive and all entry streams are closed even if the extraction fails. Entries whose name would resolve to a
* location outside of the target directory (e.g. names containing "../") are rejected.
* @param zip The zip archive to extract
* @param targetDirectory The directory to extract into; missing directories are created
* @throws IOException If the archive cannot be read, a directory cannot be created, an entry cannot be written or an
* entry would be extracted outside of the target directory
*/
public static void unzip(File zip, File targetDirectory) throws IOException {
File canonicalTarget = targetDirectory.getCanonicalFile();
try (ZipFile zipFile = new ZipFile(zip)) {
Enumeration<? extends ZipEntry> entries = zipFile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
File outputFile = new File(canonicalTarget, entry.getName()).getCanonicalFile();
// Guard against "zip slip": the resolved entry location must stay inside the target directory.
if (!outputFile.toPath().startsWith(canonicalTarget.toPath())) {
throw new IOException("The zip entry [" + entry.getName() + "] would be extracted outside of [" + targetDirectory + "]!");
}

// Directory entries are created themselves, file entries need their parent directory.
File directory = entry.isDirectory() ? outputFile : outputFile.getParentFile();
if (!directory.mkdirs() && !directory.isDirectory()) {
throw new IOException("The file/folder at the location [" + directory + "] could not be created!");
}

if (!entry.isDirectory()) {
try (InputStream inputStream = zipFile.getInputStream(entry); OutputStream outputStream = new FileOutputStream(outputFile)) {
inputStream.transferTo(outputStream);
}
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package de.jplag.endtoend.helper;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Map;

import de.jplag.endtoend.model.DataSet;

/**
* Unzips data sets on demand and remembers the extracted directories, so that each data set is extracted at most once
* by this manager. The temporary directories created for the extraction are not removed by this class.
*/
public class UnzipManager {
// Eagerly created singleton: class initialization publishes the instance safely, which the previous
// double-checked locking on a non-volatile field did not guarantee.
private static final UnzipManager INSTANCE = new UnzipManager();

private final Map<DataSet, File> unzippedFiles;

private UnzipManager() {
this.unzippedFiles = new HashMap<>();
}

private static UnzipManager getInstance() {
return INSTANCE;
}

/**
* Returns the directory containing the unzipped content of the given data set, extracting the zip file first if this
* data set has not been unzipped before.
* @param dataSet The data set the zip file belongs to; used as the cache key
* @param zip The zip file to extract if the data set is not cached yet
* @return The directory containing the extracted files
* @throws IOException If the temporary directory cannot be created or the extraction fails
*/
public static File unzipOrCache(DataSet dataSet, File zip) throws IOException {
return getInstance().unzipOrCacheInternal(dataSet, zip);
}

// Synchronized so that concurrent callers neither corrupt the map nor extract the same data set twice.
private synchronized File unzipOrCacheInternal(DataSet dataSet, File zip) throws IOException {
File target = this.unzippedFiles.get(dataSet);
if (target == null) {
target = Files.createTempDirectory(zip.getName()).toFile();
FileHelper.unzip(zip, target);
// Only cache after a successful extraction, so a failed attempt is retried on the next call.
this.unzippedFiles.put(dataSet, target);
}

return target;
}
}
44 changes: 31 additions & 13 deletions endtoend-testing/src/main/java/de/jplag/endtoend/model/DataSet.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package de.jplag.endtoend.model;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import de.jplag.Language;
import de.jplag.endtoend.constants.TestDirectoryConstants;
import de.jplag.endtoend.helper.LanguageDeserializer;
import de.jplag.endtoend.helper.UnzipManager;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
Expand All @@ -18,38 +20,53 @@
* @param name The name of the data set
* @param language The language
* @param format The format
* @param sourceDirectory The source directory, may be null
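* @param sourceLocation The source location (a directory or a zip file, depending on the storage format), may be null
* @param storageFormat The storage format of the source location, may be null (a directory is assumed then)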
* @param resultFile The result file name, may be null
* @param goldStandardFile The gold standard file, may be null
* @param options The options for the jplag runs, may be null
*/
public record DataSet(@JsonProperty(required = true) String name,
@JsonDeserialize(using = LanguageDeserializer.class) @JsonProperty(required = true) Language language,
@JsonProperty(required = true) DataSetFormat format, @JsonProperty String sourceDirectory, @JsonProperty String resultFile,
@JsonProperty String goldStandardFile, @JsonProperty String goldStandardDelimiter, @JsonProperty Options options) {
@JsonProperty(required = true) DataSetFormat format, @JsonProperty String sourceLocation, @JsonProperty StorageFormat storageFormat,
@JsonProperty String resultFile, @JsonProperty String goldStandardFile, @JsonProperty String goldStandardDelimiter,
@JsonProperty Options options) {

private static final String DEFAULT_GOLD_STANDARD_DELIMITER = ";";
private static final String DEFAULT_SOURCE_DIRECTORY = "data/%s";
private static final String DEFAULT_SOURCE_ZIP = "data/%s.zip";
private static final String DEFAULT_RESULT_FILE_NAME = "%s.json";

/**
* Gets the source directories
* @return The source directories
*/
public Set<File> getSourceDirectories() {
return format.getSourceDirectories(this).stream()
.map(file -> new File(TestDirectoryConstants.BASE_PATH_TO_RESOURCES.toFile(), file.getPath())).collect(Collectors.toSet());
public Set<File> getSourceDirectories() throws IOException {
// The directories are taken from the format as-is and copied into a new HashSet.
return new HashSet<>(format.getSourceDirectories(this));
}

/**
* Helper function replacing null by the default value
* @return The source directory
*/
String actualSourceDirectory() {
if (sourceDirectory == null) {
return String.format(DEFAULT_SOURCE_DIRECTORY, this.name);
File actualSourceDirectory() throws IOException {
// A missing storage format is treated as DIRECTORY.
switch (storageFormat == null ? StorageFormat.DIRECTORY : storageFormat) {
case DIRECTORY -> {
// Directories are used in place: the location (or the name-based default) is resolved against the resources base path.
String location = sourceLocation;
if (location == null) {
location = String.format(DEFAULT_SOURCE_DIRECTORY, this.name);
}
return new File(TestDirectoryConstants.BASE_PATH_TO_RESOURCES.toFile(), location);
}
case ZIP -> {
// Zip files are resolved against the resources base path and handed to the UnzipManager, whose result is returned.
String location = sourceLocation;
if (location == null) {
location = String.format(DEFAULT_SOURCE_ZIP, this.name);
}
return UnzipManager.unzipOrCache(this, new File(TestDirectoryConstants.BASE_PATH_TO_RESOURCES.toFile(), location));
}
}
return sourceDirectory;

// Reached only if none of the cases above returned.
throw new IllegalStateException();
}

/**
Expand All @@ -67,8 +84,9 @@ public File getResultFile() {
/**
* @return The gold standard file as an optional.
*/
public Optional<File> getGoldStandardFile() {
return Optional.ofNullable(this.goldStandardFile).map(name -> new File(TestDirectoryConstants.BASE_PATH_TO_RESOURCES.toFile(), name));
public Optional<File> getGoldStandardFile() throws IOException {
// Resolved outside of the lambda, because actualSourceDirectory() declares a checked IOException.
File actualSourceDirectory = this.actualSourceDirectory();
// The gold standard file name is resolved relative to the actual source directory; empty if no name is configured.
return Optional.ofNullable(this.goldStandardFile).map(name -> new File(actualSourceDirectory, name));
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package de.jplag.endtoend.model;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;

Expand All @@ -14,12 +15,12 @@ public enum DataSetFormat {
*/
PROGPEDIA {
@Override
public Set<File> getSourceDirectories(DataSet dataSet) {
public Set<File> getSourceDirectories(DataSet dataSet) throws IOException {
// The only source directory is the "ACCEPTED" subdirectory of the data set's actual source directory.
return Collections.singleton(new File(dataSet.actualSourceDirectory(), "ACCEPTED"));
}

@Override
public File getBaseCodeDirectory(DataSet dataSet, String directoryName) {
public File getBaseCodeDirectory(DataSet dataSet, String directoryName) throws IOException {
// The base code directory is resolved relative to the data set's actual source directory.
return new File(dataSet.actualSourceDirectory(), directoryName);
}
},
Expand All @@ -28,8 +29,8 @@ public File getBaseCodeDirectory(DataSet dataSet, String directoryName) {
*/
PLAIN {
@Override
public Set<File> getSourceDirectories(DataSet dataSet) {
return Collections.singleton(new File(dataSet.actualSourceDirectory()));
public Set<File> getSourceDirectories(DataSet dataSet) throws IOException {
// The data set's actual source directory itself is the only source directory.
return Collections.singleton(dataSet.actualSourceDirectory());
}

@Override
Expand All @@ -43,13 +44,13 @@ public File getBaseCodeDirectory(DataSet dataSet, String directoryName) {
* @param dataSet The data set
* @return The source directories
*/
public abstract Set<File> getSourceDirectories(DataSet dataSet);
public abstract Set<File> getSourceDirectories(DataSet dataSet) throws IOException;

/**
* Resolves the base code directory
* @param dataSet The data set
* @param directoryName The name of the base code directory
* @return The base code directory
*/
public abstract File getBaseCodeDirectory(DataSet dataSet, String directoryName);
public abstract File getBaseCodeDirectory(DataSet dataSet, String directoryName) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package de.jplag.endtoend.model;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import de.jplag.endtoend.constants.TestDirectoryConstants;
import de.jplag.options.JPlagOptions;

/**
Expand All @@ -21,7 +21,7 @@ public record DataSetRunConfiguration(JPlagOptions jPlagOptions, String identifi
* @param dataSet The data set
* @return The configurations
*/
public static List<DataSetRunConfiguration> generateRunConfigurations(DataSet dataSet) {
public static List<DataSetRunConfiguration> generateRunConfigurations(DataSet dataSet) throws IOException {
Options configuredOptions = dataSet.getOptions();
List<DataSetRunConfiguration> result = new ArrayList<>();

Expand All @@ -30,8 +30,7 @@ public static List<DataSetRunConfiguration> generateRunConfigurations(DataSet da
options = options.withMinimumTokenMatch(minimumTokenMatch);
if (configuredOptions.baseCodeDirectory() != null) {
File baseCode = dataSet.format().getBaseCodeDirectory(dataSet, configuredOptions.baseCodeDirectory());
options = options
.withBaseCodeSubmissionDirectory(new File(TestDirectoryConstants.BASE_PATH_TO_RESOURCES.toFile(), baseCode.getPath()));
options = options.withBaseCodeSubmissionDirectory(baseCode);
}
result.add(new DataSetRunConfiguration(options, String.format(IDENTIFIER_FORMAT, minimumTokenMatch)));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package de.jplag.endtoend.model;

/**
* The ways in which the sources of a data set can be stored.
*/
public enum StorageFormat {
/** The sources are stored in a plain directory. */
DIRECTORY,
/** The sources are stored in a zip file. */
ZIP;
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class EndToEndSuiteTest {
* @throws ExitException If JPlag throws an error
*/
@TestFactory
Collection<DynamicContainer> endToEndTestFactory() throws ExitException {
Collection<DynamicContainer> endToEndTestFactory() throws ExitException, IOException {
File descriptorDirectory = TestDirectoryConstants.BASE_PATH_TO_DATA_SET_DESCRIPTORS.toFile();
List<File> testDescriptorFiles = Arrays.asList(Objects.requireNonNull(descriptorDirectory.listFiles()));
List<DynamicContainer> allTests = new ArrayList<>();
Expand All @@ -90,7 +90,7 @@ Collection<DynamicContainer> endToEndTestFactory() throws ExitException {
* @return The dynamic container containing the tests
* @throws ExitException If JPlag throws an error
*/
private DynamicContainer generateTestForLanguage(Language language, List<DataSet> dataSets) throws ExitException {
private DynamicContainer generateTestForLanguage(Language language, List<DataSet> dataSets) throws ExitException, IOException {
List<DynamicContainer> languageTests = new LinkedList<>();
for (DataSet dataSet : dataSets) {
languageTests.add(generateTestsForDataSet(dataSet));
Expand All @@ -104,7 +104,7 @@ private DynamicContainer generateTestForLanguage(Language language, List<DataSet
* @return The dynamic container containing the tests
* @throws ExitException If JPlag throws an error
*/
private DynamicContainer generateTestsForDataSet(DataSet dataSet) throws ExitException {
private DynamicContainer generateTestsForDataSet(DataSet dataSet) throws ExitException, IOException {
List<DynamicContainer> testContainers = new LinkedList<>();
Map<String, ResultDescription> results = new HashMap<>();
try {
Expand Down Expand Up @@ -136,7 +136,7 @@ private DynamicContainer generateTestsForDataSet(DataSet dataSet) throws ExitExc
* @throws ExitException If JPlag throw an error
*/
private DynamicContainer generateTestsForResultDescription(ResultDescription result, DataSet dataSet, DataSetRunConfiguration runConfiguration)
throws ExitException {
throws ExitException, IOException {
JPlagOptions options = runConfiguration.jPlagOptions();
JPlagResult jplagResult = JPlag.run(options);
var comparisons = jplagResult.getAllComparisons().stream().collect(Collectors.toMap(TestSuiteHelper::getTestIdentifier, it -> it));
Expand Down Expand Up @@ -198,7 +198,8 @@ private DynamicTest generateTest(String name, ExpectedResult expectedResult, JPl
* @param goldStandard The gold standard previously saved
* @return The node containing the tests
*/
private DynamicNode generateGoldStandardTest(DataSet dataSet, Map<String, JPlagComparison> comparisonMap, GoldStandard goldStandard) {
private DynamicNode generateGoldStandardTest(DataSet dataSet, Map<String, JPlagComparison> comparisonMap, GoldStandard goldStandard)
throws IOException {
if (goldStandard != null) {
Set<ComparisonIdentifier> goldStandardIdentifiers = ComparisonIdentifier
.loadIdentifiersFromFile(dataSet.getGoldStandardFile().orElseThrow(), dataSet.getActualDelimiter());
Expand Down
Binary file not shown.

This file was deleted.

Loading

0 comments on commit 7774c0c

Please sign in to comment.