Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi Language Program Support #2087

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@
<artifactId>llvmir</artifactId>
<version>${revision}</version>
</dependency>
<dependency>
<groupId>de.jplag</groupId>
<artifactId>multi-language</artifactId>
<version>${revision}</version>
</dependency>
<!-- CLI -->
<dependency>
<groupId>org.kohsuke.metainf-services</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import java.util.ArrayList;

import de.jplag.LanguageLoader;

/**
* Helper class for picocli to find all available languages.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package de.jplag.cli.options;

import de.jplag.Language;
import de.jplag.LanguageLoader;

import picocli.CommandLine;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import java.util.stream.Collectors;

import de.jplag.Language;
import de.jplag.LanguageLoader;
import de.jplag.cli.CliException;
import de.jplag.cli.options.CliOptions;
import de.jplag.cli.options.LanguageLoader;
import de.jplag.options.LanguageOption;
import de.jplag.options.LanguageOptions;

Expand Down
2 changes: 1 addition & 1 deletion cli/src/test/java/de/jplag/cli/LanguageTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import org.junit.jupiter.params.provider.MethodSource;

import de.jplag.Language;
import de.jplag.LanguageLoader;
import de.jplag.cli.options.CliOptions;
import de.jplag.cli.options.LanguageLoader;
import de.jplag.cli.test.CliArgument;
import de.jplag.cli.test.CliTest;
import de.jplag.exceptions.ExitException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import java.io.IOException;

import de.jplag.Language;
import de.jplag.cli.options.LanguageLoader;
import de.jplag.LanguageLoader;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package de.jplag.cli.options;
package de.jplag;

import java.util.Collections;
import java.util.Map;
Expand All @@ -11,8 +11,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.Language;

/**
* This class contains methods to load {@link Language Languages}.
* @author Dominik Fuchss
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ protected <T> LanguageOption<T> createDefaultOption(OptionType<T> type, String n
* @return The new option
*/
protected <T> LanguageOption<T> createOption(OptionType<T> type, String name, String description) {
LanguageOption<T> option = new DefaultLanguageOption<>(type, name, description);
LanguageOption<T> option = new DefaultLanguageOption<>(type, description, name);
TwoOfTwelve marked this conversation as resolved.
Show resolved Hide resolved
this.options.add(option);
return option;
}
Expand Down
26 changes: 26 additions & 0 deletions languages/multi-language/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the multi-language module; inherits from the shared "languages" parent. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>de.jplag</groupId>
<artifactId>languages</artifactId>
<version>${revision}</version>
</parent>
<artifactId>multi-language</artifactId>

<dependencies>
<!-- The language modules below are test-scoped: they are on the classpath for this module's tests only, not for its main code. -->
<dependency>
<groupId>de.jplag</groupId>
<artifactId>java</artifactId>
<version>${revision}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>de.jplag</groupId>
<artifactId>cpp</artifactId>
<version>${revision}</version>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package de.jplag.multilang;

import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

import org.kohsuke.MetaInfServices;

import de.jplag.Language;
import de.jplag.LanguageLoader;
import de.jplag.ParsingException;
import de.jplag.Token;
import de.jplag.options.LanguageOptions;

/**
 * Language module that delegates parsing to several other language modules, so that submissions containing files of
 * more than one programming language can be processed together. The languages to delegate to are configured through
 * {@link MultiLanguageOptions}.
 */
@MetaInfServices(Language.class)
public class MultiLanguage implements Language {
    private static final String NAME = "multi-language";
    private static final String IDENTIFIER = "multi";

    /** Fallback used by {@link #minimumTokenMatch()} if no delegate language provides a value. */
    private static final int DEFAULT_MINIMUM_TOKEN_MATCH = 9;

    private final MultiLanguageOptions options;

    /**
     * Creates the module with a fresh, unconfigured set of options.
     */
    public MultiLanguage() {
        this.options = new MultiLanguageOptions();
    }

    /**
     * Collects the suffixes of all available language modules except the multi-language module itself.
     * @return the distinct suffixes, in the order in which they are first encountered
     */
    @Override
    public String[] suffixes() {
        // Exclude by class rather than by identity, matching the filter in MultiLanguageOptions: an identity check
        // would miss any other MultiLanguage instance (e.g. if the loader holds its own instance while this one was
        // constructed directly).
        return LanguageLoader.getAllAvailableLanguages().values().stream()
                .filter(language -> !language.getClass().equals(MultiLanguage.class))
                .flatMap(language -> Arrays.stream(language.suffixes()))
                .distinct() // several modules may declare the same suffix
                .toArray(String[]::new);
    }

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getIdentifier() {
        return IDENTIFIER;
    }

    /**
     * Returns the smallest minimum token match among the configured languages.
     * @return the minimum over all configured languages
     * @throws IllegalArgumentException if the configured languages cannot be resolved (raised by
     * {@link MultiLanguageOptions#getLanguages()})
     */
    @Override
    public int minimumTokenMatch() {
        return this.options.getLanguages().stream()
                .mapToInt(Language::minimumTokenMatch)
                .min()
                .orElse(DEFAULT_MINIMUM_TOKEN_MATCH);
    }

    /**
     * Parses the given files by handing each one to a {@link MultiLanguageParser} built from the current options.
     * @param files the files to parse
     * @param normalize passed through unchanged to the delegate language modules
     * @return the tokens produced by the parser
     * @throws ParsingException if parsing fails
     */
    @Override
    public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
        MultiLanguageParser parser = new MultiLanguageParser(this.options);
        return parser.parseFiles(files, normalize);
    }

    @Override
    public LanguageOptions getOptions() {
        return this.options;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package de.jplag.multilang;

import java.util.Arrays;
import java.util.List;

import de.jplag.Language;
import de.jplag.LanguageLoader;
import de.jplag.options.LanguageOption;
import de.jplag.options.LanguageOptions;
import de.jplag.options.OptionType;

public class MultiLanguageOptions extends LanguageOptions {
// Format string; %s is replaced with the language name that could not be resolved.
private static final String ERROR_LANGUAGE_NOT_FOUND = "The selected language %s could not be found";
// Used both when no value is configured and when no usable language remains after resolving the list.
private static final String ERROR_NOT_ENOUGH_LANGUAGES = "To use multi language specify at least 1 language";

/** Option named "languages": a single string holding the ','-separated names of the languages to use. */
public LanguageOption<String> languageNames = createOption(OptionType.string(), "languages",
"The languages that should be used. This is a ',' separated list");
TwoOfTwelve marked this conversation as resolved.
Show resolved Hide resolved
private List<Language> languages = null;

/**
 * Resolves the configured language names into language modules. The result is cached after the first successful
 * call, so later changes to {@link #languageNames} are not picked up.
 * <p>
 * Names are separated by ','. Whitespace around a name and empty entries are ignored, so {@code "java, cpp"} is
 * treated like {@code "java,cpp"}. The multi-language module itself is dropped from the result.
 * @return the resolved languages; never empty
 * @throws IllegalArgumentException if no languages are configured, if a name cannot be resolved, or if no language
 * remains after filtering
 */
public List<Language> getLanguages() {
    if (this.languages != null) {
        return this.languages;
    }

    String configuredNames = languageNames.getValue();
    if (configuredNames == null) {
        throw new IllegalArgumentException(ERROR_NOT_ENOUGH_LANGUAGES);
    }

    List<Language> resolved = Arrays.stream(configuredNames.split(","))
            .map(String::strip) // tolerate "java, cpp"
            .filter(name -> !name.isEmpty()) // tolerate "java,,cpp"
            .map(name -> LanguageLoader.getLanguage(name)
                    .orElseThrow(() -> new IllegalArgumentException(String.format(ERROR_LANGUAGE_NOT_FOUND, name))))
            // The multi-language module must not delegate to itself.
            .filter(language -> !language.getClass().equals(MultiLanguage.class))
            .toList();

    if (resolved.isEmpty()) {
        throw new IllegalArgumentException(ERROR_NOT_ENOUGH_LANGUAGES);
    }

    this.languages = resolved;
    return resolved;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package de.jplag.multilang;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import de.jplag.Language;
import de.jplag.ParsingException;
import de.jplag.Token;

public class MultiLanguageParser {
private final List<Language> languages;

/**
 * Creates a parser that delegates to the languages resolved from the given options.
 * @param options the multi-language options to take the languages from
 * @throws IllegalArgumentException if the options cannot resolve their languages (raised by
 * {@link MultiLanguageOptions#getLanguages()})
 */
public MultiLanguageParser(MultiLanguageOptions options) {
    languages = options.getLanguages();
}

/**
 * Parses every file with the language responsible for it and concatenates the resulting tokens in the iteration
 * order of the given set. Files for which no language is found are skipped.
 * @param files the files to parse
 * @param normalize passed through unchanged to each language's parse method
 * @return all tokens of all parsed files
 * @throws ParsingException if one of the languages fails to parse its file
 */
public List<Token> parseFiles(Set<File> files, boolean normalize) throws ParsingException {
    List<Token> tokens = new ArrayList<>();
    for (File sourceFile : files) {
        Optional<Language> responsibleLanguage = findLanguageForFile(sourceFile);
        if (responsibleLanguage.isEmpty()) {
            continue; // no language claims this file
        }
        // Each file is parsed on its own, by the language selected for it.
        List<Token> fileTokens = responsibleLanguage.get().parse(Set.of(sourceFile), normalize);
        tokens.addAll(fileTokens);
    }
    return tokens;
}

/**
 * Finds the language responsible for a file by comparing the end of the file name with each language's suffixes.
 * If several languages declare a matching suffix, the one that comes first in the languages list is chosen.
 * @param file the file to find a language for
 * @return the first matching language, or an empty Optional if no language has a matching suffix
 */
private Optional<Language> findLanguageForFile(File file) {
return this.languages.stream().filter(language -> Arrays.stream(language.suffixes()).anyMatch(suffix -> file.getName().endsWith(suffix)))
.findFirst();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will currently pick the first language for cases where multiple languages support the same file type. Have you discussed this behavior? This affects the C/C++ modules and also the EMF modules.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(also in future Java vs. Java-CPG)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have discussed this with Robin, Nils and Sebastian. Currently it's not that big of an issue, since the user has to select the modules manually. If there are multiple selected modules for the same file, the module is chosen arbitrarily.

This should be addressed in the future, maybe by adding priorities to language modules or by distinguishing files in more detail than just the suffix. I think it should be done separately though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I understand; I overlooked that the users specify the languages when using the module.
In the long run, I considered making the language module the default, but that would only be user-friendly if users do not need to specify languages. This would mean the multi-language module automatically parses all code that JPlag supports. Then we need prioritization.

With the current solution, we add yet another cli argument, which is less likely to be used by many users. For now, let us leave it as is, but before the release, we need to think about which mode we truly want. If we want more people to try out the language module, we probably need to implement the unparameterized version. However, in all cases, I would not make it the default language straight away.

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package de.java.multilang;

import static de.jplag.SharedTokenType.FILE_END;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import de.jplag.ParsingException;
import de.jplag.Token;
import de.jplag.TokenType;
import de.jplag.cpp.CPPTokenType;
import de.jplag.java.JavaTokenType;
import de.jplag.multilang.MultiLanguage;
import de.jplag.multilang.MultiLanguageOptions;

/**
 * Tests the {@link MultiLanguage} module with one C++ and one Java source file that are copied from the test
 * resources into a temporary directory.
 */
public class MultilangTest {
    private static final String RESOURCE_DIRECTORY = "/de/jplag/multilang/testDataSet/";

    // Tokens of CppCode.cpp followed by those of JavaCode.java, each terminated by FILE_END.
    private static final List<TokenType> EXPECTED_TOKENS = List.of(CPPTokenType.FUNCTION_BEGIN, CPPTokenType.RETURN,
            CPPTokenType.FUNCTION_END, FILE_END, JavaTokenType.J_CLASS_BEGIN, JavaTokenType.J_CLASS_END, FILE_END);

    private static File testDataDirectory;
    private static File javaCode;
    private static File cppCode;

    @BeforeAll
    static void setUp() throws IOException {
        testDataDirectory = Files.createTempDirectory("multiLanguageTestData").toFile();
        cppCode = new File(testDataDirectory, "CppCode.cpp");
        javaCode = new File(testDataDirectory, "JavaCode.java");

        copyResource(RESOURCE_DIRECTORY + "CppCode.cpp", cppCode);
        copyResource(RESOURCE_DIRECTORY + "JavaCode.java", javaCode);
    }

    /**
     * Copies a classpath resource into the given file, closing both streams afterwards.
     * @param resourcePath absolute classpath location of the resource
     * @param target the file to write the resource content to
     * @throws IOException if the resource does not exist or copying fails
     */
    private static void copyResource(String resourcePath, File target) throws IOException {
        try (var input = MultilangTest.class.getResourceAsStream(resourcePath)) {
            if (input == null) {
                // Fail with a descriptive message instead of a NullPointerException.
                throw new IOException("Test resource not found: " + resourcePath);
            }
            try (var output = new FileOutputStream(target)) {
                input.transferTo(output);
            }
        }
    }

    @Test
    void testMultiLanguageParsing() throws ParsingException {
        MultiLanguage languageModule = new MultiLanguage();
        ((MultiLanguageOptions) languageModule.getOptions()).languageNames.setValue("java,cpp");

        Set<File> sources = new TreeSet<>(List.of(javaCode, cppCode)); // Using TreeSet to ensure order of entries
        List<Token> tokens = languageModule.parse(sources, false);

        Assertions.assertEquals(EXPECTED_TOKENS, tokens.stream().map(Token::getType).toList());
    }

    @Test
    void testNoLanguagesConfigured() {
        MultiLanguage languageModule = new MultiLanguage();
        Set<File> sources = Set.of(javaCode, cppCode);

        Assertions.assertThrowsExactly(IllegalArgumentException.class, () -> languageModule.parse(sources, false));
    }

    @Test
    void testInvalidLanguage() {
        MultiLanguage languageModule = new MultiLanguage();
        ((MultiLanguageOptions) languageModule.getOptions()).languageNames.setValue("thisIsNotALanguage");
        Set<File> sources = Set.of(javaCode, cppCode);

        Assertions.assertThrowsExactly(IllegalArgumentException.class, () -> languageModule.parse(sources, false));
    }

    @AfterAll
    static void cleanUp() {
        // Delete the files first: File.delete() only removes a directory when it is empty.
        javaCode.delete();
        cppCode.delete();
        testDataDirectory.delete();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
int main() {
return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
public class JavaCode {

}
1 change: 1 addition & 0 deletions languages/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
<module>typescript</module>
<module>javascript</module>
<module>llvmir</module>
<module>multi-language</module>
</modules>
<dependencies>
<dependency>
Expand Down
Loading