diff --git a/.github/workflows/closeDevIssues.yml b/.github/workflows/closeDevIssues.yml new file mode 100644 index 0000000000..f8577bc0c0 --- /dev/null +++ b/.github/workflows/closeDevIssues.yml @@ -0,0 +1,21 @@ +name: Close linked issue on PR merge + +on: + pull_request: + types: + - closed + +jobs: + close_issues: + if: github.event.pull_request.merged == true && github.base_ref == 'develop' + runs-on: ubuntu-latest + + steps: + - name: Checkout ๐Ÿ›Ž๏ธ + uses: actions/checkout@v4 + + - name: Run script + working-directory: .github/workflows/scripts + run: | + pip install requests + python closeDevIssues.py ${{ secrets.GITHUB_TOKEN }} jplag JPlag ${{ github.event.pull_request.number }} \ No newline at end of file diff --git a/.github/workflows/complete-e2e.yml b/.github/workflows/complete-e2e.yml new file mode 100644 index 0000000000..332cc834e1 --- /dev/null +++ b/.github/workflows/complete-e2e.yml @@ -0,0 +1,160 @@ +# Builds JPlag and then runs Datasets and Report Viewer e2e tests on multiple OS +name: Complete e2e Test + +on: + workflow_dispatch: + pull_request: + types: [opened, synchronize, reopened] + paths: + - ".github/workflows/complete-e2e.yml" + - "report-viewer/**" + - "**/pom.xml" + - "**.java" + - "**.g4" + +jobs: + pre_job: + runs-on: ubuntu-latest + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@master + with: + concurrent_skipping: 'same_content_newer' + skip_after_successful_duplicate: 'true' + + build_jar: + needs: pre_job + if: ${{ needs.pre_job.outputs.should_skip != 'true' }} + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 21 + distribution: 'temurin' + + - uses: actions/setup-node@v4 + with: + node-version: "18" + + - name: Build Assembly + run: mvn -Pwith-report-viewer -DskipTests clean package assembly:single + + - name: Rename Jar + run: mv 
cli/target/jplag-*-jar-with-dependencies.jar cli/target/jplag.jar + + - name: Upload Assembly + uses: actions/upload-artifact@v4 + with: + name: "JPlag" + path: "cli/target/jplag.jar" + retention-days: 30 + + run_jplag: + needs: build_jar + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + dataset: [ + {zip: "progpedia.zip", name: "progpedia", folder: "ACCEPTED", language: "java", cliArgs: "-bc base"}, + {zip: "fileSingleRoot.zip", name: "fileSingleRoot", folder: "fileSingleRoot", language: "java", cliArgs: ""}, + {zip: "folderSingleRoot.zip", name: "folderSingleRoot", folder: "folderSingleRoot", language: "java", cliArgs: ""}, + {zip: "fileMultiRoot.zip", name: "fileMultiRoot", folder: "f0", language: "java", cliArgs: "--new f1"}, + {zip: "folderMultiRoot.zip", name: "folderMultiRoot", folder: "f0", language: "java", cliArgs: "--new f1"}, + {zip: "mixedMultiRoot.zip", name: "mixedBaseFile", folder: "f0", language: "java", cliArgs: "--new f1"}, + {zip: "mixedMultiRoot.zip", name: "mixedBaseFolder", folder: "f1", language: "java", cliArgs: "--new f0"}, + {zip: "cpp.zip", name: "cpp", folder: "./cpp", language: "cpp", cliArgs: ""}, + {zip: "csharp.zip", name: "csharp", folder: "./csharp", language: "csharp", cliArgs: ""}, + {zip: "python.zip", name: "python", folder: "./python", language: "python3", cliArgs: ""} + ] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 21 + distribution: 'temurin' + + - name: Get JAR + uses: actions/download-artifact@v4 + with: + name: JPlag + + - name: Copy and unzip dataset windows + if: ${{ matrix.os == 'windows-latest' }} + run: | + Expand-Archive -LiteralPath .github/workflows/files/${{ matrix.dataset.zip }} -DestinationPath ./ + + - name: Copy and unzip dataset macos and ubuntu + if: ${{ matrix.os == 'macos-latest' || matrix.os == 'ubuntu-latest'}} + run: | + unzip 
.github/workflows/files/${{ matrix.dataset.zip }} + + - name: Run JPlag + run: | + java -jar jplag.jar ${{ matrix.dataset.folder }} -l ${{ matrix.dataset.language }} -r ${{ matrix.dataset.name }}-report ${{ matrix.dataset.cliArgs }} + + - name: Upload result + uses: actions/upload-artifact@v4 + with: + name: "${{ matrix.dataset.name }}-${{ matrix.os }}" + path: "${{ matrix.dataset.name }}-report.zip" + retention-days: 30 + + e2e_test: + needs: run_jplag + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: "18" + + - name: Install and Build + working-directory: report-viewer + run: | + npm install + npm run build + + - name: Install playwright + working-directory: report-viewer + run: npx playwright install --with-deps + + - name: Download JPlag Reports + uses: actions/download-artifact@v4 + with: + pattern: "*-${{ matrix.os }}" + path: "report-viewer/tests/e2e/assets" + merge-multiple: true + + - name: Run tests + working-directory: report-viewer + run: | + npm run test:e2e + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: "test-results-${{ matrix.os }}" + path: | + report-viewer/test-results + report-viewer/playwright-report + retention-days: 30 \ No newline at end of file diff --git a/.github/workflows/files/cpp.zip b/.github/workflows/files/cpp.zip new file mode 100644 index 0000000000..4977bab09e Binary files /dev/null and b/.github/workflows/files/cpp.zip differ diff --git a/.github/workflows/files/csharp.zip b/.github/workflows/files/csharp.zip new file mode 100644 index 0000000000..c6cc4360ce Binary files /dev/null and b/.github/workflows/files/csharp.zip differ diff --git a/.github/workflows/files/fileMultiRoot.zip b/.github/workflows/files/fileMultiRoot.zip new file mode 100644 index 0000000000..24baa4dbfc Binary files /dev/null 
and b/.github/workflows/files/fileMultiRoot.zip differ diff --git a/.github/workflows/files/fileSingleRoot.zip b/.github/workflows/files/fileSingleRoot.zip new file mode 100644 index 0000000000..778ceace2a Binary files /dev/null and b/.github/workflows/files/fileSingleRoot.zip differ diff --git a/.github/workflows/files/folderMultiRoot.zip b/.github/workflows/files/folderMultiRoot.zip new file mode 100644 index 0000000000..db8f2f80fa Binary files /dev/null and b/.github/workflows/files/folderMultiRoot.zip differ diff --git a/.github/workflows/files/folderSingleRoot.zip b/.github/workflows/files/folderSingleRoot.zip new file mode 100644 index 0000000000..d23caf3d47 Binary files /dev/null and b/.github/workflows/files/folderSingleRoot.zip differ diff --git a/.github/workflows/files/mixedMultiRoot.zip b/.github/workflows/files/mixedMultiRoot.zip new file mode 100644 index 0000000000..628c301e75 Binary files /dev/null and b/.github/workflows/files/mixedMultiRoot.zip differ diff --git a/.github/workflows/files/python.zip b/.github/workflows/files/python.zip new file mode 100644 index 0000000000..61fc945265 Binary files /dev/null and b/.github/workflows/files/python.zip differ diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index e83f58f23f..9629427c80 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -57,7 +57,7 @@ jobs: - name: Upload Assembly uses: actions/upload-artifact@v4 with: - name: "JPlag" - path: "jplag.cli/target/jplag-*-jar-with-dependencies.jar" + name: "JPlag Jar" + path: "cli/target/jplag-*-jar-with-dependencies.jar" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b4ea875e36..3fcc4b7e3c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -2,7 +2,7 @@ name: Deploy to Maven Central & GitHub on: workflow_dispatch: release: - types: [created] + types: [created, published] jobs: publish-maven-central: runs-on: ubuntu-latest @@ -42,7 +42,7 @@ jobs: 
run: mvn -Pwith-report-viewer -U -B clean package assembly:single - name: Attach CLI to Release on GitHub - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: cli/target/jplag-*-jar-with-dependencies.jar diff --git a/.github/workflows/report-viewer-demo.yml b/.github/workflows/report-viewer-demo.yml index 78e6fbe37f..f7a747813e 100644 --- a/.github/workflows/report-viewer-demo.yml +++ b/.github/workflows/report-viewer-demo.yml @@ -102,7 +102,7 @@ jobs: npm run build-demo - name: Deploy ๐Ÿš€ - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.1 with: branch: gh-pages folder: report-viewer/dist diff --git a/.github/workflows/report-viewer-dev.yml b/.github/workflows/report-viewer-dev.yml index 2f849fb6a9..ef2bfd8074 100644 --- a/.github/workflows/report-viewer-dev.yml +++ b/.github/workflows/report-viewer-dev.yml @@ -27,7 +27,7 @@ jobs: npm run build-dev - name: Deploy ๐Ÿš€ - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.1 with: branch: gh-pages folder: report-viewer/dist diff --git a/.github/workflows/report-viewer-e2e.yml b/.github/workflows/report-viewer-e2e.yml deleted file mode 100644 index 98744e5548..0000000000 --- a/.github/workflows/report-viewer-e2e.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: Report Viewer e2e Test - -on: - workflow_dispatch: - pull_request: - types: [opened, synchronize, reopened] - paths: - - ".github/workflows/report-viewer-e2e.yml" - - "report-viewer/**" - -jobs: - pre_job: - runs-on: ubuntu-latest - outputs: - should_skip: ${{ steps.skip_check.outputs.should_skip }} - steps: - - id: skip_check - uses: fkirc/skip-duplicate-actions@master - with: - concurrent_skipping: 'same_content_newer' - skip_after_successful_duplicate: 'true' - - test: - needs: pre_job - if: ${{ needs.pre_job.outputs.should_skip != 'true' }} - runs-on: ubuntu-latest - steps: - - name: 
Checkout ๐Ÿ›Ž๏ธ - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - with: - node-version: "18" - - - name: Install and Build ๐Ÿ”ง - working-directory: report-viewer - run: | - npm install - npm run build - - - name: Install playwright ๐Ÿ”ง - working-directory: report-viewer - run: npx playwright install --with-deps - - - name: Run tests ๐Ÿงช - working-directory: report-viewer - run: | - npm run test:e2e - - - name: Upload test results ๐Ÿ“ค - uses: actions/upload-artifact@v4 - if: always() - with: - name: test-results - path: | - report-viewer/test-results - report-viewer/playwright-report - retention-days: 30 \ No newline at end of file diff --git a/.github/workflows/report-viewer.yml b/.github/workflows/report-viewer.yml index f1662be72c..dfa0689e3d 100644 --- a/.github/workflows/report-viewer.yml +++ b/.github/workflows/report-viewer.yml @@ -37,7 +37,7 @@ jobs: npm run build-prod - name: Deploy ๐Ÿš€ - uses: JamesIves/github-pages-deploy-action@v4.5.0 + uses: JamesIves/github-pages-deploy-action@v4.6.1 with: branch: gh-pages folder: report-viewer/dist diff --git a/.github/workflows/scripts/closeDevIssues.py b/.github/workflows/scripts/closeDevIssues.py new file mode 100644 index 0000000000..0ddac50fac --- /dev/null +++ b/.github/workflows/scripts/closeDevIssues.py @@ -0,0 +1,76 @@ +import requests +import re +import sys + +headers = {"Authorization": f"Bearer {sys.argv[1]}"} + +owner = sys.argv[2] +repo = sys.argv[3] +pr_number = int(sys.argv[4]) + +query = f""" +{{ + repository(owner: "{owner}", name: "{repo}") {{ + pullRequest(number: {pr_number}) {{ + merged + baseRefName + body + closingIssuesReferences (first: 50) {{ + nodes {{ + number + }} + }} + }} + }} +}} +""" + +def run_query(query): + request = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers) + if request.status_code == 200: + return request.json() + else: + raise Exception("Query failed to run by returning code of {}. 
{}".format(request.status_code, query)) + + +closing_keywords = [ + 'closes', 'close', 'closed', 'fix', 'fixes', 'fixed', 'resolves', 'resolve', 'resolved' +] + +# checks all subsequences of the pr body for closing keywords and extracts the corresponding issue numbers +def subsequences_matching_regex(input_string, regex): + matches = [] + for i in range(len(input_string)): + for j in range(i+1, len(input_string)+1): + subsequence = input_string[i:j] + match = re.fullmatch(regex, subsequence) + if match: + matches.append(int(match.group(1))) + return matches + +# gets all issues linked to pr either via the closing keywords or the sidebar +def get_linked_issues(result): + issue_body = result['body'].lower() + "." # we append a dot to the end of the body to make sure the last word is checked with the regex + closing_issues = [] + for keyword in closing_keywords: + closing_issues.extend(subsequences_matching_regex(issue_body, f'{keyword} #([0-9]+)[^0-9]')) + for k in result['closingIssuesReferences']['nodes']: + closing_issues.append(k['number']) + return list(set(closing_issues)) + +result = run_query(query)['data']['repository']['pullRequest'] +issues = get_linked_issues(result) + +def close_issue(issue_number): + requests.post(f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments", json={"body": f"Closed by #{pr_number}."}, headers=headers) + requests.patch(f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}", json={"state": "closed"}, headers=headers) + +if result['baseRefName'] != "develop": + print("PR not merged to develop, not closing issues") +elif result['merged']: + print(f"Closing issues: {issues}") + for issue in issues: + close_issue(issue) + print(f"Closed issue {issue}") +else: + print("PR not merged, not closing issues") \ No newline at end of file diff --git a/README.md b/README.md index 303ce1a573..eeda7526fe 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Maven 
Central](https://maven-badges.herokuapp.com/maven-central/de.jplag/jplag/badge.svg)](https://maven-badges.herokuapp.com/maven-central/de.jplag/jplag) [![License](https://img.shields.io/github/license/jplag/jplag.svg)](https://github.com/jplag/jplag/blob/main/LICENSE) [![GitHub commit activity](https://img.shields.io/github/commit-activity/y/jplag/JPlag)](https://github.com/jplag/JPlag/pulse) -[![SonarCloud Coverage](https://sonarcloud.io/api/project_badges/measure?project=jplag_JPlag&metric=coverage)](https://sonarcloud.io/component_measures/metric/coverage/list?id=jplag_JPlag) +[![SonarCloud Coverage](https://sonarcloud.io/api/project_badges/measure?project=jplag_JPlag&metric=coverage)](https://sonarcloud.io/component_measures?metric=Coverage&view=list&id=jplag_JPlag) [![Report Viewer](https://img.shields.io/badge/report%20viewer-online-b80025)](https://jplag.github.io/JPlag/) [![Java Version](https://img.shields.io/badge/java-SE%2021-yellowgreen)](#download-and-installation) @@ -32,7 +32,7 @@ All supported languages and their supported versions are listed below. 
| [C](https://isocpp.org) | 11 | c | legacy | JavaCC | | [C++](https://isocpp.org) | 14 | cpp | beta | ANTLR 4 | | [C#](https://docs.microsoft.com/en-us/dotnet/csharp/) | 6 | csharp | mature | ANTLR 4 | -| [Python](https://www.python.org) | 3.6 | python3 | legacy | ANTLR 4 | +| [Python](https://www.python.org) | 3.6 | python3 | beta | ANTLR 4 | | [JavaScript](https://www.javascript.com/) | ES6 | javascript | beta | ANTLR 4 | | [TypeScript](https://www.typescriptlang.org/) | [~5](https://github.com/antlr/grammars-v4/tree/master/javascript/typescript/README.md) | typescript | beta | ANTLR 4 | | [Go](https://go.dev) | 1.17 | golang | beta | ANTLR 4 | diff --git a/cli/pom.xml b/cli/pom.xml index 459daf3eb1..f376667af7 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -127,13 +127,13 @@ info.picocli picocli - 4.7.5 + 4.7.6 me.tongfei progressbar - 0.10.0 + 0.10.1 @@ -183,7 +183,7 @@ org.codehaus.mojo exec-maven-plugin - 1.3.2 + 3.3.0 npm install diff --git a/cli/src/main/java/de/jplag/cli/CLI.java b/cli/src/main/java/de/jplag/cli/CLI.java index ac79e68c01..948a0927d4 100644 --- a/cli/src/main/java/de/jplag/cli/CLI.java +++ b/cli/src/main/java/de/jplag/cli/CLI.java @@ -1,21 +1,8 @@ package de.jplag.cli; -import static picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_DESCRIPTION_HEADING; -import static picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_OPTION_LIST; -import static picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_SYNOPSIS; - -import java.awt.Desktop; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.net.URI; -import java.security.SecureRandom; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.stream.Collectors; import org.slf4j.ILoggerFactory; import org.slf4j.Logger; @@ -23,173 +10,108 @@ import de.jplag.JPlag; import de.jplag.JPlagResult; -import de.jplag.Language; +import 
de.jplag.cli.logger.CliProgressBarProvider; import de.jplag.cli.logger.CollectedLoggerFactory; -import de.jplag.cli.logger.TongfeiProgressBarProvider; -import de.jplag.cli.server.ReportViewer; -import de.jplag.clustering.ClusteringOptions; -import de.jplag.clustering.Preprocessing; +import de.jplag.cli.picocli.CliInputHandler; import de.jplag.exceptions.ExitException; import de.jplag.logging.ProgressBarLogger; -import de.jplag.merging.MergingOptions; import de.jplag.options.JPlagOptions; -import de.jplag.options.LanguageOption; -import de.jplag.options.LanguageOptions; -import de.jplag.reporting.reportobject.ReportObjectFactory; - -import picocli.CommandLine; -import picocli.CommandLine.Model.CommandSpec; -import picocli.CommandLine.Model.OptionSpec; -import picocli.CommandLine.ParseResult; +import de.jplag.util.FileUtils; /** * Command line interface class, allows using via command line. * @see CLI#main(String[]) */ public final class CLI { - private static final Logger logger = LoggerFactory.getLogger(CLI.class); - private static final Random RANDOM = new SecureRandom(); - - private static final String CREDITS = "Created by IPD Tichy, Guido Malpohl, and others. Maintained by Timur Saglam and Sebastian Hahner. Logo by Sandro Koch."; + private static final String DEFAULT_FILE_ENDING = ".zip"; + private static final int NAME_COLLISION_ATTEMPTS = 4; - private static final String[] DESCRIPTIONS = {"Detecting Software Plagiarism", "Software-Archaeological Playground", "Since 1996", - "Scientifically Published", "Maintained by SDQ", "RIP Structure and Table", "What else?", "You have been warned!", "Since Java 1.0", - "More Abstract than Tree", "Students Nightmare", "No, changing variable names does not work...", "The tech is out there!", - "Developed by plagiarism experts.", "State of the Art Obfuscation Resilience", "www.helmholtz.software/software/jplag"}; + private static final String OUTPUT_FILE_EXISTS = "The output file (also with suffixes e.g. 
results(1).zip) already exists. You can use --overwrite to overwrite the file."; + private static final String OUTPUT_FILE_NOT_WRITABLE = "The output file (%s) cannot be written to."; - private static final String OPTION_LIST_HEADING = "Parameter descriptions: "; + private final CliInputHandler inputHandler; - private final CommandLine commandLine; - private final CliOptions options; + /** + * Creates a cli. + * @param args The command line arguments + */ + public CLI(String[] args) { + this.inputHandler = new CliInputHandler(args); + } - private static final String IMPOSSIBLE_EXCEPTION = "This should not have happened." - + " Please create an issue on github (https://github.com/jplag/JPlag/issues) with the entire output."; - private static final String UNKOWN_LANGAUGE_EXCEPTION = "Language %s does not exists. Available languages are: %s"; + /** + * Executes the cli + * @throws ExitException If anything on the side of JPlag goes wrong + * @throws IOException If any files did not work + */ + public void executeCli() throws ExitException, IOException { + logger.debug("Your version of JPlag is {}", JPlag.JPLAG_VERSION); - private static final String DESCRIPTION_PATTERN = "%nJPlag - %s%n%s%n%n"; + if (!this.inputHandler.parse()) { + ProgressBarLogger.setProgressBarProvider(new CliProgressBarProvider()); - private static final String DEFAULT_FILE_ENDING = ".zip"; + switch (this.inputHandler.getCliOptions().mode) { + case RUN -> runJPlag(); + case VIEW -> runViewer(null); + case RUN_AND_VIEW -> runViewer(runJPlag()); + } + } + } /** - * Main class for using JPlag via the CLI. - * @param args are the CLI arguments that will be passed to JPlag. + * Executes the cli and handles the exceptions that might occur. + * @return true, if an exception has been caught. 
*/ - public static void main(String[] args) { - try { - logger.debug("Your version of JPlag is {}", JPlag.JPLAG_VERSION); - - CLI cli = new CLI(); - - ParseResult parseResult = cli.parseOptions(args); + public boolean executeCliAndHandleErrors() { + boolean hadErrors = false; - if (!parseResult.isUsageHelpRequested() && !(parseResult.subcommand() != null && parseResult.subcommand().isUsageHelpRequested())) { - ProgressBarLogger.setProgressBarProvider(new TongfeiProgressBarProvider()); - switch (cli.options.mode) { - case RUN -> cli.runJPlag(parseResult); - case VIEW -> cli.runViewer(null); - case RUN_AND_VIEW -> cli.runViewer(cli.runJPlag(parseResult)); - } - } - } catch (ExitException | IOException exception) { // do not pass exceptions here to keep log clean + try { + this.executeCli(); + } catch (IOException | ExitException exception) { if (exception.getCause() != null) { logger.error("{} - {}", exception.getMessage(), exception.getCause().getMessage()); } else { logger.error(exception.getMessage()); } - + hadErrors = true; + } finally { finalizeLogger(); - System.exit(1); } + + return hadErrors; } /** - * Creates a new instance + * Runs JPlag and returns the file the result has been written to + * @return The file containing the result + * @throws ExitException If JPlag threw an exception + * @throws FileNotFoundException If the file could not be written */ - public CLI() { - this.options = new CliOptions(); - this.commandLine = new CommandLine(options); + public File runJPlag() throws ExitException, FileNotFoundException { + File target = new File(getWritableFileName()); - this.commandLine.setHelpFactory(new HelpFactory()); + JPlagOptionsBuilder optionsBuilder = new JPlagOptionsBuilder(this.inputHandler); + JPlagOptions options = optionsBuilder.buildOptions(); + JPlagResult result = JPlagRunner.runJPlag(options); - this.commandLine.getHelpSectionMap().put(SECTION_KEY_OPTION_LIST, help -> help.optionList().lines().map(it -> { - if (it.startsWith(" -")) { - 
return " " + it; - } - return it; - }).collect(Collectors.joining(System.lineSeparator()))); - - buildSubcommands().forEach(commandLine::addSubcommand); + OutputFileGenerator.generateJPlagResultZip(result, target); + OutputFileGenerator.generateCsvOutput(result, new File(getResultFileBaseName()), this.inputHandler.getCliOptions()); - this.commandLine.getHelpSectionMap().put(SECTION_KEY_SYNOPSIS, help -> help.synopsis(help.synopsisHeadingLength()) + generateDescription()); - this.commandLine.getHelpSectionMap().put(SECTION_KEY_DESCRIPTION_HEADING, help -> OPTION_LIST_HEADING); - this.commandLine.setAllowSubcommandsAsOptionParameters(true); - } - - public File runJPlag(ParseResult parseResult) throws ExitException, FileNotFoundException { - JPlagOptions jplagOptions = buildOptionsFromArguments(parseResult); - JPlagResult result = JPlag.run(jplagOptions); - File target = new File(getResultFilePath()); - ReportObjectFactory reportObjectFactory = new ReportObjectFactory(target); - reportObjectFactory.createAndSaveReport(result); - logger.info("Successfully written the result: {}", target.getPath()); - logger.info("View the result using --mode or at: https://jplag.github.io/JPlag/"); - OutputFileGenerator.generateCsvOutput(result, new File(getResultFileBaseName()), this.options); return target; } - public void runViewer(File zipFile) throws IOException { - ReportViewer reportViewer = new ReportViewer(zipFile, this.options.advanced.port); - int port = reportViewer.start(); - logger.info("ReportViewer started on port http://localhost:{}", port); - Desktop.getDesktop().browse(URI.create("http://localhost:" + port + "/")); - - System.out.println("Press Enter key to exit..."); - System.in.read(); - reportViewer.stop(); - } - - private List buildSubcommands() { - return LanguageLoader.getAllAvailableLanguages().values().stream().map(language -> { - CommandSpec command = CommandSpec.create().name(language.getIdentifier()); - - for (LanguageOption option : 
language.getOptions().getOptionsAsList()) { - command.addOption(OptionSpec.builder(option.getNameAsUnixParameter()).type(option.getType().getJavaType()) - .description(option.getDescription()).build()); - } - command.mixinStandardHelpOptions(true); - command.addPositional( - CommandLine.Model.PositionalParamSpec.builder().type(List.class).auxiliaryTypes(File.class).hidden(true).required(false).build()); - - return command; - }).toList(); - } - /** - * Parses the options from the given command line arguments. Also prints help pages when requested. - * @param args The command line arguments - * @return the parse result generated by picocli + * Runs the report viewer using the given file as the default result.zip + * @param zipFile The zip file to pass to the viewer. Can be null, if no result should be opened by default + * @throws IOException If something went wrong with the internal server */ - public ParseResult parseOptions(String... args) throws CliException { - try { - ParseResult result = commandLine.parseArgs(args); - if (result.isUsageHelpRequested() || (result.subcommand() != null && result.subcommand().isUsageHelpRequested())) { - commandLine.getExecutionStrategy().execute(result); - } - return result; - } catch (CommandLine.ParameterException e) { - if (e.getArgSpec() != null && e.getArgSpec().isOption() && Arrays.asList(((OptionSpec) e.getArgSpec()).names()).contains("-l")) { - throw new CliException(String.format(UNKOWN_LANGAUGE_EXCEPTION, e.getValue(), - String.join(", ", LanguageLoader.getAllAvailableLanguageIdentifiers()))); - } - throw new CliException("Error during parsing", e); - } catch (CommandLine.PicocliException e) { - throw new CliException("Error during parsing", e); - } + public void runViewer(File zipFile) throws IOException { + JPlagRunner.runInternalServer(zipFile, this.inputHandler.getCliOptions().advanced.port); } - private static void finalizeLogger() { + private void finalizeLogger() { ILoggerFactory factory = 
LoggerFactory.getILoggerFactory(); if (!(factory instanceof CollectedLoggerFactory collectedLoggerFactory)) { return; @@ -197,102 +119,51 @@ private static void finalizeLogger() { collectedLoggerFactory.finalizeInstances(); } - /** - * Builds an options instance from parsed options. - * @return the newly built options - */ - public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws CliException { - Set submissionDirectories = new HashSet<>(List.of(this.options.rootDirectory)); - Set oldSubmissionDirectories = Set.of(this.options.oldDirectories); - List suffixes = List.of(this.options.advanced.suffixes); - submissionDirectories.addAll(List.of(this.options.newDirectories)); - - if (parseResult.subcommand() != null && parseResult.subcommand().hasMatchedPositional(0)) { - submissionDirectories.addAll(parseResult.subcommand().matchedPositional(0).getValue()); - } - - ClusteringOptions clusteringOptions = getClusteringOptions(this.options); - MergingOptions mergingOptions = getMergingOptions(this.options); - - JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories, - oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName, - JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions, - this.options.advanced.debug, mergingOptions, this.options.normalize); - - String baseCodePath = this.options.baseCode; - File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath); - if (baseCodeDirectory == null || baseCodeDirectory.exists()) { - return jPlagOptions.withBaseCodeSubmissionDirectory(baseCodeDirectory); + private String getResultFilePath() { + String optionValue = this.inputHandler.getCliOptions().resultFile; + if (optionValue.endsWith(DEFAULT_FILE_ENDING)) { + return optionValue; } - logger.warn("Using legacy partial base code API. 
Please migrate to new full path base code API."); - return jPlagOptions.withBaseCodeSubmissionName(baseCodePath); + return optionValue + DEFAULT_FILE_ENDING; } - private Language loadLanguage(ParseResult result) throws CliException { - if (result.subcommand() == null) { - return this.options.language; - } - ParseResult subcommandResult = result.subcommand(); - Language language = LanguageLoader.getLanguage(subcommandResult.commandSpec().name()) - .orElseThrow(() -> new CliException(IMPOSSIBLE_EXCEPTION)); - LanguageOptions languageOptions = language.getOptions(); - languageOptions.getOptionsAsList().forEach(option -> { - if (subcommandResult.hasMatchedOption(option.getNameAsUnixParameter())) { - option.setValue(subcommandResult.matchedOptionValue(option.getNameAsUnixParameter(), null)); - } - }); - return language; + private String getResultFileBaseName() { + String defaultOutputFile = getResultFilePath(); + return defaultOutputFile.substring(0, defaultOutputFile.length() - DEFAULT_FILE_ENDING.length()); } - private static ClusteringOptions getClusteringOptions(CliOptions options) { - ClusteringOptions clusteringOptions = new ClusteringOptions().withEnabled(!options.clustering.disable) - .withAlgorithm(options.clustering.enabled.algorithm).withSimilarityMetric(options.clustering.enabled.metric) - .withSpectralKernelBandwidth(options.clusterSpectralBandwidth).withSpectralGaussianProcessVariance(options.clusterSpectralNoise) - .withSpectralMinRuns(options.clusterSpectralMinRuns).withSpectralMaxRuns(options.clusterSpectralMaxRuns) - .withSpectralMaxKMeansIterationPerRun(options.clusterSpectralKMeansIterations) - .withAgglomerativeThreshold(options.clusterAgglomerativeThreshold) - .withAgglomerativeInterClusterSimilarity(options.clusterAgglomerativeInterClusterSimilarity); - - if (options.clusterPreprocessingNone) { - clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.NONE); + private String getOffsetFileName(int offset) { + if (offset <= 0) { + 
return getResultFilePath(); + } else { + return getResultFileBaseName() + "(" + offset + ")" + DEFAULT_FILE_ENDING; } + } - if (options.clusterPreprocessingCdf) { - clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.CUMULATIVE_DISTRIBUTION_FUNCTION); + private String getWritableFileName() throws CliException { + int retryAttempt = 0; + while (!this.inputHandler.getCliOptions().advanced.overwrite && new File(getOffsetFileName(retryAttempt)).exists() + && retryAttempt < NAME_COLLISION_ATTEMPTS) { + retryAttempt++; } - if (options.clusterPreprocessingPercentile != 0) { - clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.PERCENTILE) - .withPreprocessorPercentile(options.clusterPreprocessingPercentile); + String targetFileName = this.getOffsetFileName(retryAttempt); + File targetFile = new File(targetFileName); + if (!this.inputHandler.getCliOptions().advanced.overwrite && targetFile.exists()) { + throw new CliException(OUTPUT_FILE_EXISTS); } - if (options.clusterPreprocessingThreshold != 0) { - clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.THRESHOLD) - .withPreprocessorThreshold(options.clusterPreprocessingThreshold); + if (!FileUtils.checkWritable(targetFile)) { + throw new CliException(String.format(OUTPUT_FILE_NOT_WRITABLE, targetFileName)); } - return clusteringOptions; - } - - private static MergingOptions getMergingOptions(CliOptions options) { - return new MergingOptions(options.merging.enabled, options.merging.minimumNeighborLength, options.merging.maximumGapSize); - } - - private String generateDescription() { - var randomDescription = DESCRIPTIONS[RANDOM.nextInt(DESCRIPTIONS.length)]; - return String.format(DESCRIPTION_PATTERN, randomDescription, CREDITS); + return targetFileName; } - private String getResultFilePath() { - String optionValue = this.options.resultFile; - if (optionValue.endsWith(DEFAULT_FILE_ENDING)) { - return optionValue; + public static void main(String[] args) { + CLI cli = new 
CLI(args); + if (cli.executeCliAndHandleErrors()) { + System.exit(1); } - return optionValue + DEFAULT_FILE_ENDING; - } - - private String getResultFileBaseName() { - String defaultOutputFile = getResultFilePath(); - return defaultOutputFile.substring(0, defaultOutputFile.length() - DEFAULT_FILE_ENDING.length()); } } diff --git a/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java b/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java new file mode 100644 index 0000000000..16c309f4b9 --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/JPlagOptionsBuilder.java @@ -0,0 +1,104 @@ +package de.jplag.cli; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.jplag.cli.options.CliOptions; +import de.jplag.cli.picocli.CliInputHandler; +import de.jplag.clustering.ClusteringOptions; +import de.jplag.clustering.Preprocessing; +import de.jplag.merging.MergingOptions; +import de.jplag.options.JPlagOptions; + +/** + * Handles the building of JPlag options from the cli options + */ +public class JPlagOptionsBuilder { + private static final Logger logger = LoggerFactory.getLogger(JPlagOptionsBuilder.class); + + private final CliInputHandler cliInputHandler; + private final CliOptions cliOptions; + + /** + * @param cliInputHandler The cli handler containing the parsed cli options + */ + public JPlagOptionsBuilder(CliInputHandler cliInputHandler) { + this.cliInputHandler = cliInputHandler; + this.cliOptions = this.cliInputHandler.getCliOptions(); + } + + /** + * Builds the JPlag options + * @return The JPlag options + * @throws CliException If the input handler could not properly parse everything.
+ */ + public JPlagOptions buildOptions() throws CliException { + Set submissionDirectories = new HashSet<>(List.of(this.cliOptions.rootDirectory)); + Set oldSubmissionDirectories = Set.of(this.cliOptions.oldDirectories); + List suffixes = List.of(this.cliOptions.advanced.suffixes); + submissionDirectories.addAll(List.of(this.cliOptions.newDirectories)); + submissionDirectories.addAll(this.cliInputHandler.getSubcommandSubmissionDirectories()); + + JPlagOptions jPlagOptions = initializeJPlagOptions(submissionDirectories, oldSubmissionDirectories, suffixes); + + String baseCodePath = this.cliOptions.baseCode; + File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath); + if (baseCodeDirectory == null || baseCodeDirectory.exists()) { + return jPlagOptions.withBaseCodeSubmissionDirectory(baseCodeDirectory); + } + logger.error("Using legacy partial base code API. Please migrate to new full path base code API."); + return jPlagOptions.withBaseCodeSubmissionName(baseCodePath); + } + + private JPlagOptions initializeJPlagOptions(Set submissionDirectories, Set oldSubmissionDirectories, List suffixes) + throws CliException { + ClusteringOptions clusteringOptions = getClusteringOptions(); + MergingOptions mergingOptions = getMergingOptions(); + + return new JPlagOptions(this.cliInputHandler.getSelectedLanguage(), this.cliOptions.minTokenMatch, submissionDirectories, + oldSubmissionDirectories, null, this.cliOptions.advanced.subdirectory, suffixes, this.cliOptions.advanced.exclusionFileName, + JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.cliOptions.advanced.similarityThreshold, this.cliOptions.shownComparisons, + clusteringOptions, this.cliOptions.advanced.debug, mergingOptions, this.cliOptions.normalize); + } + + private ClusteringOptions getClusteringOptions() { + ClusteringOptions clusteringOptions = new ClusteringOptions().withEnabled(!this.cliOptions.clustering.disable) + 
.withAlgorithm(this.cliOptions.clustering.enabled.algorithm).withSimilarityMetric(this.cliOptions.clustering.enabled.metric) + .withSpectralKernelBandwidth(this.cliOptions.clusterSpectralBandwidth) + .withSpectralGaussianProcessVariance(this.cliOptions.clusterSpectralNoise).withSpectralMinRuns(this.cliOptions.clusterSpectralMinRuns) + .withSpectralMaxRuns(this.cliOptions.clusterSpectralMaxRuns) + .withSpectralMaxKMeansIterationPerRun(this.cliOptions.clusterSpectralKMeansIterations) + .withAgglomerativeThreshold(this.cliOptions.clusterAgglomerativeThreshold) + .withAgglomerativeInterClusterSimilarity(this.cliOptions.clusterAgglomerativeInterClusterSimilarity); + + if (this.cliOptions.clusterPreprocessingNone) { + clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.NONE); + } + + if (this.cliOptions.clusterPreprocessingCdf) { + clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.CUMULATIVE_DISTRIBUTION_FUNCTION); + } + + if (this.cliOptions.clusterPreprocessingPercentile != 0) { + clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.PERCENTILE) + .withPreprocessorPercentile(this.cliOptions.clusterPreprocessingPercentile); + } + + if (this.cliOptions.clusterPreprocessingThreshold != 0) { + clusteringOptions = clusteringOptions.withPreprocessor(Preprocessing.THRESHOLD) + .withPreprocessorThreshold(this.cliOptions.clusterPreprocessingThreshold); + } + + return clusteringOptions; + } + + private MergingOptions getMergingOptions() { + return new MergingOptions(this.cliOptions.merging.enabled, this.cliOptions.merging.minimumNeighborLength, + this.cliOptions.merging.maximumGapSize); + } +} diff --git a/cli/src/main/java/de/jplag/cli/JPlagRunner.java b/cli/src/main/java/de/jplag/cli/JPlagRunner.java new file mode 100644 index 0000000000..e53a0d03de --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/JPlagRunner.java @@ -0,0 +1,52 @@ +package de.jplag.cli; + +import java.awt.Desktop; +import java.io.File; +import 
java.io.IOException; +import java.net.URI; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.jplag.JPlag; +import de.jplag.JPlagResult; +import de.jplag.cli.server.ReportViewer; +import de.jplag.exceptions.ExitException; +import de.jplag.options.JPlagOptions; + +/** + * Wraps the execution of the JPlag components + */ +public final class JPlagRunner { + private static final Logger logger = LoggerFactory.getLogger(JPlagRunner.class); + + private JPlagRunner() { + } + + /** + * Executes JPlag + * @param options The options to pass to JPlag + * @return The result returned by JPlag + * @throws ExitException If JPlag throws an error + */ + public static JPlagResult runJPlag(JPlagOptions options) throws ExitException { + return JPlag.run(options); + } + + /** + * Runs the internal server. Blocks until the server has stopped. + * @param zipFile The zip file to pass to the server. May be null. + * @param port The port to open the server on + * @throws IOException If the internal server throws an exception + */ + public static void runInternalServer(File zipFile, int port) throws IOException { + ReportViewer reportViewer = new ReportViewer(zipFile, port); + int actualPort = reportViewer.start(); + logger.info("ReportViewer started on port http://localhost:{}", actualPort); + Desktop.getDesktop().browse(URI.create("http://localhost:" + actualPort + "/")); + + System.out.println("Press Enter key to exit..."); + System.in.read(); + reportViewer.stop(); + } +} diff --git a/cli/src/main/java/de/jplag/cli/OutputFileGenerator.java b/cli/src/main/java/de/jplag/cli/OutputFileGenerator.java index 028361346f..4d9d41c84a 100644 --- a/cli/src/main/java/de/jplag/cli/OutputFileGenerator.java +++ b/cli/src/main/java/de/jplag/cli/OutputFileGenerator.java @@ -1,23 +1,28 @@ package de.jplag.cli; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import de.jplag.JPlagResult; 
+import de.jplag.cli.options.CliOptions; import de.jplag.csv.comparisons.CsvComparisonOutput; +import de.jplag.reporting.reportobject.ReportObjectFactory; +/** + * Manages the creation of output files + */ public final class OutputFileGenerator { private static final Logger logger = LoggerFactory.getLogger(OutputFileGenerator.class); private OutputFileGenerator() { - // Prevents default constructor } /** - * Exports the given result as csvs, if the csvExport is activated in the options. Both a full and an anonymized version + * Exports the given result as CSVs, if the csvExport is activated in the options. Both a full and an anonymized version * will be written. * @param result The result to export * @param outputRoot The root folder for the output @@ -33,4 +38,17 @@ public static void generateCsvOutput(JPlagResult result, File outputRoot, CliOpt } } } + + /** + * Generates the JPlag result zip + * @param result The JPlag result + * @param outputFile The output file + * @throws FileNotFoundException If the file cannot be written + */ + public static void generateJPlagResultZip(JPlagResult result, File outputFile) throws FileNotFoundException { + ReportObjectFactory reportObjectFactory = new ReportObjectFactory(outputFile); + reportObjectFactory.createAndSaveReport(result); + logger.info("Successfully written the result: {}", outputFile.getPath()); + logger.info("View the result using --mode or at: https://jplag.github.io/JPlag/"); + } } diff --git a/cli/src/main/java/de/jplag/cli/logger/CliProgressBarProvider.java b/cli/src/main/java/de/jplag/cli/logger/CliProgressBarProvider.java new file mode 100644 index 0000000000..ebfd98dd9a --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/logger/CliProgressBarProvider.java @@ -0,0 +1,26 @@ +package de.jplag.cli.logger; + +import de.jplag.logging.ProgressBar; +import de.jplag.logging.ProgressBarProvider; +import de.jplag.logging.ProgressBarType; + +import me.tongfei.progressbar.ProgressBarBuilder; +import
me.tongfei.progressbar.ProgressBarStyle; + +/** + * A ProgressBar provider, that uses the tongfei progress bar library underneath, to show progress bars on the cli. + */ +public class CliProgressBarProvider implements ProgressBarProvider { + @Override + public ProgressBar initProgressBar(ProgressBarType type, int totalSteps) { + if (type.isIdleBar()) { + IdleBar idleBar = new IdleBar(type.getDefaultText()); + idleBar.start(); + return idleBar; + } else { + me.tongfei.progressbar.ProgressBar progressBar = new ProgressBarBuilder().setTaskName(type.getDefaultText()).setInitialMax(totalSteps) + .setStyle(ProgressBarStyle.ASCII).build(); + return new TongfeiProgressBar(progressBar); + } + } +} diff --git a/cli/src/main/java/de/jplag/cli/logger/CollectedLogger.java b/cli/src/main/java/de/jplag/cli/logger/CollectedLogger.java index 3be42c8cd7..85b40629a1 100644 --- a/cli/src/main/java/de/jplag/cli/logger/CollectedLogger.java +++ b/cli/src/main/java/de/jplag/cli/logger/CollectedLogger.java @@ -1,43 +1,22 @@ package de.jplag.cli.logger; import java.io.PrintStream; -import java.io.Serial; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.concurrent.ConcurrentLinkedDeque; -import org.slf4j.helpers.FormattingTuple; -import org.slf4j.helpers.MarkerIgnoringBase; +import org.slf4j.Marker; +import org.slf4j.event.Level; import org.slf4j.helpers.MessageFormatter; -import org.slf4j.spi.LocationAwareLogger; /** - * This logger is able to collect errors and print them at the end.
Mainly adopted from org.slf4j.impl.SimpleLogger - * @author Dominik Fuchss + * A logger implementation, that prints all errors during finalization */ -public final class CollectedLogger extends MarkerIgnoringBase { +public class CollectedLogger extends JPlagLoggerBase { + private static final int MAXIMUM_MESSAGE_LENGTH = 32; - @Serial - private static final long serialVersionUID = -1278670638921140275L; - - private static final int LOG_LEVEL_TRACE = LocationAwareLogger.TRACE_INT; - private static final int LOG_LEVEL_DEBUG = LocationAwareLogger.DEBUG_INT; - private static final int LOG_LEVEL_INFO = LocationAwareLogger.INFO_INT; - private static final int LOG_LEVEL_WARN = LocationAwareLogger.WARN_INT; - private static final int LOG_LEVEL_ERROR = LocationAwareLogger.ERROR_INT; - - /** - * The default log level that shall be used for external libraries (like Stanford Core NLP) - */ - private static final int LOG_LEVEL_FOR_EXTERNAL_LIBRARIES = LOG_LEVEL_ERROR; - - private static final int CURRENT_LOG_LEVEL = LOG_LEVEL_INFO; - - /** - * The short name of this simple log instance - */ - private transient String shortLogName = null; + private static final PrintStream TARGET_STREAM = System.out; /** * Indicator whether finalization is in progress. 
@@ -47,262 +26,67 @@ public final class CollectedLogger extends MarkerIgnoringBase { private final transient SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-hh:mm:ss_SSS"); - private final ConcurrentLinkedDeque> allErrors = new ConcurrentLinkedDeque<>(); + private final ConcurrentLinkedDeque allErrors = new ConcurrentLinkedDeque<>(); - CollectedLogger(String name) { - this.name = name; + public CollectedLogger(String name) { + super(LOG_LEVEL_INFO, name); } - private void log(int level, String message, Throwable throwable) { - log(level, message, throwable, null); - } - - private void log(int level, String message, Throwable throwable, Date timeOfError) { - if (!isLevelEnabled(level)) { - return; - } + @Override + protected void handleNormalizedLoggingCall(Level level, Marker marker, String format, Object[] args, Throwable cause) { + String logMessage = prepareFormattedMessage(format, args); + LogEntry entry = new LogEntry(logMessage, cause, new Date(), level); if (level == LOG_LEVEL_ERROR && !isFinalizing) { - // Buffer errors for the final output - allErrors.add(new Triple<>(message, throwable, new Date())); - return; + allErrors.add(entry); + } else { + printLogEntry(entry); } + } - StringBuilder builder = new StringBuilder(32); + private String prepareFormattedMessage(String format, Object[] args) { + if (args == null) { + return format; + } - // Append date-time - builder.append(dateFormat.format(timeOfError == null ? 
new Date() : timeOfError)).append(' '); + return MessageFormatter.arrayFormat(format, args).getMessage(); + } - // Append current Level - builder.append('[').append(renderLevel(level)).append(']').append(' '); + private void printLogEntry(LogEntry entry) { + StringBuilder output = prepareLogOutput(entry); - // Append the name of the log instance - if (shortLogName == null) { - shortLogName = computeShortName(); + TARGET_STREAM.println(output); + if (entry.cause() != null) { + entry.cause().printStackTrace(TARGET_STREAM); } - builder.append(shortLogName).append(" - "); - // Append the message - builder.append(message); + TARGET_STREAM.flush(); + } - write(builder, throwable); + private StringBuilder prepareLogOutput(LogEntry entry) { + StringBuilder outputBuilder = new StringBuilder(MAXIMUM_MESSAGE_LENGTH); + outputBuilder.append(dateFormat.format(entry.timeOfLog())).append(' '); + outputBuilder.append('[').append(entry.logLevel().name()).append("] "); + outputBuilder.append(computeShortName()).append(" - "); + outputBuilder.append(entry.message()); + return outputBuilder; } void printAllErrorsForLogger() { this.isFinalizing = true; - // Copy errors to prevent infinite recursion - var errors = new ArrayList<>(this.allErrors); - if (errors.isEmpty()) { - return; + ArrayList errors = new ArrayList<>(this.allErrors); + + if (!errors.isEmpty()) { + info("Summary of all errors:"); + this.allErrors.removeAll(errors); + for (LogEntry errorEntry : errors) { + printLogEntry(errorEntry); + } } - this.allErrors.removeAll(errors); - - info("Summary of all Errors:"); - errors.forEach(error -> log(LOG_LEVEL_ERROR, error.first(), error.second(), error.third())); - isFinalizing = false; - } - - @SuppressWarnings("java:S106") - void write(StringBuilder buf, Throwable throwable) { - PrintStream targetStream = System.out; - - targetStream.println(buf.toString()); - writeThrowable(throwable, targetStream); - targetStream.flush(); - } - - private void writeThrowable(Throwable throwable, 
PrintStream targetStream) { - if (throwable != null) { - throwable.printStackTrace(targetStream); - } + this.isFinalizing = false; } private String computeShortName() { return name.substring(name.lastIndexOf(".") + 1); } - - private boolean isLevelEnabled(int logLevel) { - return logLevel >= (isJPlagLog() ? CURRENT_LOG_LEVEL : LOG_LEVEL_FOR_EXTERNAL_LIBRARIES); - } - - private boolean isJPlagLog() { - return this.name.startsWith("de.jplag."); - } - - private String renderLevel(int level) { - return switch (level) { - case LOG_LEVEL_TRACE -> "TRACE"; - case LOG_LEVEL_DEBUG -> "DEBUG"; - case LOG_LEVEL_INFO -> "INFO"; - case LOG_LEVEL_WARN -> "WARN"; - case LOG_LEVEL_ERROR -> "ERROR"; - default -> throw new IllegalStateException("Unrecognized level [" + level + "]"); - }; - } - - @Override - public boolean isTraceEnabled() { - return isLevelEnabled(LOG_LEVEL_TRACE); - } - - @Override - public void trace(String message) { - log(LOG_LEVEL_TRACE, message, null); - } - - @Override - public void trace(String format, Object param1) { - formatAndLog(LOG_LEVEL_TRACE, format, param1, null); - } - - @Override - public void trace(String format, Object param1, Object param2) { - formatAndLog(LOG_LEVEL_TRACE, format, param1, param2); - } - - @Override - public void trace(String format, Object... argArray) { - formatAndLog(LOG_LEVEL_TRACE, format, argArray); - } - - @Override - public void trace(String message, Throwable t) { - log(LOG_LEVEL_TRACE, message, t); - } - - @Override - public boolean isDebugEnabled() { - return isLevelEnabled(LOG_LEVEL_DEBUG); - } - - @Override - public void debug(String message) { - log(LOG_LEVEL_DEBUG, message, null); - } - - @Override - public void debug(String format, Object param1) { - formatAndLog(LOG_LEVEL_DEBUG, format, param1, null); - } - - @Override - public void debug(String format, Object param1, Object param2) { - formatAndLog(LOG_LEVEL_DEBUG, format, param1, param2); - } - - @Override - public void debug(String format, Object... 
argArray) { - formatAndLog(LOG_LEVEL_DEBUG, format, argArray); - } - - @Override - public void debug(String message, Throwable throwable) { - log(LOG_LEVEL_DEBUG, message, throwable); - } - - @Override - public boolean isInfoEnabled() { - return isLevelEnabled(LOG_LEVEL_INFO); - } - - @Override - public void info(String message) { - log(LOG_LEVEL_INFO, message, null); - } - - @Override - public void info(String format, Object arg) { - formatAndLog(LOG_LEVEL_INFO, format, arg, null); - } - - @Override - public void info(String format, Object arg1, Object arg2) { - formatAndLog(LOG_LEVEL_INFO, format, arg1, arg2); - } - - @Override - public void info(String format, Object... argArray) { - formatAndLog(LOG_LEVEL_INFO, format, argArray); - } - - @Override - public void info(String message, Throwable throwable) { - log(LOG_LEVEL_INFO, message, throwable); - } - - @Override - public boolean isWarnEnabled() { - return isLevelEnabled(LOG_LEVEL_WARN); - } - - @Override - public void warn(String message) { - log(LOG_LEVEL_WARN, message, null); - } - - @Override - public void warn(String format, Object arg) { - formatAndLog(LOG_LEVEL_WARN, format, arg, null); - } - - @Override - public void warn(String format, Object arg1, Object arg2) { - formatAndLog(LOG_LEVEL_WARN, format, arg1, arg2); - } - - @Override - public void warn(String format, Object... 
argArray) { - formatAndLog(LOG_LEVEL_WARN, format, argArray); - } - - @Override - public void warn(String message, Throwable throwable) { - log(LOG_LEVEL_WARN, message, throwable); - } - - @Override - public boolean isErrorEnabled() { - return isLevelEnabled(LOG_LEVEL_ERROR); - } - - @Override - public void error(String message) { - log(LOG_LEVEL_ERROR, message, null); - } - - @Override - public void error(String format, Object arg) { - formatAndLog(LOG_LEVEL_ERROR, format, arg, null); - } - - @Override - public void error(String format, Object arg1, Object arg2) { - formatAndLog(LOG_LEVEL_ERROR, format, arg1, arg2); - } - - @Override - public void error(String format, Object... argArray) { - formatAndLog(LOG_LEVEL_ERROR, format, argArray); - } - - @Override - public void error(String message, Throwable throwable) { - log(LOG_LEVEL_ERROR, message, throwable); - } - - private void formatAndLog(int level, String format, Object arg1, Object arg2) { - if (!isLevelEnabled(level)) { - return; - } - FormattingTuple formattingTuple = MessageFormatter.format(format, arg1, arg2); - log(level, formattingTuple.getMessage(), formattingTuple.getThrowable()); - } - - private void formatAndLog(int level, String format, Object... 
arguments) { - if (!isLevelEnabled(level)) { - return; - } - FormattingTuple formattingTuple = MessageFormatter.arrayFormat(format, arguments); - log(level, formattingTuple.getMessage(), formattingTuple.getThrowable()); - } } diff --git a/cli/src/main/java/de/jplag/cli/logger/IdleBar.java b/cli/src/main/java/de/jplag/cli/logger/IdleBar.java new file mode 100644 index 0000000000..fe2fb723c6 --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/logger/IdleBar.java @@ -0,0 +1,103 @@ +package de.jplag.cli.logger; + +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.jline.terminal.Terminal; +import org.jline.terminal.TerminalBuilder; + +import de.jplag.logging.ProgressBar; + +/** + * Prints an idle progress bar, that does not count upwards. + */ +public class IdleBar implements ProgressBar { + private final PrintStream output; + + private final Thread runner; + + private long startTime; + private final String text; + private int length; + + private int currentPos; + private int currentDirection; + + private volatile boolean running = false; + + public IdleBar(String text) { + this.output = System.out; + this.runner = new Thread(this::run); + this.length = 50; + this.currentDirection = -1; + this.currentPos = 0; + this.text = text; + try { + Terminal terminal = TerminalBuilder.terminal(); + this.length = Math.min(terminal.getWidth() / 2, terminal.getWidth() - 50); + terminal.close(); + } catch (IOException ignore) { + // ignore exceptions here.
If we cannot access the terminal, we guess a width + } + if (this.length < 10) { + this.length = 10; + } + } + + public void start() { + this.startTime = System.currentTimeMillis(); + this.running = true; + this.runner.start(); + } + + @Override + public void dispose() { + this.running = false; + try { + this.runner.join(); + } catch (InterruptedException ignored) { + Thread.currentThread().interrupt(); + } + this.output.println(); + } + + private void run() { + while (running) { + this.output.print('\r'); + this.output.print(printLine()); + if (currentPos == 0 || currentPos == length - 1) { + currentDirection *= -1; + } + try { + Thread.sleep(200); + } catch (InterruptedException ignore) { + Thread.currentThread().interrupt(); + } + currentPos += currentDirection; + } + } + + private String printLine() { + StringBuilder line = new StringBuilder(); + line.append(this.text).append(' '); + + line.append('<'); + line.append(" ".repeat(Math.max(0, currentPos))); + line.append("<+>"); + line.append(" ".repeat(Math.max(0, length - currentPos - 1))); + line.append('>'); + + long timeRunning = System.currentTimeMillis() - this.startTime; + line.append(' '); + String duration = DurationFormatUtils.formatDuration(timeRunning, "H:mm:ss"); + line.append(duration); + + return line.toString(); + } + + @Override + public void step(int number) { + // does nothing, because the idle bar has no steps + } +} diff --git a/cli/src/main/java/de/jplag/cli/logger/JPlagLoggerBase.java b/cli/src/main/java/de/jplag/cli/logger/JPlagLoggerBase.java new file mode 100644 index 0000000000..34a59fedc7 --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/logger/JPlagLoggerBase.java @@ -0,0 +1,92 @@ +package de.jplag.cli.logger; + +import org.slf4j.Marker; +import org.slf4j.event.Level; +import org.slf4j.helpers.AbstractLogger; + +/** + * Handles the enabled log levels for SLF4J. 
+ */ +public abstract class JPlagLoggerBase extends AbstractLogger { + protected static final Level LOG_LEVEL_TRACE = Level.TRACE; + protected static final Level LOG_LEVEL_DEBUG = Level.DEBUG; + protected static final Level LOG_LEVEL_INFO = Level.INFO; + protected static final Level LOG_LEVEL_WARN = Level.WARN; + protected static final Level LOG_LEVEL_ERROR = Level.ERROR; + + private static final Level LOG_LEVEL_FOR_EXTERNAL_LIBRARIES = LOG_LEVEL_ERROR; + + private final Level currentLogLevel; + + /** + * @param currentLogLevel The current log level + * @param name The name of the logger + */ + protected JPlagLoggerBase(Level currentLogLevel, String name) { + this.currentLogLevel = currentLogLevel; + this.name = name; + } + + @Override + public boolean isTraceEnabled() { + return isLogLevelEnabled(LOG_LEVEL_TRACE); + } + + @Override + public boolean isTraceEnabled(Marker marker) { + return isTraceEnabled(); + } + + @Override + public boolean isDebugEnabled() { + return isLogLevelEnabled(LOG_LEVEL_DEBUG); + } + + @Override + public boolean isDebugEnabled(Marker marker) { + return isDebugEnabled(); + } + + @Override + public boolean isInfoEnabled() { + return isLogLevelEnabled(LOG_LEVEL_INFO); + } + + @Override + public boolean isInfoEnabled(Marker marker) { + return isInfoEnabled(); + } + + @Override + public boolean isWarnEnabled() { + return isLogLevelEnabled(LOG_LEVEL_WARN); + } + + @Override + public boolean isWarnEnabled(Marker marker) { + return isWarnEnabled(); + } + + @Override + public boolean isErrorEnabled() { + return isLogLevelEnabled(LOG_LEVEL_ERROR); + } + + @Override + public boolean isErrorEnabled(Marker marker) { + return isErrorEnabled(); + } + + private boolean isLogLevelEnabled(Level logLevel) { + return logLevel.toInt() >= (isJPlagLog() ? 
this.currentLogLevel.toInt() : LOG_LEVEL_FOR_EXTERNAL_LIBRARIES.toInt()); + } + + private boolean isJPlagLog() { + return this.name.startsWith("de.jplag."); + } + + @Override + protected String getFullyQualifiedCallerName() { + return null; // does not seem to be used by anything, but is required by SLF4J + } +} diff --git a/cli/src/main/java/de/jplag/cli/logger/LogEntry.java b/cli/src/main/java/de/jplag/cli/logger/LogEntry.java new file mode 100644 index 0000000000..86d94d2ce7 --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/logger/LogEntry.java @@ -0,0 +1,15 @@ +package de.jplag.cli.logger; + +import java.util.Date; + +import org.slf4j.event.Level; + +/** + * Holds a log entry for later usage + * @param message The message of the log + * @param cause The cause of the log + * @param timeOfLog The time of the log + * @param logLevel The level of the log entry + */ +public record LogEntry(String message, Throwable cause, Date timeOfLog, Level logLevel) { +} diff --git a/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java b/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java deleted file mode 100644 index da09fff337..0000000000 --- a/cli/src/main/java/de/jplag/cli/logger/TongfeiProgressBarProvider.java +++ /dev/null @@ -1,20 +0,0 @@ -package de.jplag.cli.logger; - -import de.jplag.logging.ProgressBar; -import de.jplag.logging.ProgressBarProvider; -import de.jplag.logging.ProgressBarType; - -import me.tongfei.progressbar.ProgressBarBuilder; -import me.tongfei.progressbar.ProgressBarStyle; - -/** - * A ProgressBar provider, that used the tongfei progress bar library underneath, to show progress bars on the cli. 
- */ -public class TongfeiProgressBarProvider implements ProgressBarProvider { - @Override - public ProgressBar initProgressBar(ProgressBarType type, int totalSteps) { - me.tongfei.progressbar.ProgressBar progressBar = new ProgressBarBuilder().setTaskName(type.getDefaultText()).setInitialMax(totalSteps) - .setStyle(ProgressBarStyle.ASCII).build(); - return new TongfeiProgressBar(progressBar); - } -} diff --git a/cli/src/main/java/de/jplag/cli/logger/Triple.java b/cli/src/main/java/de/jplag/cli/logger/Triple.java deleted file mode 100644 index 76a3090634..0000000000 --- a/cli/src/main/java/de/jplag/cli/logger/Triple.java +++ /dev/null @@ -1,4 +0,0 @@ -package de.jplag.cli.logger; - -public record Triple(A first, B second, C third) { -} diff --git a/cli/src/main/java/de/jplag/cli/CliOptions.java b/cli/src/main/java/de/jplag/cli/options/CliOptions.java similarity index 91% rename from cli/src/main/java/de/jplag/cli/CliOptions.java rename to cli/src/main/java/de/jplag/cli/options/CliOptions.java index 384a2d41ce..0cc7f13260 100644 --- a/cli/src/main/java/de/jplag/cli/CliOptions.java +++ b/cli/src/main/java/de/jplag/cli/options/CliOptions.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.options; import java.io.File; @@ -51,8 +51,7 @@ public class CliOptions implements Runnable { "--result-file"}, description = "Name of the file in which the comparison results will be stored (default: ${DEFAULT-VALUE}). Missing .zip endings will be automatically added.") public String resultFile = "results"; - @Option(names = {"-M", - "--mode"}, description = "The mode of JPlag: either only run analysis, only open the viewer, or do both (default: ${DEFAULT_VALUE})") + @Option(names = {"-M", "--mode"}, description = "The mode of JPlag. One of: ${COMPLETION-CANDIDATES} (default: ${DEFAULT-VALUE})") public JPlagMode mode = JPlagMode.RUN; @Option(names = {"--normalize"}, description = "Activate the normalization of tokens.
Supported for languages: Java, C++.") @@ -99,6 +98,9 @@ public static class Advanced { @Option(names = "--csv-export", description = "Export pairwise similarity values as a CSV file.") public boolean csvExport = false; + + @Option(names = "--overwrite", description = "Existing result files will be overwritten.") + public boolean overwrite = false; } public static class Clustering { @@ -109,10 +111,12 @@ public static class Clustering { public ClusteringEnabled enabled = new ClusteringEnabled(); public static class ClusteringEnabled { - @Option(names = {"--cluster-alg", "--cluster-algorithm"}, description = "Specifies the clustering algorithm (default: ${DEFAULT-VALUE}).") + @Option(names = {"--cluster-alg", + "--cluster-algorithm"}, description = "Specifies the clustering algorithm. Available algorithms: ${COMPLETION-CANDIDATES} (default: ${DEFAULT-VALUE}).") public ClusteringAlgorithm algorithm = new ClusteringOptions().algorithm(); - @Option(names = {"--cluster-metric"}, description = "The similarity metric used for clustering (default: ${DEFAULT-VALUE}).") + @Option(names = { + "--cluster-metric"}, description = "The similarity metric used for clustering. 
Available metrics: ${COMPLETION-CANDIDATES} (default: ${DEFAULT-VALUE}).") public SimilarityMetric metric = new ClusteringOptions().similarityMetric(); } } @@ -122,7 +126,7 @@ public static class Merging { public boolean enabled = MergingOptions.DEFAULT_ENABLED; @Option(names = { - "--neighbor-length"}, description = "Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: ${DEFAULT-VALUE}).%n") + "--neighbor-length"}, description = "Minimal length of neighboring matches to be merged (between 1 and minTokenMatch, default: ${DEFAULT-VALUE}).") public int minimumNeighborLength = MergingOptions.DEFAULT_NEIGHBOR_LENGTH; @Option(names = { diff --git a/cli/src/main/java/de/jplag/cli/JPlagMode.java b/cli/src/main/java/de/jplag/cli/options/JPlagMode.java similarity index 91% rename from cli/src/main/java/de/jplag/cli/JPlagMode.java rename to cli/src/main/java/de/jplag/cli/options/JPlagMode.java index 402a18b58b..8d1607d46b 100644 --- a/cli/src/main/java/de/jplag/cli/JPlagMode.java +++ b/cli/src/main/java/de/jplag/cli/options/JPlagMode.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.options; /** * The mode JPlag runs in. This influences which steps JPlag will execute. 
diff --git a/cli/src/main/java/de/jplag/cli/LanguageCandidates.java b/cli/src/main/java/de/jplag/cli/options/LanguageCandidates.java similarity index 92% rename from cli/src/main/java/de/jplag/cli/LanguageCandidates.java rename to cli/src/main/java/de/jplag/cli/options/LanguageCandidates.java index 715d504ea3..e1c764b8f5 100644 --- a/cli/src/main/java/de/jplag/cli/LanguageCandidates.java +++ b/cli/src/main/java/de/jplag/cli/options/LanguageCandidates.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.options; import java.util.ArrayList; diff --git a/cli/src/main/java/de/jplag/cli/LanguageConverter.java b/cli/src/main/java/de/jplag/cli/options/LanguageConverter.java similarity index 91% rename from cli/src/main/java/de/jplag/cli/LanguageConverter.java rename to cli/src/main/java/de/jplag/cli/options/LanguageConverter.java index 0a2523b952..9f92ec9449 100644 --- a/cli/src/main/java/de/jplag/cli/LanguageConverter.java +++ b/cli/src/main/java/de/jplag/cli/options/LanguageConverter.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.options; import de.jplag.Language; diff --git a/cli/src/main/java/de/jplag/cli/LanguageLoader.java b/cli/src/main/java/de/jplag/cli/options/LanguageLoader.java similarity index 99% rename from cli/src/main/java/de/jplag/cli/LanguageLoader.java rename to cli/src/main/java/de/jplag/cli/options/LanguageLoader.java index 2ee1c815ba..4082476381 100644 --- a/cli/src/main/java/de/jplag/cli/LanguageLoader.java +++ b/cli/src/main/java/de/jplag/cli/options/LanguageLoader.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.options; import java.util.Collections; import java.util.Map; diff --git a/cli/src/main/java/de/jplag/cli/picocli/CliInputHandler.java b/cli/src/main/java/de/jplag/cli/picocli/CliInputHandler.java new file mode 100644 index 0000000000..4ae12fbfc1 --- /dev/null +++ b/cli/src/main/java/de/jplag/cli/picocli/CliInputHandler.java @@ -0,0 +1,170 @@ +package de.jplag.cli.picocli; + +import static 
picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_DESCRIPTION_HEADING; +import static picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_OPTION_LIST; +import static picocli.CommandLine.Model.UsageMessageSpec.SECTION_KEY_SYNOPSIS; + +import java.io.File; +import java.security.SecureRandom; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; + +import de.jplag.Language; +import de.jplag.cli.CliException; +import de.jplag.cli.options.CliOptions; +import de.jplag.cli.options.LanguageLoader; +import de.jplag.options.LanguageOption; +import de.jplag.options.LanguageOptions; + +import picocli.CommandLine; +import picocli.CommandLine.ParseResult; + +/** + * Handles the parsing of the command line arguments + */ +public class CliInputHandler { + private static final String OPTION_LIST_HEADING = "Parameter descriptions: "; + + private static final String UNKNOWN_LANGUAGE_EXCEPTION = "Language %s does not exists. Available languages are: %s"; + private static final String IMPOSSIBLE_EXCEPTION = "This should not have happened." + + " Please create an issue on github (https://github.com/jplag/JPlag/issues) with the entire output."; + + private static final String[] DESCRIPTIONS = {"Detecting Software Plagiarism", "Software-Archaeological Playground", "Since 1996", + "Scientifically Published", "Maintained by SDQ", "RIP Structure and Table", "What else?", "You have been warned!", "Since Java 1.0", + "More Abstract than Tree", "Students Nightmare", "No, changing variable names does not work...", "The tech is out there!", + "Developed by plagiarism experts.", "State of the Art Obfuscation Resilience", "www.helmholtz.software/software/jplag"}; + private static final String DESCRIPTION_PATTERN = "%nJPlag - %s%n%s%n%n"; + private static final String CREDITS = "Created by IPD Tichy, Guido Malpohl, and others. Maintained by Timur Saglam and Sebastian Hahner. 
Logo by Sandro Koch."; + + private static final String PARAMETER_SHORT_PREFIX = " -"; + private static final String PARAMETER_SHORT_ADDITIONAL_INDENT = " "; + + private static final Random RANDOM = new SecureRandom(); + + private final String[] args; + private final CliOptions options; + private final CommandLine commandLine; + + private ParseResult parseResult; + + /** + * Creates a new handler. Before using it you need to call {@link #parse()} + * @param args The arguments. + */ + public CliInputHandler(String[] args) { + this.args = args; + this.options = new CliOptions(); + this.commandLine = buildCommandLine(); + } + + private CommandLine buildCommandLine() { + CommandLine cli = new CommandLine(this.options).setCaseInsensitiveEnumValuesAllowed(true); + cli.setHelpFactory(new HelpFactory()); + + cli.getHelpSectionMap().put(SECTION_KEY_OPTION_LIST, help -> help.optionList().lines().map(it -> { + if (it.startsWith(PARAMETER_SHORT_PREFIX)) { + return PARAMETER_SHORT_ADDITIONAL_INDENT + it; + } + return it; + }).collect(Collectors.joining(System.lineSeparator())) + System.lineSeparator()); + + buildSubcommands().forEach(cli::addSubcommand); + + cli.getHelpSectionMap().put(SECTION_KEY_SYNOPSIS, help -> help.synopsis(help.synopsisHeadingLength()) + generateDescription()); + cli.getHelpSectionMap().put(SECTION_KEY_DESCRIPTION_HEADING, help -> OPTION_LIST_HEADING); + cli.setAllowSubcommandsAsOptionParameters(true); + + return cli; + } + + private List buildSubcommands() { + return LanguageLoader.getAllAvailableLanguages().values().stream().map(language -> { + CommandLine.Model.CommandSpec command = CommandLine.Model.CommandSpec.create().name(language.getIdentifier()); + + for (LanguageOption option : language.getOptions().getOptionsAsList()) { + command.addOption(CommandLine.Model.OptionSpec.builder(option.getNameAsUnixParameter()).type(option.getType().getJavaType()) + .description(option.getDescription()).build()); + } + command.mixinStandardHelpOptions(true); + 
command.addPositional( + CommandLine.Model.PositionalParamSpec.builder().type(List.class).auxiliaryTypes(File.class).hidden(true).required(false).build()); + + return command; + }).toList(); + } + + /** + * Parses the cli parameters and prints the usage help if requested. + * @return true, if the usage help has been requested. In this case the program should stop. + * @throws CliException If something went wrong during parsing. + */ + public boolean parse() throws CliException { + try { + this.parseResult = this.commandLine.parseArgs(args); + if (this.parseResult.isUsageHelpRequested() + || (this.parseResult.subcommand() != null && this.parseResult.subcommand().isUsageHelpRequested())) { + commandLine.getExecutionStrategy().execute(this.parseResult); + return true; + } + } catch (CommandLine.ParameterException e) { + if (e.getArgSpec() != null && e.getArgSpec().isOption() + && Arrays.asList(((CommandLine.Model.OptionSpec) e.getArgSpec()).names()).contains("-l")) { + throw new CliException(String.format(UNKNOWN_LANGUAGE_EXCEPTION, e.getValue(), + String.join(", ", LanguageLoader.getAllAvailableLanguageIdentifiers()))); + } + throw new CliException("Error during parsing", e); + } catch (CommandLine.PicocliException e) { + throw new CliException("Error during parsing", e); + } + return false; + } + + /** + * If {@link #parse()} has not been called yet, this will be empty, otherwise it will be a valid object. + * @return The parsed cli options. + */ + public CliOptions getCliOptions() { + return options; + } + + /** + * Resolves the language selected by the cli arguments. + * @return The selected language + * @throws CliException In the event the language cannot be resolved. Should not happen under normal circumstances. 
+ */ + public Language getSelectedLanguage() throws CliException { + if (this.parseResult.subcommand() == null) { + return this.options.language; + } + + ParseResult subcommand = this.parseResult.subcommand(); + + Language language = LanguageLoader.getLanguage(subcommand.commandSpec().name()).orElseThrow(() -> new CliException(IMPOSSIBLE_EXCEPTION)); + + LanguageOptions languageOptions = language.getOptions(); + languageOptions.getOptionsAsList().forEach(option -> { + if (subcommand.hasMatchedOption(option.getNameAsUnixParameter())) { + option.setValue(subcommand.matchedOptionValue(option.getNameAsUnixParameter(), null)); + } + }); + return language; + } + + /** + * @return The submission directories configured for the subcommand, if one has been given. + */ + public List getSubcommandSubmissionDirectories() { + if (this.parseResult.subcommand() != null && this.parseResult.subcommand().hasMatchedPositional(0)) { + return this.parseResult.subcommand().matchedPositional(0).getValue(); + } + return Collections.emptyList(); + } + + private String generateDescription() { + var randomDescription = DESCRIPTIONS[RANDOM.nextInt(DESCRIPTIONS.length)]; + return String.format(DESCRIPTION_PATTERN, randomDescription, CREDITS); + } +} diff --git a/cli/src/main/java/de/jplag/cli/CustomHelp.java b/cli/src/main/java/de/jplag/cli/picocli/CustomHelp.java similarity index 96% rename from cli/src/main/java/de/jplag/cli/CustomHelp.java rename to cli/src/main/java/de/jplag/cli/picocli/CustomHelp.java index 368f358cec..9926486971 100644 --- a/cli/src/main/java/de/jplag/cli/CustomHelp.java +++ b/cli/src/main/java/de/jplag/cli/picocli/CustomHelp.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.picocli; import picocli.CommandLine; diff --git a/cli/src/main/java/de/jplag/cli/HelpFactory.java b/cli/src/main/java/de/jplag/cli/picocli/HelpFactory.java similarity index 92% rename from cli/src/main/java/de/jplag/cli/HelpFactory.java rename to 
cli/src/main/java/de/jplag/cli/picocli/HelpFactory.java index 53aa208b77..a587f38038 100644 --- a/cli/src/main/java/de/jplag/cli/HelpFactory.java +++ b/cli/src/main/java/de/jplag/cli/picocli/HelpFactory.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.picocli; import picocli.CommandLine; diff --git a/cli/src/main/java/de/jplag/cli/ParamLabelRenderer.java b/cli/src/main/java/de/jplag/cli/picocli/ParamLabelRenderer.java similarity index 98% rename from cli/src/main/java/de/jplag/cli/ParamLabelRenderer.java rename to cli/src/main/java/de/jplag/cli/picocli/ParamLabelRenderer.java index 2d815af309..bd23f8735c 100644 --- a/cli/src/main/java/de/jplag/cli/ParamLabelRenderer.java +++ b/cli/src/main/java/de/jplag/cli/picocli/ParamLabelRenderer.java @@ -1,4 +1,4 @@ -package de.jplag.cli; +package de.jplag.cli.picocli; import java.util.Arrays; import java.util.List; diff --git a/cli/src/main/java/de/jplag/cli/server/ReportViewer.java b/cli/src/main/java/de/jplag/cli/server/ReportViewer.java index 6e861c9266..b571be8eca 100644 --- a/cli/src/main/java/de/jplag/cli/server/ReportViewer.java +++ b/cli/src/main/java/de/jplag/cli/server/ReportViewer.java @@ -89,7 +89,7 @@ public void stop() { /** * Do not call manually. Called by the running web server. - * @param exchange The http reqest + * @param exchange The http request * @throws IOException If the IO handling goes wrong */ @Override diff --git a/cli/src/main/java/de/jplag/cli/server/Routing.java b/cli/src/main/java/de/jplag/cli/server/Routing.java index a6152a031e..e0a001dbf7 100644 --- a/cli/src/main/java/de/jplag/cli/server/Routing.java +++ b/cli/src/main/java/de/jplag/cli/server/Routing.java @@ -3,7 +3,7 @@ import com.sun.net.httpserver.HttpExchange; /** - * Handles the data for a url prefix. + * Handles the data for an url prefix. 
*/ public interface Routing { /** @@ -15,7 +15,7 @@ default HttpRequestMethod[] allowedMethods() { /** * Gets the data for the given url - * @param subPath The remaining suffix of the url, that is not jet interpreted + * @param subPath The remaining suffix of the url, that is not yet interpreted * @param request The original http request * @param viewer The current report viewer * @return The data to respond with diff --git a/cli/src/test/java/de/jplag/cli/ArgumentBuilder.java b/cli/src/test/java/de/jplag/cli/ArgumentBuilder.java index b5503791b2..305331b1cb 100644 --- a/cli/src/test/java/de/jplag/cli/ArgumentBuilder.java +++ b/cli/src/test/java/de/jplag/cli/ArgumentBuilder.java @@ -138,7 +138,7 @@ public ArgumentBuilder minTokens(int count) { } /** - * Sets the similarity threshold as a string, so invalid values can be configures + * Sets the similarity threshold as a string, so invalid values can be configured * @param value The value * @return self reference */ @@ -168,6 +168,26 @@ public ArgumentBuilder shownComparisons(String value) { return this; } + /** + * Sets the result file + * @param path The path to the result file + * @return self reference + */ + public ArgumentBuilder resultFile(String path) { + this.arguments.add("-r"); + this.arguments.add(path); + return this; + } + + /** + * Adds the overwrite argument + * @return self reference + */ + public ArgumentBuilder overwrite() { + this.arguments.add("--overwrite"); + return this; + } + /** * Sets the shown comparisons option * @param value The option value diff --git a/cli/src/test/java/de/jplag/cli/CheckResultFileWritableTest.java b/cli/src/test/java/de/jplag/cli/CheckResultFileWritableTest.java new file mode 100644 index 0000000000..4baab95e47 --- /dev/null +++ b/cli/src/test/java/de/jplag/cli/CheckResultFileWritableTest.java @@ -0,0 +1,96 @@ +package de.jplag.cli; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; +import 
java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.nio.file.Files; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import de.jplag.cli.picocli.CliInputHandler; + +public class CheckResultFileWritableTest extends CommandLineInterfaceTest { + private static Field inputHandlerField; + private static Method getWritableFileMethod; + + @BeforeAll + public static void setup() throws NoSuchFieldException, NoSuchMethodException { + Class cliClass = CLI.class; + inputHandlerField = cliClass.getDeclaredField("inputHandler"); + getWritableFileMethod = cliClass.getDeclaredMethod("getWritableFileName"); + + inputHandlerField.setAccessible(true); + getWritableFileMethod.setAccessible(true); + } + + @Test + void testNonExistingWritableFile() throws Throwable { + File directory = Files.createTempDirectory("JPlagTest").toFile(); + File targetFile = new File(directory, "results.zip"); + + String path = runResultFileCheck(defaultArguments().resultFile(targetFile.getAbsolutePath())); + Assertions.assertEquals(targetFile.getAbsolutePath(), path); + } + + @Test + void testNonExistingNotWritableFile() throws IOException { + File directory = Files.createTempDirectory("JPlagTest").toFile(); + Assumptions.assumeTrue(directory.setWritable(false)); + File targetFile = new File(directory, "results.zip"); + + Assertions.assertThrows(CliException.class, () -> { + runResultFileCheck(defaultArguments().resultFile(targetFile.getAbsolutePath())); + }); + } + + @Test + void testExistingFile() throws Throwable { + File directory = Files.createTempDirectory("JPlagTest").toFile(); + File targetFile = new File(directory, "results.zip"); + Assumptions.assumeTrue(targetFile.createNewFile()); + + String path = runResultFileCheck(defaultArguments().resultFile(targetFile.getAbsolutePath())); + Assertions.assertEquals(new File(directory, 
"results(1).zip").getAbsolutePath(), path); + } + + @Test + void testExistingFileOverwrite() throws Throwable { + File directory = Files.createTempDirectory("JPlagTest").toFile(); + File targetFile = new File(directory, "results.zip"); + Assumptions.assumeTrue(targetFile.createNewFile()); + + String path = runResultFileCheck(defaultArguments().resultFile(targetFile.getAbsolutePath()).overwrite()); + Assertions.assertEquals(targetFile.getAbsolutePath(), path); + } + + @Test + void testExistingNotWritableFile() throws IOException { + File directory = Files.createTempDirectory("JPlagTest").toFile(); + File targetFile = new File(directory, "results.zip"); + Assumptions.assumeTrue(targetFile.createNewFile()); + Assumptions.assumeTrue(targetFile.setWritable(false)); + + Assertions.assertThrows(CliException.class, () -> { + runResultFileCheck(defaultArguments().resultFile(targetFile.getAbsolutePath()).overwrite()); + }); + } + + private String runResultFileCheck(ArgumentBuilder builder) throws Throwable { + String[] args = builder.getArgumentsAsArray(); + CLI cli = new CLI(args); + + CliInputHandler inputHandler = (CliInputHandler) inputHandlerField.get(cli); + inputHandler.parse(); + + try { + return (String) getWritableFileMethod.invoke(cli); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + } +} diff --git a/cli/src/test/java/de/jplag/cli/CommandLineInterfaceTest.java b/cli/src/test/java/de/jplag/cli/CommandLineInterfaceTest.java index eb6ffca8c1..3946dbe45e 100644 --- a/cli/src/test/java/de/jplag/cli/CommandLineInterfaceTest.java +++ b/cli/src/test/java/de/jplag/cli/CommandLineInterfaceTest.java @@ -1,9 +1,8 @@ package de.jplag.cli; +import de.jplag.cli.picocli.CliInputHandler; import de.jplag.options.JPlagOptions; -import picocli.CommandLine; - /** * Test base for tests regarding the {@link CLI}. Solely tests if the arguments set via the command line interface are * propagated correctly into options. 
JPlag is not executed for the different command line arguments, thus these tests @@ -14,7 +13,6 @@ public abstract class CommandLineInterfaceTest { protected static final String CURRENT_DIRECTORY = "."; protected static final double DELTA = 1E-5; - protected CLI cli; protected JPlagOptions options; /** @@ -32,13 +30,13 @@ protected ArgumentBuilder defaultArguments() { } /** - * Builds {@link JPlagOptions} via the command line interface. Sets {@link CommandLineInterfaceTest#cli} + * Builds {@link JPlagOptions} via the command line interface. * @param builder The argument builder containing the values to pass to the cli */ protected void buildOptionsFromCLI(ArgumentBuilder builder) throws CliException { - cli = new CLI(); - CommandLine.ParseResult result = cli.parseOptions(builder.getArgumentsAsArray()); - options = cli.buildOptionsFromArguments(result); + CliInputHandler inputHandler = new CliInputHandler(builder.getArgumentsAsArray()); + inputHandler.parse(); + this.options = new JPlagOptionsBuilder(inputHandler).buildOptions(); } } diff --git a/cli/src/test/java/de/jplag/cli/CustomHelpTests.java b/cli/src/test/java/de/jplag/cli/CustomHelpTests.java index 3abde75055..9fecd3f3ba 100644 --- a/cli/src/test/java/de/jplag/cli/CustomHelpTests.java +++ b/cli/src/test/java/de/jplag/cli/CustomHelpTests.java @@ -4,6 +4,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import de.jplag.cli.picocli.CustomHelp; +import de.jplag.cli.picocli.HelpFactory; +import de.jplag.cli.picocli.ParamLabelRenderer; + import picocli.CommandLine; /** @@ -26,7 +30,7 @@ void setup() { */ @Test void testReturnsCustomRenderer() { - Assertions.assertTrue(this.help.parameterLabelRenderer() instanceof ParamLabelRenderer, + Assertions.assertInstanceOf(ParamLabelRenderer.class, this.help.parameterLabelRenderer(), "The custom help object returned the wrong ParamLabelRenderer type."); } } diff --git a/cli/src/test/java/de/jplag/cli/LanguageTest.java 
b/cli/src/test/java/de/jplag/cli/LanguageTest.java index 6561c8034c..a239112842 100644 --- a/cli/src/test/java/de/jplag/cli/LanguageTest.java +++ b/cli/src/test/java/de/jplag/cli/LanguageTest.java @@ -9,6 +9,8 @@ import org.junit.jupiter.api.Test; import de.jplag.Language; +import de.jplag.cli.options.CliOptions; +import de.jplag.cli.options.LanguageLoader; class LanguageTest extends CommandLineInterfaceTest { diff --git a/cli/src/test/java/de/jplag/cli/ParamLabelRendererTest.java b/cli/src/test/java/de/jplag/cli/ParamLabelRendererTest.java index f10a2d3502..817b84fef9 100644 --- a/cli/src/test/java/de/jplag/cli/ParamLabelRendererTest.java +++ b/cli/src/test/java/de/jplag/cli/ParamLabelRendererTest.java @@ -8,6 +8,8 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import de.jplag.cli.picocli.ParamLabelRenderer; + import picocli.CommandLine; /** diff --git a/cli/src/test/java/de/jplag/cli/logger/IdleBarTest.java b/cli/src/test/java/de/jplag/cli/logger/IdleBarTest.java new file mode 100644 index 0000000000..88bf382db6 --- /dev/null +++ b/cli/src/test/java/de/jplag/cli/logger/IdleBarTest.java @@ -0,0 +1,70 @@ +package de.jplag.cli.logger; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class IdleBarTest { + private static final String TEST_BAR_TEXT = "Test"; + private static final long IDLE_BAR_ANIMATION_DELAY = 200; + + private static final int TARGET_FRAME_NUMBER = 5; + + /** + * Tests if the output of the idle bar looks plausible + */ + @Test + void testIdleBarPlausible() { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream oldSystemOut = System.out; + System.setOut(new PrintStream(outputStream)); + + IdleBar idleBar = new IdleBar(TEST_BAR_TEXT); + idleBar.start(); + while (outputStream.toString().split("\\r").length <= TARGET_FRAME_NUMBER) { + Thread.yield(); + } + + 
idleBar.dispose(); + System.setOut(oldSystemOut); + + String result = outputStream.toString(); + String[] animationFrames = result.substring(1).split("\\r"); + + String firstFrame = animationFrames[0]; + int numberOfSpaces = firstFrame.lastIndexOf('>') - firstFrame.indexOf('<') - 3 - 1; + for (int i = 0; i < TARGET_FRAME_NUMBER; i++) { + checkIdleBarOutput(animationFrames[i], i, numberOfSpaces); + } + } + + /** + * Checks that the given string matches the expected content of an animation frame + * @param output The animation frame + * @param frameIndex The index of the frame + * @param numberOfSpaces The number of spaces within the bar + */ + private void checkIdleBarOutput(String output, int frameIndex, int numberOfSpaces) { + int pass = frameIndex / numberOfSpaces; + int offset = frameIndex % numberOfSpaces; + if (pass % 2 == 1) { + offset = numberOfSpaces - offset; + } + + String expectedOutput = TEST_BAR_TEXT + ' ' + '<' + " ".repeat(offset) + "<+>" + " ".repeat(numberOfSpaces - offset) + '>'; + + int endOfPredictableOutput = output.lastIndexOf(' '); + String predictableOutput = output.substring(0, endOfPredictableOutput); + String time = output.substring(endOfPredictableOutput + 1).trim(); + + Assertions.assertEquals(expectedOutput, predictableOutput); + Assertions.assertTrue(time.matches("[0-9]:[0-9]{2}:[0-9]{2}"), "Invalid format for time"); + + String[] timeParts = time.split(":"); + int seconds = Integer.parseInt(timeParts[0]) * 60 * 60 + Integer.parseInt(timeParts[1]) * 60 + Integer.parseInt(timeParts[2]); + int expectedTime = (int) ((IDLE_BAR_ANIMATION_DELAY * frameIndex) / 1000); + Assertions.assertTrue(Math.abs(seconds - expectedTime) < 1, "Frame time of by more than one second"); + } +} \ No newline at end of file diff --git a/cli/src/test/java/de/jplag/cli/server/RoutingTreeTest.java b/cli/src/test/java/de/jplag/cli/server/RoutingTreeTest.java index 318c00db22..dc0bf86a70 100644 --- a/cli/src/test/java/de/jplag/cli/server/RoutingTreeTest.java +++ 
b/cli/src/test/java/de/jplag/cli/server/RoutingTreeTest.java @@ -54,7 +54,7 @@ void testPartialPathRoute() { } @Test - void testPartialPathRouteWithSubpath() { + void testPartialPathRouteWithSubPath() { RoutingTree routingTree = new RoutingTree(); routingTree.insertRouting("/path/", new TestRouting("/path/")); routingTree.insertRouting("/path/subPath/a.html", new TestRouting("")); @@ -65,13 +65,7 @@ void testPartialPathRouteWithSubpath() { assertEquals("/path/", ((TestRouting) result.getRight()).path); } - private static class TestRouting implements Routing { - private final String path; - - public TestRouting(String path) { - this.path = path; - } - + private record TestRouting(String path) implements Routing { @Override public ResponseData fetchData(RoutingPath subPath, HttpExchange request, ReportViewer viewer) { return null; diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index 9076879745..09b21b5dbe 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -95,7 +95,7 @@ public List getInvalidSubmissions() { } public void normalizeSubmissions() { - submissions.forEach(Submission::normalize); + ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, Submission::normalize); } private List filterValidSubmissions() { diff --git a/core/src/main/java/de/jplag/clustering/ClusteringFactory.java b/core/src/main/java/de/jplag/clustering/ClusteringFactory.java index 7351493e1d..de2bc1e607 100644 --- a/core/src/main/java/de/jplag/clustering/ClusteringFactory.java +++ b/core/src/main/java/de/jplag/clustering/ClusteringFactory.java @@ -12,6 +12,9 @@ import de.jplag.JPlagComparison; import de.jplag.Submission; import de.jplag.clustering.algorithm.GenericClusteringAlgorithm; +import de.jplag.logging.ProgressBar; +import de.jplag.logging.ProgressBarLogger; +import de.jplag.logging.ProgressBarType; /** * Runs the clustering according 
to an options object. @@ -42,6 +45,8 @@ public static List> getClusterings(Collection> getClusterings(Collection removeBadClusters(final ClusteringRe private static void logClusters(ClusteringResult result) { var clusters = new ArrayList<>(result.getClusters()); clusters.sort((first, second) -> Double.compare(second.getAverageSimilarity(), first.getAverageSimilarity())); - logger.info(CLUSTERING_RESULT, clusters.size()); + logger.trace(CLUSTERING_RESULT, clusters.size()); clusters.forEach(ClusteringFactory::logCluster); } @@ -82,7 +89,7 @@ private static void logCluster(Cluster cluster) { String members = membersToString(cluster.getMembers()); String similarity = String.format(SIMILARITY_FORMAT, cluster.getAverageSimilarity() * 100); String strength = String.format(STRENGTH_FORMAT, cluster.getCommunityStrength()); - logger.info(CLUSTER_PATTERN, similarity, strength, cluster.getMembers().size(), members); + logger.trace(CLUSTER_PATTERN, similarity, strength, cluster.getMembers().size(), members); } private static String membersToString(Collection members) { diff --git a/core/src/main/java/de/jplag/logging/ProgressBarLogger.java b/core/src/main/java/de/jplag/logging/ProgressBarLogger.java index 889d391bbb..757734ddc4 100644 --- a/core/src/main/java/de/jplag/logging/ProgressBarLogger.java +++ b/core/src/main/java/de/jplag/logging/ProgressBarLogger.java @@ -1,5 +1,9 @@ package de.jplag.logging; +import java.util.Collection; +import java.util.Iterator; +import java.util.function.Consumer; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,6 +36,26 @@ public static void setProgressBarProvider(ProgressBarProvider progressBarProvide ProgressBarLogger.progressBarProvider = progressBarProvider; } + /** + * Iterates over the given collection while showing and updating a progress bar of the given type. The progress bar is + * updated, everytime the given action is done. 
+ * @param type The type of progress bar + * @param data The data to iterate over + * @param action The action to call for each item + * @param The type of data + */ + public static void iterate(ProgressBarType type, Collection data, Consumer action) { + Iterator iterator = data.iterator(); + ProgressBar progressBar = ProgressBarLogger.createProgressBar(type, data.size()); + + while (iterator.hasNext()) { + action.accept(iterator.next()); + progressBar.step(); + } + + progressBar.dispose(); + } + private static class DummyProvider implements ProgressBarProvider { @Override public ProgressBar initProgressBar(ProgressBarType type, int totalSteps) { @@ -45,7 +69,11 @@ private static class DummyBar implements ProgressBar { public DummyBar(ProgressBarType type, int totalSteps) { this.currentStep = 0; - logger.info("{} ({})", type.getDefaultText(), totalSteps); + if (type.isIdleBar()) { + logger.info("{} - started", type.getDefaultText()); + } else { + logger.info("{} ({})", type.getDefaultText(), totalSteps); + } } @Override diff --git a/core/src/main/java/de/jplag/logging/ProgressBarType.java b/core/src/main/java/de/jplag/logging/ProgressBarType.java index 88e520fcc0..e8de359f93 100644 --- a/core/src/main/java/de/jplag/logging/ProgressBarType.java +++ b/core/src/main/java/de/jplag/logging/ProgressBarType.java @@ -4,14 +4,19 @@ * The available processes. Used as a hint for the ui, which step JPlag is currently performing. 
*/ public enum ProgressBarType { - LOADING("Loading Submissions "), - PARSING("Parsing Submissions "), - COMPARING("Comparing Submissions"); + LOADING("Loading Submissions ", false), + PARSING("Parsing Submissions ", false), + COMPARING("Comparing Submissions", false), + MATCH_MERGING("Merging matched subsequences ", false), + TOKEN_STRING_NORMALIZATION("Normalizing Token Sequence", false), + CLUSTERING("Finding clusters ", true); private final String defaultText; + private final boolean isIdleBar; - ProgressBarType(String defaultText) { + ProgressBarType(String defaultText, boolean isIdleBar) { this.defaultText = defaultText; + this.isIdleBar = isIdleBar; } /** @@ -20,4 +25,11 @@ public enum ProgressBarType { public String getDefaultText() { return defaultText; } + + /** + * @return True, if this bar should be rendered as an idle bar instead. + */ + public boolean isIdleBar() { + return isIdleBar; + } } diff --git a/core/src/main/java/de/jplag/merging/MatchMerging.java b/core/src/main/java/de/jplag/merging/MatchMerging.java index 3067e32a66..bc31e269c5 100644 --- a/core/src/main/java/de/jplag/merging/MatchMerging.java +++ b/core/src/main/java/de/jplag/merging/MatchMerging.java @@ -10,6 +10,8 @@ import de.jplag.SharedTokenType; import de.jplag.Submission; import de.jplag.Token; +import de.jplag.logging.ProgressBarLogger; +import de.jplag.logging.ProgressBarType; import de.jplag.options.JPlagOptions; /** @@ -44,7 +46,7 @@ public JPlagResult mergeMatchesOf(JPlagResult result) { List comparisons = new ArrayList<>(result.getAllComparisons()); List comparisonsMerged = new ArrayList<>(); - for (JPlagComparison comparison : comparisons) { + ProgressBarLogger.iterate(ProgressBarType.MATCH_MERGING, comparisons, comparison -> { Submission leftSubmission = comparison.firstSubmission().copy(); Submission rightSubmission = comparison.secondSubmission().copy(); List globalMatches = new ArrayList<>(comparison.matches()); @@ -52,7 +54,7 @@ public JPlagResult 
mergeMatchesOf(JPlagResult result) { globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission); globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList(); comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>())); - } + }); long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis; return new JPlagResult(comparisonsMerged, result.getSubmissions(), result.getDuration() + durationInMillis, options); diff --git a/core/src/main/java/de/jplag/reporting/FilePathUtil.java b/core/src/main/java/de/jplag/reporting/FilePathUtil.java index aff67ef3ec..b5d6f23429 100644 --- a/core/src/main/java/de/jplag/reporting/FilePathUtil.java +++ b/core/src/main/java/de/jplag/reporting/FilePathUtil.java @@ -8,7 +8,6 @@ public final class FilePathUtil { private static final String ZIP_PATH_SEPARATOR = "/"; // Paths in zip files are always separated by a slash - private static final String WINDOWS_PATH_SEPARATOR = "\\"; private FilePathUtil() { // private constructor to prevent instantiation @@ -21,30 +20,39 @@ private FilePathUtil() { * @param submissionToIdFunction Function to map names to ids * @return Relative path */ - public static String getRelativeSubmissionPath(File file, Submission submission, Function submissionToIdFunction) { + public static Path getRelativeSubmissionPath(File file, Submission submission, Function submissionToIdFunction) { if (file.toPath().equals(submission.getRoot().toPath())) { - return Path.of(submissionToIdFunction.apply(submission), submissionToIdFunction.apply(submission)).toString(); + return Path.of(submissionToIdFunction.apply(submission), submissionToIdFunction.apply(submission)); } - return Path.of(submissionToIdFunction.apply(submission), submission.getRoot().toPath().relativize(file.toPath()).toString()).toString(); + return Path.of(submissionToIdFunction.apply(submission), 
submission.getRoot().toPath().relativize(file.toPath()).toString()); } /** - * Joins logical paths using a slash. This method ensures, that no duplicate slashes are created in between. - * @param left The left path segment - * @param right The right path segment - * @return The joined paths + * Forces a path to be relative. If the path is absolute, the returned path will be relative to the root. + * @param path The path to relativize + * @return The relative path */ - public static String joinZipPathSegments(String left, String right) { - String rightStripped = right; - while (rightStripped.startsWith(ZIP_PATH_SEPARATOR) || rightStripped.startsWith(WINDOWS_PATH_SEPARATOR)) { - rightStripped = rightStripped.substring(1); + public static Path forceRelativePath(Path path) { + if (path.isAbsolute()) { + return Path.of("/").relativize(path); } + return path; + } - String leftStripped = left; - while (leftStripped.endsWith(ZIP_PATH_SEPARATOR) || leftStripped.startsWith(WINDOWS_PATH_SEPARATOR)) { - leftStripped = leftStripped.substring(0, leftStripped.length() - 1); + /** + * Formats the path for usage with zip files. 
Returns the path segments separated by {@link #ZIP_PATH_SEPARATOR} + * @param path The path to format + * @return The zip file path + */ + public static String pathAsZipPath(Path path) { + Path relativePath = forceRelativePath(path); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < relativePath.getNameCount(); i++) { + if (i != 0) { + builder.append(ZIP_PATH_SEPARATOR); + } + builder.append(relativePath.getName(i)); } - - return leftStripped + ZIP_PATH_SEPARATOR + rightStripped; + return builder.toString(); } } diff --git a/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java b/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java index 29f93744c0..0396290c89 100644 --- a/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java +++ b/core/src/main/java/de/jplag/reporting/jsonfactory/ComparisonReportWriter.java @@ -1,5 +1,6 @@ package de.jplag.reporting.jsonfactory; +import java.nio.file.Path; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -58,7 +59,7 @@ private void writeComparisons(List comparisons) { var comparisonReport = new ComparisonReport(firstSubmissionId, secondSubmissionId, Map.of(SimilarityMetric.AVG.name(), comparison.similarity(), SimilarityMetric.MAX.name(), comparison.maximalSimilarity()), convertMatchesToReportMatches(comparison), comparison.similarityOfFirst(), comparison.similarityOfSecond()); - resultWriter.addJsonEntry(comparisonReport, fileName); + resultWriter.addJsonEntry(comparisonReport, Path.of(fileName)); } } @@ -97,16 +98,34 @@ private Match convertMatchToReportMatch(JPlagComparison comparison, de.jplag.Mat List tokensFirst = comparison.firstSubmission().getTokenList().subList(match.startOfFirst(), match.endOfFirst() + 1); List tokensSecond = comparison.secondSubmission().getTokenList().subList(match.startOfSecond(), match.endOfSecond() + 1); - Comparator lineComparator = Comparator.comparingInt(Token::getLine); + 
Comparator lineComparator = Comparator.comparingInt(Token::getLine).thenComparingInt(Token::getColumn); Token startOfFirst = tokensFirst.stream().min(lineComparator).orElseThrow(); Token endOfFirst = tokensFirst.stream().max(lineComparator).orElseThrow(); Token startOfSecond = tokensSecond.stream().min(lineComparator).orElseThrow(); Token endOfSecond = tokensSecond.stream().max(lineComparator).orElseThrow(); - return new Match(FilePathUtil.getRelativeSubmissionPath(startOfFirst.getFile(), comparison.firstSubmission(), submissionToIdFunction), - FilePathUtil.getRelativeSubmissionPath(startOfSecond.getFile(), comparison.secondSubmission(), submissionToIdFunction), - startOfFirst.getLine(), endOfFirst.getLine(), startOfSecond.getLine(), endOfSecond.getLine(), match.length()); + String firstFileName = FilePathUtil.getRelativeSubmissionPath(startOfFirst.getFile(), comparison.firstSubmission(), submissionToIdFunction) + .toString(); + String secondFileName = FilePathUtil.getRelativeSubmissionPath(startOfSecond.getFile(), comparison.secondSubmission(), submissionToIdFunction) + .toString(); + + int startLineFirst = startOfFirst.getLine(); + int startColumnFirst = startOfFirst.getColumn(); + int startTokenFirst = match.startOfFirst(); + int endLineFirst = endOfFirst.getLine(); + int endColumnFirst = endOfFirst.getColumn() + endOfFirst.getLength() - 1; + int endTokenFirst = match.endOfFirst(); + + int startLineSecond = startOfSecond.getLine(); + int startColumnSecond = startOfSecond.getColumn(); + int startTokenSecond = match.startOfSecond(); + int endLineSecond = endOfSecond.getLine(); + int endColumnSecond = endOfSecond.getColumn() + endOfSecond.getLength() - 1; + int endTokenSecond = match.endOfSecond(); + + return new Match(firstFileName, secondFileName, startLineFirst, startColumnFirst, startTokenFirst, endLineFirst, endColumnFirst, + endTokenFirst, startLineSecond, startColumnSecond, startTokenSecond, endLineSecond, endColumnSecond, endTokenSecond, match.length()); } 
} diff --git a/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java b/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java index d569a52520..8ce7d205a4 100644 --- a/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java +++ b/core/src/main/java/de/jplag/reporting/reportobject/ReportObjectFactory.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.FileNotFoundException; +import java.nio.file.Path; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -41,17 +42,17 @@ public class ReportObjectFactory { private static final Logger logger = LoggerFactory.getLogger(ReportObjectFactory.class); - public static final String OVERVIEW_FILE_NAME = "overview.json"; + public static final Path OVERVIEW_FILE_NAME = Path.of("overview.json"); - public static final String README_FILE_NAME = "README.txt"; - public static final String OPTIONS_FILE_NAME = "options.json"; + public static final Path README_FILE_NAME = Path.of("README.txt"); + public static final Path OPTIONS_FILE_NAME = Path.of("options.json"); private static final String[] README_CONTENT = new String[] {"This is a software plagiarism report generated by JPlag.", "To view the report go to https://jplag.github.io/JPlag/ and drag the generated zip file onto the page."}; - public static final String SUBMISSION_FILE_INDEX_FILE_NAME = "submissionFileIndex.json"; + public static final Path SUBMISSION_FILE_INDEX_FILE_NAME = Path.of("submissionFileIndex.json"); public static final Version REPORT_VIEWER_VERSION = JPlag.JPLAG_VERSION; - private static final String SUBMISSIONS_ROOT_PATH = "files/"; + private static final Path SUBMISSIONS_ROOT_PATH = Path.of("files"); private Map submissionNameToIdMap; private Function submissionToIdFunction; @@ -106,13 +107,9 @@ private void copySubmissionFilesToReport(JPlagResult result) { Set submissions = getSubmissions(comparisons); Language language = result.getOptions().language(); for 
(Submission submission : submissions) { - String submissionRootPath = SUBMISSIONS_ROOT_PATH + submissionToIdFunction.apply(submission); for (File file : submission.getFiles()) { - String relativeFilePath = file.getAbsolutePath().substring(submission.getRoot().getAbsolutePath().length()); - if (relativeFilePath.isEmpty()) { - relativeFilePath = file.getName(); - } - String zipPath = FilePathUtil.joinZipPathSegments(submissionRootPath, relativeFilePath); + Path filePath = FilePathUtil.getRelativeSubmissionPath(file, submission, submissionToIdFunction); + Path zipPath = SUBMISSIONS_ROOT_PATH.resolve(filePath); File fileToCopy = getFileToCopy(language, file); this.resultWriter.addFileContentEntry(zipPath, fileToCopy); @@ -175,7 +172,7 @@ private void writeSubmissionIndexFile(JPlagResult result) { List>> submissionTokenCountList = submissions.stream().parallel().map(submission -> { Map tokenCounts = new HashMap<>(); for (Map.Entry entry : submission.getTokenCountPerFile().entrySet()) { - String key = FilePathUtil.getRelativeSubmissionPath(entry.getKey(), submission, submissionToIdFunction); + String key = FilePathUtil.getRelativeSubmissionPath(entry.getKey(), submission, submissionToIdFunction).toString(); tokenCounts.put(key, new SubmissionFile(entry.getValue())); } return Map.of(submissionNameToIdMap.get(submission.getName()), tokenCounts); diff --git a/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java b/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java index 8af13af209..3c225633a4 100644 --- a/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java +++ b/core/src/main/java/de/jplag/reporting/reportobject/model/Match.java @@ -3,6 +3,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; public record Match(@JsonProperty("file1") String firstFileName, @JsonProperty("file2") String secondFileName, - @JsonProperty("start1") int startInFirst, @JsonProperty("end1") int endInFirst, @JsonProperty("start2") int 
startInSecond, - @JsonProperty("end2") int endInSecond, @JsonProperty("tokens") int tokens) { + @JsonProperty("start1") int startInFirst, @JsonProperty("start1_col") int startColumnInFirst, + @JsonProperty("startToken1") int startTokenInFirst, @JsonProperty("end1") int endInFirst, @JsonProperty("end1_col") int endColumnInFirst, + @JsonProperty("endToken1") int endTokenInFirst, @JsonProperty("start2") int startInSecond, + @JsonProperty("start2_col") int startColumnInSecond, @JsonProperty("startToken2") int startTokenInSecond, + @JsonProperty("end2") int endInSecond, @JsonProperty("end2_col") int endColumnInSecond, @JsonProperty("endToken2") int endTokenInSecond, + @JsonProperty("tokens") int tokens) { } diff --git a/core/src/main/java/de/jplag/reporting/reportobject/writer/DummyResultWriter.java b/core/src/main/java/de/jplag/reporting/reportobject/writer/DummyResultWriter.java index 1da95b72dd..4ca847090c 100644 --- a/core/src/main/java/de/jplag/reporting/reportobject/writer/DummyResultWriter.java +++ b/core/src/main/java/de/jplag/reporting/reportobject/writer/DummyResultWriter.java @@ -1,6 +1,7 @@ package de.jplag.reporting.reportobject.writer; import java.io.File; +import java.nio.file.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -16,17 +17,17 @@ public class DummyResultWriter implements JPlagResultWriter { private static final String MESSAGE_CLOSE = "DummyWriter closed."; @Override - public void addJsonEntry(Object jsonContent, String path) { + public void addJsonEntry(Object jsonContent, Path path) { logger.info(MESSAGE_JSON, jsonContent, path); } @Override - public void addFileContentEntry(String path, File original) { + public void addFileContentEntry(Path path, File original) { logger.info(MESSAGE_FILE, original.getAbsolutePath(), path); } @Override - public void writeStringEntry(String entry, String path) { + public void writeStringEntry(String entry, Path path) { logger.info(MESSAGE_STRING, entry, path); } diff --git
a/core/src/main/java/de/jplag/reporting/reportobject/writer/JPlagResultWriter.java b/core/src/main/java/de/jplag/reporting/reportobject/writer/JPlagResultWriter.java index 57fbed8c46..24f6dd4f78 100644 --- a/core/src/main/java/de/jplag/reporting/reportobject/writer/JPlagResultWriter.java +++ b/core/src/main/java/de/jplag/reporting/reportobject/writer/JPlagResultWriter.java @@ -1,6 +1,7 @@ package de.jplag.reporting.reportobject.writer; import java.io.File; +import java.nio.file.Path; /** * Writer for JPlag result data. The way paths are resolved depends on the implementation @@ -11,21 +12,21 @@ public interface JPlagResultWriter { * @param jsonContent The json content * @param path The path to write to */ - void addJsonEntry(Object jsonContent, String path); + void addJsonEntry(Object jsonContent, Path path); /** * Writes data from a file * @param path The path to write to * @param original The original file */ - void addFileContentEntry(String path, File original); + void addFileContentEntry(Path path, File original); /** * Writes data from a string * @param entry The string to write * @param path The path to write to */ - void writeStringEntry(String entry, String path); + void writeStringEntry(String entry, Path path); /** * Closes the writer diff --git a/core/src/main/java/de/jplag/reporting/reportobject/writer/ZipWriter.java b/core/src/main/java/de/jplag/reporting/reportobject/writer/ZipWriter.java index b7606c5353..db64237b00 100644 --- a/core/src/main/java/de/jplag/reporting/reportobject/writer/ZipWriter.java +++ b/core/src/main/java/de/jplag/reporting/reportobject/writer/ZipWriter.java @@ -6,12 +6,15 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import de.jplag.reporting.FilePathUtil; + import com.fasterxml.jackson.databind.ObjectMapper; /** @@ 
-39,9 +42,9 @@ public ZipWriter(File zipFile) throws FileNotFoundException { } @Override - public void addJsonEntry(Object jsonContent, String path) { + public void addJsonEntry(Object jsonContent, Path path) { try { - this.file.putNextEntry(new ZipEntry(path)); + this.file.putNextEntry(new ZipEntry(FilePathUtil.pathAsZipPath(path))); this.file.write(objectMapper.writeValueAsBytes(jsonContent)); this.file.closeEntry(); } catch (IOException e) { @@ -50,9 +53,9 @@ public void addJsonEntry(Object jsonContent, String path) { } @Override - public void addFileContentEntry(String path, File original) { + public void addFileContentEntry(Path path, File original) { try (FileInputStream inputStream = new FileInputStream(original)) { - this.file.putNextEntry(new ZipEntry(path)); + this.file.putNextEntry(new ZipEntry(FilePathUtil.pathAsZipPath(path))); inputStream.transferTo(this.file); } catch (IOException e) { logger.error(String.format(COPY_FILE_ERROR, original.getAbsolutePath(), path), e); @@ -60,9 +63,9 @@ public void addFileContentEntry(String path, File original) { } @Override - public void writeStringEntry(String entry, String path) { + public void writeStringEntry(String entry, Path path) { try { - this.file.putNextEntry(new ZipEntry(path)); + this.file.putNextEntry(new ZipEntry(FilePathUtil.pathAsZipPath(path))); this.file.write(entry.getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { logger.error(String.format(WRITE_STRING_ERROR, path), e); diff --git a/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java b/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java deleted file mode 100644 index 661d7cc0b8..0000000000 --- a/core/src/test/java/de/jplag/reporting/FilePathUtilTest.java +++ /dev/null @@ -1,26 +0,0 @@ -package de.jplag.reporting; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -class FilePathUtilTest { - private static final String JOINED = "left/right"; - private static final 
String LEFT = "left"; - private static final String RIGHT = "right"; - - @Test - void testJoinPath() { - assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT, RIGHT)); - } - - @Test - void testJoinPathWithLeftSlashSuffix() { - assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT + "/", RIGHT)); - } - - @Test - void testJoinPathWithRightSlashSuffix() { - assertEquals(JOINED, FilePathUtil.joinZipPathSegments(LEFT, "/" + RIGHT)); - } -} \ No newline at end of file diff --git a/docs/5.-End-to-End-Testing.md b/docs/5.-End-to-End-Testing.md index 98a4d38573..35bfd882c5 100644 --- a/docs/5.-End-to-End-Testing.md +++ b/docs/5.-End-to-End-Testing.md @@ -31,7 +31,7 @@ adding/removing comments to architectural changes in the deliverables. These changes were now transferred to a base class and thus the plagiarism was created. The named base class was provided with the individual changes. The numbers in the list shown above are intended for the traceability of the test data. Here the test data filenames were named with the respective changes. Example: SortAlgo4d1 contains the changes "Variable declaration at the beginning of the program". If several points are combined, this is separated by "_" e.g.: SortAlgo1_3 contains "(1) Inserting comments or empty lines" and "(3) Insertion of unnecessary or changed code lines". The following code examples show how these changes affect the program code and also how the detection of JPLag behaves. -All the code examples shown and more can be found at [testdata-resources-SortAlgo](https://github.com/jplag/JPlag/tree/main/endtoend-testing/src/test/resources/languageTestFiles/java/sortAlgo). +All the code examples shown and more can be found at [testdata-resources-SortAlgo](https://github.com/jplag/JPlag/tree/main/endtoend-testing/src/test/resources/data/sortAlgo). 
### (1) Inserting comments or empty lines diff --git a/endtoend-testing/pom.xml b/endtoend-testing/pom.xml index a4e438e846..17d20de7f3 100644 --- a/endtoend-testing/pom.xml +++ b/endtoend-testing/pom.xml @@ -33,7 +33,7 @@ com.tngtech.archunit archunit-junit5 - 1.2.1 + 1.3.0 test diff --git a/endtoend-testing/src/main/java/de/jplag/endtoend/helper/LanguageDeserializer.java b/endtoend-testing/src/main/java/de/jplag/endtoend/helper/LanguageDeserializer.java index 07848b72e7..22e231eac9 100644 --- a/endtoend-testing/src/main/java/de/jplag/endtoend/helper/LanguageDeserializer.java +++ b/endtoend-testing/src/main/java/de/jplag/endtoend/helper/LanguageDeserializer.java @@ -3,7 +3,7 @@ import java.io.IOException; import de.jplag.Language; -import de.jplag.cli.LanguageLoader; +import de.jplag.cli.options.LanguageLoader; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.DeserializationContext; diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractVisitor.java b/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractVisitor.java index 0ec4c19afd..17e8cb4b7d 100644 --- a/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractVisitor.java +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/AbstractVisitor.java @@ -25,7 +25,7 @@ public abstract class AbstractVisitor { private final Predicate condition; private final List>> entryHandlers; - private TokenType entryTokenType; + protected TokenType entryTokenType; private Function entrySemantics; /** @@ -118,15 +118,24 @@ boolean matches(T entity) { * Enter a given entity, injecting the needed dependencies. 
*/ void enter(HandlerData data) { + handleEnter(data, this::extractEnterToken, this::extractEnterToken); + } + + protected void handleEnter(HandlerData data, Function extractStartToken, Function extractEndToken) { if (entryTokenType == null && entrySemantics != null) { logger.warn("Received semantics, but no token type, so no token was generated and the semantics discarded"); } - addToken(data, entryTokenType, entrySemantics, this::extractEnterToken); // addToken takes null token types + addToken(data, entryTokenType, entrySemantics, extractStartToken, extractEndToken); // addToken takes null token types entryHandlers.forEach(handler -> handler.accept(data)); } void addToken(HandlerData data, TokenType tokenType, Function semantics, Function extractToken) { - data.collector().addToken(tokenType, semantics, data.entity(), extractToken, data.variableRegistry()); + addToken(data, tokenType, semantics, extractToken, extractToken); + } + + void addToken(HandlerData data, TokenType tokenType, Function semantics, Function extractStartToken, + Function extractEndToken) { + data.collector().addToken(tokenType, semantics, data.entity(), extractStartToken, extractEndToken, data.variableRegistry()); } abstract Token extractEnterToken(T entity); diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/ContextDelegateVisitor.java b/language-antlr-utils/src/main/java/de/jplag/antlr/ContextDelegateVisitor.java new file mode 100644 index 0000000000..6c8bf10ae4 --- /dev/null +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/ContextDelegateVisitor.java @@ -0,0 +1,29 @@ +package de.jplag.antlr; + +import java.util.function.Function; + +import org.antlr.v4.runtime.ParserRuleContext; + +/** + * Delegates visiting a {@link ParserRuleContext} to a different {@link ContextVisitor} derived by the given mapper + * function + * @param The original antlr type visited + * @param The target {@link ParserRuleContext} to visit instead + */ +public class ContextDelegateVisitor extends 
DelegateVisitor { + private final ContextVisitor contextVisitor; + + /** + * @param delegate The visitor to delegate to + * @param mapper The mapper function used to derive the target antlr context + */ + public ContextDelegateVisitor(ContextVisitor delegate, Function mapper) { + super(delegate, mapper); + this.contextVisitor = delegate; + } + + @Override + public void delegateExit(HandlerData parentData) { + this.contextVisitor.exit(parentData.derive(this.mapper)); + } +} diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/ContextVisitor.java b/language-antlr-utils/src/main/java/de/jplag/antlr/ContextVisitor.java index 96436248de..e4dda7dc85 100644 --- a/language-antlr-utils/src/main/java/de/jplag/antlr/ContextVisitor.java +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/ContextVisitor.java @@ -10,6 +10,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.tree.TerminalNode; import de.jplag.TokenType; import de.jplag.semantics.CodeSemantics; @@ -23,10 +24,15 @@ public class ContextVisitor extends AbstractVisitor private final List>> exitHandlers; private TokenType exitTokenType; private Function exitSemantics; + private boolean lengthAsRange; + + private DelegateVisitor delegate; ContextVisitor(Predicate condition) { super(condition); this.exitHandlers = new ArrayList<>(); + this.lengthAsRange = false; + this.delegate = null; } /** @@ -59,6 +65,53 @@ public ContextVisitor mapExit(TokenType tokenType) { return this; } + /** + * Behaves like mapEnter, but the created token will range from the beginning of this context to the end instead of only + * marking the beginning. + * @param tokenType The type of token to crate + * @return Self + */ + public AbstractVisitor mapRange(TokenType tokenType) { + this.entryTokenType = tokenType; + this.lengthAsRange = true; + return this; + } + + /** + * Delegates calls to this visitor to a derived visitor. 
The mapper function is used to determine the delegated token. + * This invalidated all mapping happening inside this visitor. You need to configure the new visitor to do so. + * @param mapper The mapper function + */ + public TerminalVisitor delegateTerminal(Function mapper) { + TerminalVisitor delegateVisitor = new TerminalVisitor(ignore -> true); + this.delegate = new DelegateVisitor<>(delegateVisitor, parentData -> mapper.apply(parentData).getSymbol()); + return delegateVisitor; + } + + /** + * Delegates calls to this visitor to a derived visitor. The mapper function is used to determine the delegated token. + * This invalidated all mapping happening inside this visitor. You need to configure the new visitor to do so. Visits + * the terminal upon exiting this context + * @param mapper The mapper function + */ + public TerminalVisitor delegateTerminalExit(Function mapper) { + TerminalVisitor delegateVisitor = new TerminalVisitor(ignore -> true); + this.delegate = new DelegateVisitor<>(delegateVisitor, parentData -> mapper.apply(parentData).getSymbol()); + this.delegate.mapOnExit(); + return delegateVisitor; + } + + /** + * Delegates calls to this visitor to a derived visitor. The mapper function is used to determine the delegated token. + * This invalidated all mapping happening inside this visitor. You need to configure the new visitor to do so. + * @param mapper The mapper function + */ + public ContextVisitor delegateContext(Function mapper) { + ContextVisitor visitor = new ContextVisitor<>(ignore -> true); + this.delegate = new ContextDelegateVisitor<>(visitor, mapper); + return visitor; + } + /** * Tell the visitor that it should generate a token upon entering and one upon exiting the entity. Should only be * invoked once per visitor. @@ -132,12 +185,40 @@ public ContextVisitor addClassScope() { * Exit a given entity, injecting the needed dependencies. 
*/ void exit(HandlerData data) { + if (this.delegate != null) { + this.delegate.delegateExit(data); + return; + } + addToken(data, exitTokenType, exitSemantics, ParserRuleContext::getStop); exitHandlers.forEach(handler -> handler.accept(data)); } + @Override + void enter(HandlerData data) { + if (this.delegate != null) { + this.delegate.delegateEnter(data); + return; + } + + if (this.lengthAsRange) { + this.handleEnter(data, this::extractEnterToken, ParserRuleContext::getStop); + } else { + super.enter(data); + } + } + @Override Token extractEnterToken(T entity) { return entity.getStart(); } + + @Override + boolean matches(T entity) { + if (this.delegate != null && !this.delegate.isPresent(entity)) { + return false; + } + + return super.matches(entity); + } } diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/DelegateVisitor.java b/language-antlr-utils/src/main/java/de/jplag/antlr/DelegateVisitor.java new file mode 100644 index 0000000000..465a7363fc --- /dev/null +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/DelegateVisitor.java @@ -0,0 +1,65 @@ +package de.jplag.antlr; + +import java.util.function.Function; + +/** + * Delegates visiting of a given antlr entity to a visitor for a different antlr entity. 
+ * @param The original antlr type visited + * @param The target antlr type + */ +public class DelegateVisitor { + private final AbstractVisitor delegate; + protected final Function mapper; + private boolean mapOnExit; + + /** + * @param delegate The target visitor to use + * @param mapper The mapper function used to derive the target entity + */ + public DelegateVisitor(AbstractVisitor delegate, Function mapper) { + this.delegate = delegate; + this.mapper = mapper; + this.mapOnExit = false; + } + + /** + * Delegates entering the original context + * @param parentData The data of the original visitor + */ + public void delegateEnter(HandlerData parentData) { + if (!this.mapOnExit) { + this.delegate.enter(parentData.derive(this.mapper)); + } + } + + /** + * Makes this visitor map exit events to enter events. Used mostly for mapping exit events to terminal nodes, which only + * provide enter events + */ + public void mapOnExit() { + this.mapOnExit = true; + } + + /** + * Delegates exiting the original context + * @param parentData The data of the original visitor + */ + public void delegateExit(HandlerData parentData) { + if (this.mapOnExit) { + this.delegate.enter(parentData.derive(this.mapper)); + } + } + + /** + * Checks if the target entity is present in the given antlr entity + * @param entity The original antlr entity + * @return is present + */ + public boolean isPresent(T entity) { + try { + return this.mapper.apply(entity) != null; + } catch (Exception e) { // If something goes wrong during mapping, the delegate is not present + return false; + } + } +} diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/HandlerData.java b/language-antlr-utils/src/main/java/de/jplag/antlr/HandlerData.java index 6c1bb40953..9ebced3536 100644 --- a/language-antlr-utils/src/main/java/de/jplag/antlr/HandlerData.java +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/HandlerData.java @@ -1,9 +1,14 @@ package de.jplag.antlr; +import java.util.function.Function; + 
import de.jplag.semantics.VariableRegistry; /** * Holds the data passed to the (quasi-static) listeners. */ -record HandlerData(T entity, VariableRegistry variableRegistry, TokenCollector collector) { +public record HandlerData(T entity, VariableRegistry variableRegistry, TokenCollector collector) { + public HandlerData derive(Function mapper) { + return new HandlerData<>(mapper.apply(entity), variableRegistry, collector); + } } diff --git a/language-antlr-utils/src/main/java/de/jplag/antlr/TokenCollector.java b/language-antlr-utils/src/main/java/de/jplag/antlr/TokenCollector.java index 150088a922..436892aff0 100644 --- a/language-antlr-utils/src/main/java/de/jplag/antlr/TokenCollector.java +++ b/language-antlr-utils/src/main/java/de/jplag/antlr/TokenCollector.java @@ -40,14 +40,16 @@ List getTokens() { } void addToken(TokenType jplagType, Function semanticsSupplier, T entity, - Function extractToken, VariableRegistry variableRegistry) { + Function extractStartToken, Function extractEndToken, + VariableRegistry variableRegistry) { if (jplagType == null) { return; } - org.antlr.v4.runtime.Token antlrToken = extractToken.apply(entity); + org.antlr.v4.runtime.Token antlrToken = extractStartToken.apply(entity); + org.antlr.v4.runtime.Token antlrEndToken = extractEndToken.apply(entity); int line = antlrToken.getLine(); int column = antlrToken.getCharPositionInLine() + 1; - int length = antlrToken.getText().length(); + int length = (antlrEndToken.getStopIndex() - antlrToken.getStartIndex()) + 1; Token token; if (extractsSemantics) { if (semanticsSupplier == null) { diff --git a/language-api/pom.xml b/language-api/pom.xml index 99d1d2e03d..87d62e29c5 100644 --- a/language-api/pom.xml +++ b/language-api/pom.xml @@ -18,7 +18,7 @@ com.ibm.icu icu4j-charset - 74.2 + 75.1 diff --git a/language-api/src/main/java/de/jplag/ParsingException.java b/language-api/src/main/java/de/jplag/ParsingException.java index c6e1ad6270..2f54e7a62d 100644 --- 
a/language-api/src/main/java/de/jplag/ParsingException.java +++ b/language-api/src/main/java/de/jplag/ParsingException.java @@ -76,8 +76,9 @@ private ParsingException(String message) { private static String constructMessage(File file, String reason) { StringBuilder messageBuilder = new StringBuilder(); - if (reason == null || !reason.contains(file.toString())) { - messageBuilder.append("failed to parse '%s'".formatted(file)); + String fileName = file == null ? "" : file.toString(); + if (reason == null || !reason.contains(fileName)) { + messageBuilder.append("failed to parse '%s'".formatted(fileName)); } if (reason != null && !reason.isBlank()) { messageBuilder.append(reason); diff --git a/language-api/src/main/java/de/jplag/util/FileUtils.java b/language-api/src/main/java/de/jplag/util/FileUtils.java index 5ce3c62b6c..1f645df750 100644 --- a/language-api/src/main/java/de/jplag/util/FileUtils.java +++ b/language-api/src/main/java/de/jplag/util/FileUtils.java @@ -178,4 +178,26 @@ public static void write(File file, String content) throws IOException { writer.write(content); writer.close(); } + + /** + * Checks if the given file can be written to. If the file does not exist checks if it can be created. + * @param file The file to check + * @return true, if the file can be written to + */ + public static boolean checkWritable(File file) { + if (file.exists()) { + return file.canWrite(); + } else { + return checkParentWritable(file); + } + } + + /** + * Checks if the parent file can be written to. 
+ * @param file The file to check + * @return true, if the parent can be written to + */ + public static boolean checkParentWritable(File file) { + return file.getAbsoluteFile().getParentFile().canWrite(); + } } diff --git a/languages/c/src/main/java/de/jplag/c/Scanner.java b/languages/c/src/main/java/de/jplag/c/Scanner.java index c3292814e4..a3ca8bb5d5 100644 --- a/languages/c/src/main/java/de/jplag/c/Scanner.java +++ b/languages/c/src/main/java/de/jplag/c/Scanner.java @@ -26,7 +26,13 @@ public List scan(Set files) throws ParsingException { for (File file : files) { this.currentFile = file; logger.trace("Scanning file {}", currentFile); - CPPScanner.scanFile(file, this); + try { + CPPScanner.scanFile(file, this); + } catch (ParsingException e) { + throw e; + } catch (Exception e) { + throw new ParsingException(file, "Unexpected error during parsing." + System.lineSeparator() + e.getMessage(), e); + } tokens.add(Token.fileEnd(currentFile)); } return tokens; diff --git a/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java b/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java index 28a6a88b9a..6bfa81c098 100644 --- a/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java +++ b/languages/cpp/src/main/java/de/jplag/cpp/CPPListener.java @@ -207,7 +207,7 @@ private void declarationRules() { visit(ParameterDeclarationContext.class).map(VARDEF).withSemantics(CodeSemantics::new).onEnter((ctx, varReg) -> { // don't register parameters in function declarations, e.g. 
bc6h_enc lines 117-120 - if (hasAncestor(ctx, FunctionDefinitionContext.class, SimpleDeclarationContext.class)) { + if (hasAncestor(ctx, FunctionDefinitionContext.class, SimpleDeclarationContext.class) && ctx.declarator() != null) { CPP14Parser.PointerDeclaratorContext pd = ctx.declarator().pointerDeclarator(); String name = pd.noPointerDeclarator().getText(); varReg.registerVariable(name, VariableScope.LOCAL, true); diff --git a/languages/golang/pom.xml b/languages/golang/pom.xml index a3f044fc30..ca240d77cc 100644 --- a/languages/golang/pom.xml +++ b/languages/golang/pom.xml @@ -14,6 +14,11 @@ org.antlr antlr4-runtime + + de.jplag + language-antlr-utils + ${revision} + diff --git a/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java b/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java index 3dbd09ec4b..e14926b43e 100644 --- a/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java +++ b/languages/golang/src/main/java/de/jplag/golang/GoLanguage.java @@ -1,25 +1,18 @@ package de.jplag.golang; -import java.io.File; -import java.util.List; -import java.util.Set; - import org.kohsuke.MetaInfServices; -import de.jplag.ParsingException; -import de.jplag.Token; +import de.jplag.antlr.AbstractAntlrLanguage; @MetaInfServices(de.jplag.Language.class) -public class GoLanguage implements de.jplag.Language { - +public class GoLanguage extends AbstractAntlrLanguage { private static final String NAME = "Go Parser"; private static final String IDENTIFIER = "go"; private static final int DEFAULT_MIN_TOKEN_MATCH = 8; private static final String[] FILE_EXTENSIONS = {".go"}; - private final GoParserAdapter parserAdapter; public GoLanguage() { - this.parserAdapter = new GoParserAdapter(); + super(new GoParserAdapter()); } @Override @@ -41,9 +34,4 @@ public String getIdentifier() { public int minimumTokenMatch() { return DEFAULT_MIN_TOKEN_MATCH; } - - @Override - public List parse(Set files, boolean normalize) throws ParsingException { - return 
parserAdapter.parse(files); - } } diff --git a/languages/golang/src/main/java/de/jplag/golang/GoListener.java b/languages/golang/src/main/java/de/jplag/golang/GoListener.java new file mode 100644 index 0000000000..e43de50c4f --- /dev/null +++ b/languages/golang/src/main/java/de/jplag/golang/GoListener.java @@ -0,0 +1,293 @@ +package de.jplag.golang; + +import static de.jplag.golang.GoTokenType.ARGUMENT; +import static de.jplag.golang.GoTokenType.ARRAY_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.ARRAY_BODY_END; +import static de.jplag.golang.GoTokenType.ARRAY_CONSTRUCTOR; +import static de.jplag.golang.GoTokenType.ARRAY_ELEMENT; +import static de.jplag.golang.GoTokenType.ASSIGNMENT; +import static de.jplag.golang.GoTokenType.BREAK; +import static de.jplag.golang.GoTokenType.CASE_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.CASE_BLOCK_END; +import static de.jplag.golang.GoTokenType.CONTINUE; +import static de.jplag.golang.GoTokenType.DEFER; +import static de.jplag.golang.GoTokenType.ELSE_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.ELSE_BLOCK_END; +import static de.jplag.golang.GoTokenType.FALLTHROUGH; +import static de.jplag.golang.GoTokenType.FOR_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.FOR_BLOCK_END; +import static de.jplag.golang.GoTokenType.FOR_STATEMENT; +import static de.jplag.golang.GoTokenType.FUNCTION_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.FUNCTION_BODY_END; +import static de.jplag.golang.GoTokenType.FUNCTION_DECLARATION; +import static de.jplag.golang.GoTokenType.FUNCTION_LITERAL; +import static de.jplag.golang.GoTokenType.FUNCTION_PARAMETER; +import static de.jplag.golang.GoTokenType.GO; +import static de.jplag.golang.GoTokenType.GOTO; +import static de.jplag.golang.GoTokenType.IF_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.IF_BLOCK_END; +import static de.jplag.golang.GoTokenType.IF_STATEMENT; +import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE; +import static 
de.jplag.golang.GoTokenType.IMPORT_CLAUSE_BEGIN; +import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_END; +import static de.jplag.golang.GoTokenType.IMPORT_DECLARATION; +import static de.jplag.golang.GoTokenType.INTERFACE_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.INTERFACE_BLOCK_END; +import static de.jplag.golang.GoTokenType.INTERFACE_DECLARATION; +import static de.jplag.golang.GoTokenType.INTERFACE_METHOD; +import static de.jplag.golang.GoTokenType.INVOCATION; +import static de.jplag.golang.GoTokenType.MAP_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.MAP_BODY_END; +import static de.jplag.golang.GoTokenType.MAP_CONSTRUCTOR; +import static de.jplag.golang.GoTokenType.MAP_ELEMENT; +import static de.jplag.golang.GoTokenType.MEMBER_DECLARATION; +import static de.jplag.golang.GoTokenType.NAMED_TYPE_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.NAMED_TYPE_BODY_END; +import static de.jplag.golang.GoTokenType.NAMED_TYPE_CONSTRUCTOR; +import static de.jplag.golang.GoTokenType.NAMED_TYPE_ELEMENT; +import static de.jplag.golang.GoTokenType.PACKAGE; +import static de.jplag.golang.GoTokenType.RECEIVER; +import static de.jplag.golang.GoTokenType.RECEIVE_STATEMENT; +import static de.jplag.golang.GoTokenType.RETURN; +import static de.jplag.golang.GoTokenType.SELECT_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.SELECT_BLOCK_END; +import static de.jplag.golang.GoTokenType.SELECT_STATEMENT; +import static de.jplag.golang.GoTokenType.SEND_STATEMENT; +import static de.jplag.golang.GoTokenType.SLICE_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.SLICE_BODY_END; +import static de.jplag.golang.GoTokenType.SLICE_CONSTRUCTOR; +import static de.jplag.golang.GoTokenType.SLICE_ELEMENT; +import static de.jplag.golang.GoTokenType.STATEMENT_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.STATEMENT_BLOCK_END; +import static de.jplag.golang.GoTokenType.STRUCT_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.STRUCT_BODY_END; 
+import static de.jplag.golang.GoTokenType.STRUCT_DECLARATION; +import static de.jplag.golang.GoTokenType.SWITCH_BLOCK_BEGIN; +import static de.jplag.golang.GoTokenType.SWITCH_BLOCK_END; +import static de.jplag.golang.GoTokenType.SWITCH_CASE; +import static de.jplag.golang.GoTokenType.SWITCH_STATEMENT; +import static de.jplag.golang.GoTokenType.TYPE_ASSERTION; +import static de.jplag.golang.GoTokenType.TYPE_CONSTRAINT; +import static de.jplag.golang.GoTokenType.VARIABLE_DECLARATION; + +import java.util.function.Function; + +import org.antlr.v4.runtime.ParserRuleContext; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.antlr.ContextVisitor; +import de.jplag.golang.grammar.GoParser.ArgumentsContext; +import de.jplag.golang.grammar.GoParser.ArrayTypeContext; +import de.jplag.golang.grammar.GoParser.AssignmentContext; +import de.jplag.golang.grammar.GoParser.BlockContext; +import de.jplag.golang.grammar.GoParser.BreakStmtContext; +import de.jplag.golang.grammar.GoParser.CommCaseContext; +import de.jplag.golang.grammar.GoParser.CommClauseContext; +import de.jplag.golang.grammar.GoParser.CompositeLitContext; +import de.jplag.golang.grammar.GoParser.ConstSpecContext; +import de.jplag.golang.grammar.GoParser.ContinueStmtContext; +import de.jplag.golang.grammar.GoParser.DeferStmtContext; +import de.jplag.golang.grammar.GoParser.ExprCaseClauseContext; +import de.jplag.golang.grammar.GoParser.ExprSwitchStmtContext; +import de.jplag.golang.grammar.GoParser.ExpressionContext; +import de.jplag.golang.grammar.GoParser.FallthroughStmtContext; +import de.jplag.golang.grammar.GoParser.FieldDeclContext; +import de.jplag.golang.grammar.GoParser.ForStmtContext; +import de.jplag.golang.grammar.GoParser.FunctionDeclContext; +import de.jplag.golang.grammar.GoParser.FunctionLitContext; +import de.jplag.golang.grammar.GoParser.GoStmtContext; +import de.jplag.golang.grammar.GoParser.GotoStmtContext; +import de.jplag.golang.grammar.GoParser.IfStmtContext; +import 
de.jplag.golang.grammar.GoParser.ImportDeclContext; +import de.jplag.golang.grammar.GoParser.ImportSpecContext; +import de.jplag.golang.grammar.GoParser.InterfaceTypeContext; +import de.jplag.golang.grammar.GoParser.KeyedElementContext; +import de.jplag.golang.grammar.GoParser.LiteralTypeContext; +import de.jplag.golang.grammar.GoParser.MapTypeContext; +import de.jplag.golang.grammar.GoParser.MethodDeclContext; +import de.jplag.golang.grammar.GoParser.MethodSpecContext; +import de.jplag.golang.grammar.GoParser.PackageClauseContext; +import de.jplag.golang.grammar.GoParser.ParameterDeclContext; +import de.jplag.golang.grammar.GoParser.ReceiverContext; +import de.jplag.golang.grammar.GoParser.RecvStmtContext; +import de.jplag.golang.grammar.GoParser.ReturnStmtContext; +import de.jplag.golang.grammar.GoParser.SelectStmtContext; +import de.jplag.golang.grammar.GoParser.SendStmtContext; +import de.jplag.golang.grammar.GoParser.ShortVarDeclContext; +import de.jplag.golang.grammar.GoParser.SliceTypeContext; +import de.jplag.golang.grammar.GoParser.StatementContext; +import de.jplag.golang.grammar.GoParser.StatementListContext; +import de.jplag.golang.grammar.GoParser.StructTypeContext; +import de.jplag.golang.grammar.GoParser.SwitchStmtContext; +import de.jplag.golang.grammar.GoParser.TypeAssertionContext; +import de.jplag.golang.grammar.GoParser.TypeCaseClauseContext; +import de.jplag.golang.grammar.GoParser.TypeNameContext; +import de.jplag.golang.grammar.GoParser.TypeSwitchStmtContext; +import de.jplag.golang.grammar.GoParser.VarDeclContext; + +/** + * Provides token extraction rules for {@link GoLanguage}. Based on an older implementation of the language module; see + * JPlagGoListener.java in the history.
+ */
+public class GoListener extends AbstractAntlrListener {
+    public GoListener() { // declares all token extraction rules by calling each rule group defined below
+        metaDeclarations();
+
+        interfaceDeclarations();
+        structDeclarations();
+
+        functionDeclarations();
+
+        controlFlowRules();
+        statements();
+
+        objectCreation();
+        controlFlowKeywords();
+    }
+
+    private void metaDeclarations() { // rules for the package clause and for import declarations
+        visit(PackageClauseContext.class).mapRange(PACKAGE);
+
+        visit(ImportDeclContext.class).map(IMPORT_DECLARATION);
+        visit(ImportDeclContext.class).delegateTerminal(ImportDeclContext::L_PAREN).map(IMPORT_CLAUSE_BEGIN); // begin token is tied to the '(' terminal of the import declaration
+        visit(ImportDeclContext.class).delegateTerminalExit(ImportDeclContext::R_PAREN).map(IMPORT_CLAUSE_END); // end token is tied to the ')' terminal
+
+        visit(ImportSpecContext.class).mapRange(IMPORT_CLAUSE);
+    }
+
+    private void interfaceDeclarations() { // rules for interface types and their curly-brace body
+        visit(InterfaceTypeContext.class).mapEnter(INTERFACE_DECLARATION);
+        visit(InterfaceTypeContext.class).delegateTerminal(InterfaceTypeContext::L_CURLY).map(INTERFACE_BLOCK_BEGIN);
+        visit(InterfaceTypeContext.class).delegateTerminalExit(InterfaceTypeContext::R_CURLY).map(INTERFACE_BLOCK_END);
+    }
+
+    private void structDeclarations() { // rules for struct types, their curly-brace body and their field declarations
+        visit(StructTypeContext.class).map(STRUCT_DECLARATION);
+        visit(StructTypeContext.class).delegateTerminal(StructTypeContext::L_CURLY).map(STRUCT_BODY_BEGIN);
+        visit(StructTypeContext.class).delegateTerminalExit(StructTypeContext::R_CURLY).map(STRUCT_BODY_END);
+
+        visit(FieldDeclContext.class).mapRange(MEMBER_DECLARATION);
+    }
+
+    private void functionDeclarations() { // rules for functions, methods, their parameters and method receivers
+        visit(FunctionDeclContext.class).delegateTerminal(FunctionDeclContext::FUNC).map(FUNCTION_DECLARATION);
+        visit(FunctionDeclContext.class).delegateTerminal(context -> context.block().L_CURLY()).map(FUNCTION_BODY_BEGIN); // NOTE(review): dereferences block() unchecked - confirm a declaration without a body cannot reach this rule
+        visit(FunctionDeclContext.class).delegateTerminalExit(context -> context.block().R_CURLY()).map(FUNCTION_BODY_END);
+
+        visit(MethodDeclContext.class).delegateTerminal(MethodDeclContext::FUNC).map(FUNCTION_DECLARATION); // methods share the token types of plain functions
+        visit(MethodDeclContext.class).delegateTerminal(context -> context.block().L_CURLY()).map(FUNCTION_BODY_BEGIN); // NOTE(review): same unchecked block() access as for functions
+        visit(MethodDeclContext.class).delegateTerminalExit(context -> context.block().R_CURLY()).map(FUNCTION_BODY_END);
+
+        visit(ParameterDeclContext.class, context -> !(context.parent.parent instanceof ReceiverContext)).mapRange(FUNCTION_PARAMETER); // grandparent is not a receiver: ordinary parameter
+        visit(ParameterDeclContext.class, context -> (context.parent.parent instanceof ReceiverContext)).mapRange(RECEIVER); // grandparent is a receiver: the method receiver
+    }
+
+    private void controlFlowRules() { // rules for if, for, switch and select statements and their blocks
+        visit(IfStmtContext.class).delegateTerminal(IfStmtContext::IF).map(IF_STATEMENT);
+        visit(BlockContext.class, context -> context.parent instanceof IfStmtContext ifStmt && context.equals((ifStmt).block(0))).map(IF_BLOCK_BEGIN,
+                IF_BLOCK_END); // block(0) of an if statement is mapped as the if block
+        visit(BlockContext.class, context -> context.parent instanceof IfStmtContext ifStmt && context.equals((ifStmt).block(1)))
+                .map(ELSE_BLOCK_BEGIN, ELSE_BLOCK_END); // block(1) of an if statement is mapped as the else block
+
+        visit(ForStmtContext.class).map(FOR_STATEMENT);
+        visit(ForStmtContext.class).delegateTerminal(context -> context.block().L_CURLY()).map(FOR_BLOCK_BEGIN);
+        visit(ForStmtContext.class).delegateTerminalExit(context -> context.block().R_CURLY()).map(FOR_BLOCK_END);
+
+        visit(SwitchStmtContext.class).map(SWITCH_STATEMENT);
+        visit(ExprSwitchStmtContext.class).delegateTerminal(ExprSwitchStmtContext::L_CURLY).map(SWITCH_BLOCK_BEGIN); // expression and type switches yield the same block tokens
+        visit(TypeSwitchStmtContext.class).delegateTerminal(TypeSwitchStmtContext::L_CURLY).map(SWITCH_BLOCK_BEGIN);
+        visit(ExprSwitchStmtContext.class).delegateTerminalExit(ExprSwitchStmtContext::R_CURLY).map(SWITCH_BLOCK_END);
+        visit(TypeSwitchStmtContext.class).delegateTerminalExit(TypeSwitchStmtContext::R_CURLY).map(SWITCH_BLOCK_END);
+
+        visit(ExprCaseClauseContext.class).map(SWITCH_CASE);
+        visit(StatementListContext.class, context -> context.parent instanceof ExprCaseClauseContext).map(CASE_BLOCK_BEGIN, CASE_BLOCK_END); // the statement list directly below a case clause is the case block
+        visit(TypeCaseClauseContext.class).map(SWITCH_CASE);
+        visit(StatementListContext.class, context -> context.parent instanceof TypeCaseClauseContext).map(CASE_BLOCK_BEGIN, CASE_BLOCK_END);
+
+        visit(SelectStmtContext.class).map(SELECT_STATEMENT);
+        visit(SelectStmtContext.class).delegateTerminal(SelectStmtContext::L_CURLY).map(SELECT_BLOCK_BEGIN);
+        visit(SelectStmtContext.class).delegateTerminalExit(SelectStmtContext::R_CURLY).map(SELECT_BLOCK_END);
+
+        visit(CommCaseContext.class).map(SWITCH_CASE); // select cases reuse the switch case token types
+        visit(StatementListContext.class, context -> context.parent instanceof CommClauseContext).map(CASE_BLOCK_BEGIN, CASE_BLOCK_END);
+    }
+
+    private void statements() { // rules for declarations, function literals, assignments, invocations and statement blocks
+        visit(VarDeclContext.class).mapRange(VARIABLE_DECLARATION);
+        visit(ConstSpecContext.class).map(VARIABLE_DECLARATION); // constants share the variable declaration token type
+
+        visit(FunctionLitContext.class).map(FUNCTION_LITERAL);
+        visit(FunctionLitContext.class).delegateContext(FunctionLitContext::block).map(FUNCTION_BODY_BEGIN, FUNCTION_BODY_END);
+
+        visit(AssignmentContext.class).mapRange(ASSIGNMENT);
+
+        visit(ShortVarDeclContext.class).map(VARIABLE_DECLARATION); // a short variable declaration yields a declaration token ...
+        visit(ShortVarDeclContext.class).map(ASSIGNMENT); // ... and an assignment token
+
+        visit(ArgumentsContext.class).mapRange(INVOCATION);
+        visit(ExpressionContext.class, context -> hasAncestor(context, ArgumentsContext.class)).mapRange(ARGUMENT); // presumably matches expressions at any depth below an argument list - verify hasAncestor
+
+        visit(StatementContext.class).delegateContext(StatementContext::block).map(STATEMENT_BLOCK_BEGIN, STATEMENT_BLOCK_END);
+    }
+
+    private void objectCreation() { // rules for composite literals, type constraints, type assertions and interface methods
+        visitKeyedElement(LiteralTypeContext::arrayType).mapRange(ARRAY_ELEMENT);
+        visitKeyedElement(LiteralTypeContext::structType).mapRange(MEMBER_DECLARATION); // elements of a struct literal reuse MEMBER_DECLARATION
+        visitKeyedElement(LiteralTypeContext::mapType).mapRange(MAP_ELEMENT);
+        visitKeyedElement(LiteralTypeContext::sliceType).mapRange(SLICE_ELEMENT);
+        visitKeyedElement(LiteralTypeContext::typeName).mapRange(NAMED_TYPE_ELEMENT);
+
+        visitCompositeLitChild(ArrayTypeContext.class).map(ARRAY_CONSTRUCTOR);
+        visitCompositeLitDelegate(ArrayTypeContext.class).map(ARRAY_BODY_BEGIN, ARRAY_BODY_END);
+
+        visitCompositeLitChild(SliceTypeContext.class).map(SLICE_CONSTRUCTOR);
+        visitCompositeLitDelegate(SliceTypeContext.class).map(SLICE_BODY_BEGIN, SLICE_BODY_END);
+
+        visitCompositeLitChild(MapTypeContext.class).map(MAP_CONSTRUCTOR);
+        visitCompositeLitDelegate(MapTypeContext.class).map(MAP_BODY_BEGIN, MAP_BODY_END);
+
+        visitCompositeLitChild(TypeNameContext.class).map(NAMED_TYPE_CONSTRUCTOR);
+        visitCompositeLitDelegate(TypeNameContext.class).map(NAMED_TYPE_BODY_BEGIN, NAMED_TYPE_BODY_END);
+        visit(TypeNameContext.class, context -> context.parent instanceof InterfaceTypeContext).mapRange(TYPE_CONSTRAINT); // a type name directly below an interface type is a type constraint
+
+        visit(TypeAssertionContext.class).mapRange(TYPE_ASSERTION);
+        visit(MethodSpecContext.class).mapRange(INTERFACE_METHOD);
+    }
+
+    private void controlFlowKeywords() { // rules for jump statements, go/defer and channel send/receive statements
+        visit(ReturnStmtContext.class).mapRange(RETURN);
+        visit(BreakStmtContext.class).mapRange(BREAK);
+        visit(ContinueStmtContext.class).mapRange(CONTINUE);
+        visit(FallthroughStmtContext.class).mapRange(FALLTHROUGH);
+        visit(GotoStmtContext.class).mapRange(GOTO);
+        visit(GoStmtContext.class).mapRange(GO);
+        visit(DeferStmtContext.class).mapRange(DEFER);
+        visit(SendStmtContext.class).mapRange(SEND_STATEMENT);
+        visit(RecvStmtContext.class).mapRange(RECEIVE_STATEMENT);
+    }
+
+    private de.jplag.antlr.ContextVisitor visitCompositeLitChild(Class type) { // NOTE(review): generic type arguments look stripped from this text (raw Class/ContextVisitor) - verify against the repository
+        return visit(type, context -> context.parent.parent instanceof CompositeLitContext); // matches a context of the given type whose grandparent is a composite literal
+    }
+
+    private ContextVisitor visitCompositeLitDelegate(Class type) { // matches the composite literal itself, selected by the kind of its literal type
+        return visit(CompositeLitContext.class,
+                context -> context.literalType().children.stream().anyMatch(it -> type.isAssignableFrom(it.getClass()))); // true if a direct child of the literal type is of the given type
+    }
+
+    private ContextVisitor visitKeyedElement(Function typeGetter) { // matches keyed elements whose enclosing composite literal has the literal type picked by typeGetter
+        return visit(KeyedElementContext.class, context -> {
+            CompositeLitContext parent = getAncestor(context, CompositeLitContext.class);
+            if (parent == null) { // no composite literal found by getAncestor
+                return false;
+            }
+
+            LiteralTypeContext typeContext = parent.literalType();
+            if (typeContext == null) { // composite literal without a literal type
+                return false;
+            }
+
+            return typeGetter.apply(typeContext) != null; // the getter returns non-null only for the matching kind of literal type
+        });
+    }
+}
diff --git a/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java b/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java
index 3d9da82c61..3e0a023fe7 100644
--- a/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java
+++ b/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java
@@ -1,61 +1,33 @@
 package de.jplag.golang;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CharStream;
 import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
 import org.antlr.v4.runtime.ParserRuleContext;
-import org.antlr.v4.runtime.tree.ParseTree;
-import org.antlr.v4.runtime.tree.ParseTreeWalker;
 
-import de.jplag.AbstractParser;
-import de.jplag.ParsingException;
-import de.jplag.Token;
-import de.jplag.TokenType;
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.antlr.AbstractAntlrParserAdapter;
 import de.jplag.golang.grammar.GoLexer;
 import de.jplag.golang.grammar.GoParser;
-import de.jplag.util.FileUtils;
-
-public class GoParserAdapter extends AbstractParser {
-    private File currentFile;
-    private List tokens;
 
-    public List parse(Set files) throws ParsingException {
-        tokens = new ArrayList<>();
-        for (File file : files) {
-            parseFile(file);
-            tokens.add(Token.fileEnd(file));
-        }
-        return tokens;
+public class GoParserAdapter extends AbstractAntlrParserAdapter { // supplies the Go lexer, parser, entry rule and listener to the shared ANTLR adapter base class
+    @Override
+    protected Lexer createLexer(CharStream input) { // lexer of the Go grammar over the given character stream
+        return new GoLexer(input);
     }
 
-    private void parseFile(File file) throws ParsingException {
-        try (BufferedReader reader = FileUtils.openFileReader(file)) {
-            currentFile = file;
-
-            GoLexer lexer = new GoLexer(CharStreams.fromReader(reader));
-            CommonTokenStream tokenStream = new CommonTokenStream(lexer);
-            GoParser parser = new GoParser(tokenStream);
-
-            ParserRuleContext entryContext = parser.sourceFile();
-            ParseTreeWalker treeWalker = new ParseTreeWalker();
+    @Override
+    protected GoParser createParser(CommonTokenStream tokenStream) { // parser of the Go grammar over the given token stream
+        return new GoParser(tokenStream);
+    }
 
-            JPlagGoListener listener = new JPlagGoListener(this);
-            for (int i = 0; i < entryContext.getChildCount(); i++) {
-                ParseTree parseTree = entryContext.getChild(i);
-                treeWalker.walk(listener, parseTree);
-            }
-        } catch (IOException exception) {
-            throw new ParsingException(file, exception.getMessage(), exception);
-        }
+    @Override
+    protected ParserRuleContext getEntryContext(GoParser parser) { // sourceFile is the rule parsing starts from, as in the removed parseFile method
+        return parser.sourceFile();
     }
 
-    public void addToken(TokenType tokenType, int line, int column, int length) {
-        tokens.add(new Token(tokenType, currentFile, line, column, length));
+    @Override
+    protected AbstractAntlrListener getListener() { // returns a new GoListener on every call
+        return new GoListener();
     }
 }
diff --git a/languages/golang/src/main/java/de/jplag/golang/JPlagGoListener.java b/languages/golang/src/main/java/de/jplag/golang/JPlagGoListener.java deleted file mode 100644 index 8774fb81d0..0000000000 --- a/languages/golang/src/main/java/de/jplag/golang/JPlagGoListener.java +++ /dev/null @@ -1,600 +0,0 @@ -package de.jplag.golang; - -import static de.jplag.golang.GoTokenType.ARGUMENT; -import static de.jplag.golang.GoTokenType.ARRAY_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.ARRAY_BODY_END; -import static de.jplag.golang.GoTokenType.ARRAY_CONSTRUCTOR; -import static de.jplag.golang.GoTokenType.ARRAY_ELEMENT; -import static de.jplag.golang.GoTokenType.ASSIGNMENT; -import static de.jplag.golang.GoTokenType.BREAK; -import static de.jplag.golang.GoTokenType.CASE_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.CASE_BLOCK_END; -import static de.jplag.golang.GoTokenType.CONTINUE; -import static de.jplag.golang.GoTokenType.DEFER; -import static de.jplag.golang.GoTokenType.ELSE_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.ELSE_BLOCK_END; -import static
de.jplag.golang.GoTokenType.FALLTHROUGH; -import static de.jplag.golang.GoTokenType.FOR_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.FOR_BLOCK_END; -import static de.jplag.golang.GoTokenType.FOR_STATEMENT; -import static de.jplag.golang.GoTokenType.FUNCTION_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.FUNCTION_BODY_END; -import static de.jplag.golang.GoTokenType.FUNCTION_DECLARATION; -import static de.jplag.golang.GoTokenType.FUNCTION_LITERAL; -import static de.jplag.golang.GoTokenType.FUNCTION_PARAMETER; -import static de.jplag.golang.GoTokenType.GO; -import static de.jplag.golang.GoTokenType.GOTO; -import static de.jplag.golang.GoTokenType.IF_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.IF_BLOCK_END; -import static de.jplag.golang.GoTokenType.IF_STATEMENT; -import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE; -import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_BEGIN; -import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_END; -import static de.jplag.golang.GoTokenType.IMPORT_DECLARATION; -import static de.jplag.golang.GoTokenType.INTERFACE_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.INTERFACE_BLOCK_END; -import static de.jplag.golang.GoTokenType.INTERFACE_DECLARATION; -import static de.jplag.golang.GoTokenType.INTERFACE_METHOD; -import static de.jplag.golang.GoTokenType.INVOCATION; -import static de.jplag.golang.GoTokenType.MAP_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.MAP_BODY_END; -import static de.jplag.golang.GoTokenType.MAP_CONSTRUCTOR; -import static de.jplag.golang.GoTokenType.MAP_ELEMENT; -import static de.jplag.golang.GoTokenType.MEMBER_DECLARATION; -import static de.jplag.golang.GoTokenType.NAMED_TYPE_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.NAMED_TYPE_BODY_END; -import static de.jplag.golang.GoTokenType.NAMED_TYPE_CONSTRUCTOR; -import static de.jplag.golang.GoTokenType.NAMED_TYPE_ELEMENT; -import static de.jplag.golang.GoTokenType.PACKAGE; -import static 
de.jplag.golang.GoTokenType.RECEIVER; -import static de.jplag.golang.GoTokenType.RECEIVE_STATEMENT; -import static de.jplag.golang.GoTokenType.RETURN; -import static de.jplag.golang.GoTokenType.SELECT_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.SELECT_BLOCK_END; -import static de.jplag.golang.GoTokenType.SELECT_STATEMENT; -import static de.jplag.golang.GoTokenType.SEND_STATEMENT; -import static de.jplag.golang.GoTokenType.SLICE_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.SLICE_BODY_END; -import static de.jplag.golang.GoTokenType.SLICE_CONSTRUCTOR; -import static de.jplag.golang.GoTokenType.SLICE_ELEMENT; -import static de.jplag.golang.GoTokenType.STATEMENT_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.STATEMENT_BLOCK_END; -import static de.jplag.golang.GoTokenType.STRUCT_BODY_BEGIN; -import static de.jplag.golang.GoTokenType.STRUCT_BODY_END; -import static de.jplag.golang.GoTokenType.STRUCT_DECLARATION; -import static de.jplag.golang.GoTokenType.SWITCH_BLOCK_BEGIN; -import static de.jplag.golang.GoTokenType.SWITCH_BLOCK_END; -import static de.jplag.golang.GoTokenType.SWITCH_CASE; -import static de.jplag.golang.GoTokenType.SWITCH_STATEMENT; -import static de.jplag.golang.GoTokenType.TYPE_ASSERTION; -import static de.jplag.golang.GoTokenType.TYPE_CONSTRAINT; -import static de.jplag.golang.GoTokenType.VARIABLE_DECLARATION; - -import java.util.Arrays; -import java.util.Deque; -import java.util.LinkedList; -import java.util.Optional; - -import org.antlr.v4.runtime.Token; -import org.antlr.v4.runtime.tree.TerminalNode; - -import de.jplag.TokenType; -import de.jplag.golang.grammar.GoParser; -import de.jplag.golang.grammar.GoParserBaseListener; - -public class JPlagGoListener extends GoParserBaseListener { - - private final GoParserAdapter parserAdapter; - private final Deque blockContexts; - - public JPlagGoListener(GoParserAdapter parserAdapter) { - this.parserAdapter = parserAdapter; - blockContexts = new LinkedList<>(); - } - - /** - * 
Passes a token of the given tokenType to the parserAdapter, representing the grammar's token given by token. - * @param tokenType the custom token type that occurred. - * @param token the corresponding grammar's token - */ - private void transformToken(TokenType tokenType, Token token) { - parserAdapter.addToken(tokenType, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length()); - } - - /** - * Passes a token of the given tokenType to the parserAdapter, representing the current grammatical context given by - * start and end. - * @param tokenType the custom token type that occurred. - * @param start the first Token of the context - * @param end the last Token of the context - */ - private void transformToken(GoTokenType tokenType, Token start, Token end) { - parserAdapter.addToken(tokenType, start.getLine(), start.getCharPositionInLine() + 1, end.getStopIndex() - start.getStartIndex() + 1); - } - - private void enterContext(GoBlockContext context) { - blockContexts.push(context); - } - - private void expectAndLeave(GoBlockContext... 
contexts) { - GoBlockContext topContext = blockContexts.pop(); - assert Arrays.stream(contexts).anyMatch(context -> context == topContext); - } - - private GoBlockContext getCurrentContext() { - return blockContexts.peek(); - } - - /* TOP LEVEL STRUCTURES */ - - @Override - public void enterPackageClause(GoParser.PackageClauseContext context) { - transformToken(PACKAGE, context.getStart(), context.getStop()); - super.enterPackageClause(context); - } - - @Override - public void enterImportDecl(GoParser.ImportDeclContext context) { - transformToken(IMPORT_DECLARATION, context.getStart()); - - // if the children contain TerminalNodes, then it must be '(' and ')' - Optional listStart = context.children.stream().filter(TerminalNode.class::isInstance).map(TerminalNode.class::cast).findFirst(); - listStart.ifPresent(lParenTree -> transformToken(IMPORT_CLAUSE_BEGIN, lParenTree.getSymbol())); - - super.enterImportDecl(context); - } - - @Override - public void exitImportDecl(GoParser.ImportDeclContext context) { - if (context.getStop().getText().equals(")")) { - transformToken(IMPORT_CLAUSE_END, context.getStop()); - } - super.exitImportDecl(context); - } - - @Override - public void enterImportSpec(GoParser.ImportSpecContext context) { - transformToken(IMPORT_CLAUSE, context.getStart(), context.getStop()); - super.enterImportSpec(context); - } - - /* INTERFACE */ - - @Override - public void enterInterfaceType(GoParser.InterfaceTypeContext context) { - transformToken(INTERFACE_DECLARATION, context.getStart()); - enterContext(GoBlockContext.INTERFACE_BODY); - super.enterInterfaceType(context); - } - - @Override - public void exitInterfaceType(GoParser.InterfaceTypeContext context) { - expectAndLeave(GoBlockContext.INTERFACE_BODY); - super.exitInterfaceType(context); - } - - /* STRUCT */ - - @Override - public void enterStructType(GoParser.StructTypeContext context) { - transformToken(STRUCT_DECLARATION, context.getStart()); - enterContext(GoBlockContext.STRUCT_BODY); - 
super.enterStructType(context); - } - - @Override - public void exitStructType(GoParser.StructTypeContext context) { - expectAndLeave(GoBlockContext.STRUCT_BODY); - super.exitStructType(context); - } - - @Override - public void enterFieldDecl(GoParser.FieldDeclContext context) { - transformToken(MEMBER_DECLARATION, context.getStart(), context.getStop()); - super.enterFieldDecl(context); - } - - /* FUNCTION */ - - @Override - public void enterFunctionDecl(GoParser.FunctionDeclContext context) { - transformToken(FUNCTION_DECLARATION, context.getStart()); - enterContext(GoBlockContext.FUNCTION_BODY); - super.enterFunctionDecl(context); - } - - @Override - public void exitFunctionDecl(GoParser.FunctionDeclContext context) { - expectAndLeave(GoBlockContext.FUNCTION_BODY); - super.exitFunctionDecl(context); - } - - @Override - public void enterMethodDecl(GoParser.MethodDeclContext context) { - transformToken(FUNCTION_DECLARATION, context.getStart()); - enterContext(GoBlockContext.FUNCTION_BODY); - super.enterMethodDecl(context); - } - - @Override - public void exitMethodDecl(GoParser.MethodDeclContext context) { - expectAndLeave(GoBlockContext.FUNCTION_BODY); - super.exitMethodDecl(context); - } - - @Override - public void enterParameterDecl(GoParser.ParameterDeclContext context) { - if (context.parent.parent instanceof GoParser.ReceiverContext) { - transformToken(RECEIVER, context.getStart(), context.getStop()); - } else { - transformToken(FUNCTION_PARAMETER, context.getStart(), context.getStop()); - } - super.enterParameterDecl(context); - } - - /* CONTROL FLOW STATEMENTS */ - - @Override - public void enterIfStmt(GoParser.IfStmtContext context) { - transformToken(IF_STATEMENT, context.getStart()); - enterContext(GoBlockContext.IF_BLOCK); - super.enterIfStmt(context); - } - - @Override - public void exitIfStmt(GoParser.IfStmtContext context) { - expectAndLeave(GoBlockContext.IF_BLOCK, GoBlockContext.ELSE_BLOCK); - super.exitIfStmt(context); - } - - @Override - public 
void enterForStmt(GoParser.ForStmtContext context) { - transformToken(FOR_STATEMENT, context.getStart()); - enterContext(GoBlockContext.FOR_BLOCK); - super.enterForStmt(context); - } - - @Override - public void exitForStmt(GoParser.ForStmtContext context) { - expectAndLeave(GoBlockContext.FOR_BLOCK); - super.exitForStmt(context); - } - - @Override - public void enterSwitchStmt(GoParser.SwitchStmtContext context) { - transformToken(SWITCH_STATEMENT, context.getStart()); - enterContext(GoBlockContext.SWITCH_BLOCK); - super.enterSwitchStmt(context); - } - - @Override - public void exitSwitchStmt(GoParser.SwitchStmtContext context) { - expectAndLeave(GoBlockContext.SWITCH_BLOCK); - super.exitSwitchStmt(context); - } - - @Override - public void enterExprCaseClause(GoParser.ExprCaseClauseContext context) { - transformToken(SWITCH_CASE, context.getStart()); - var caseBlock = context.getChild(GoParser.StatementListContext.class, 0); - if (caseBlock != null) { - enterContext(GoBlockContext.CASE_BLOCK); - transformToken(CASE_BLOCK_BEGIN, caseBlock.getStart()); - } - super.enterExprCaseClause(context); - } - - @Override - public void exitExprCaseClause(GoParser.ExprCaseClauseContext context) { - if (getCurrentContext() == GoBlockContext.CASE_BLOCK) { - transformToken(CASE_BLOCK_END, context.getStop()); - expectAndLeave(GoBlockContext.CASE_BLOCK); - } - super.exitExprCaseClause(context); - } - - @Override - public void enterTypeCaseClause(GoParser.TypeCaseClauseContext context) { - transformToken(SWITCH_CASE, context.getStart()); - var caseBlock = context.getChild(GoParser.StatementListContext.class, 0); - if (caseBlock != null) { - enterContext(GoBlockContext.CASE_BLOCK); - transformToken(CASE_BLOCK_BEGIN, caseBlock.getStart()); - } - super.enterTypeCaseClause(context); - } - - @Override - public void exitTypeCaseClause(GoParser.TypeCaseClauseContext context) { - if (getCurrentContext() == GoBlockContext.CASE_BLOCK) { - transformToken(CASE_BLOCK_END, context.getStop()); - 
expectAndLeave(GoBlockContext.CASE_BLOCK); - } - super.exitTypeCaseClause(context); - } - - @Override - public void enterSelectStmt(GoParser.SelectStmtContext context) { - transformToken(SELECT_STATEMENT, context.getStart()); - enterContext(GoBlockContext.SELECT_CONTEXT); - super.enterSelectStmt(context); - } - - @Override - public void exitSelectStmt(GoParser.SelectStmtContext context) { - expectAndLeave(GoBlockContext.SELECT_CONTEXT); - super.exitSelectStmt(context); - } - - @Override - public void enterCommCase(GoParser.CommCaseContext context) { - transformToken(SWITCH_CASE, context.getStart()); - var caseBlock = context.getChild(GoParser.StatementListContext.class, 0); - if (caseBlock != null) { - enterContext(GoBlockContext.CASE_BLOCK); - transformToken(CASE_BLOCK_BEGIN, caseBlock.getStart()); - } - super.enterCommCase(context); - } - - @Override - public void exitCommCase(GoParser.CommCaseContext context) { - if (getCurrentContext() == GoBlockContext.CASE_BLOCK) { - transformToken(CASE_BLOCK_END, context.getStop()); - expectAndLeave(GoBlockContext.CASE_BLOCK); - } - super.exitCommCase(context); - } - - /* STATEMENTS */ - - @Override - public void enterVarDecl(GoParser.VarDeclContext context) { - transformToken(VARIABLE_DECLARATION, context.getStart(), context.getStop()); - super.enterVarDecl(context); - } - - @Override - public void enterConstSpec(GoParser.ConstSpecContext context) { - transformToken(VARIABLE_DECLARATION, context.getStart()); - super.enterConstSpec(context); - } - - @Override - public void enterFunctionLit(GoParser.FunctionLitContext context) { - transformToken(FUNCTION_LITERAL, context.getStart()); - enterContext(GoBlockContext.FUNCTION_BODY); - super.enterFunctionLit(context); - } - - @Override - public void exitFunctionLit(GoParser.FunctionLitContext context) { - expectAndLeave(GoBlockContext.FUNCTION_BODY); - super.exitFunctionLit(context); - } - - @Override - public void enterAssignment(GoParser.AssignmentContext context) { - 
transformToken(ASSIGNMENT, context.getStart(), context.getStop()); - super.enterAssignment(context); - } - - @Override - public void enterShortVarDecl(GoParser.ShortVarDeclContext context) { - transformToken(VARIABLE_DECLARATION, context.getStart()); - transformToken(ASSIGNMENT, context.getStart()); - super.enterShortVarDecl(context); - } - - @Override - public void enterArguments(GoParser.ArgumentsContext context) { - transformToken(INVOCATION, context.getStart(), context.getStop()); - - // Arguments consist of ExpressionLists, which consist of Expressions - // Get all Expressions of all ExpressionLists in this ArgumentsContext - context.getRuleContexts(GoParser.ExpressionListContext.class).stream() - .flatMap(child -> child.getRuleContexts(GoParser.ExpressionContext.class).stream()) - .forEachOrdered(arg -> transformToken(ARGUMENT, arg.getStart(), arg.getStop())); - super.enterArguments(context); - } - - @Override - public void enterStatement(GoParser.StatementContext context) { - enterContext(GoBlockContext.STATEMENT_BLOCK); - super.enterStatement(context); - } - - @Override - public void exitStatement(GoParser.StatementContext context) { - expectAndLeave(GoBlockContext.STATEMENT_BLOCK); - super.exitStatement(context); - } - - /* OBJECT CREATION */ - - @Override - public void enterKeyedElement(GoParser.KeyedElementContext context) { - Optional tokenType = getCurrentContext().getElement(); - tokenType.ifPresent(type -> transformToken(type, context.getStart(), context.getStop())); - super.enterKeyedElement(context); - } - - @Override - public void enterArrayType(GoParser.ArrayTypeContext context) { - // otherwise, it is just a type expression - if (context.parent.parent instanceof GoParser.CompositeLitContext) { - enterContext(GoBlockContext.ARRAY_BODY); - transformToken(ARRAY_CONSTRUCTOR, context.getStart(), context.getStop()); - } - super.enterArrayType(context); - } - - @Override - public void enterSliceType(GoParser.SliceTypeContext context) { - // otherwise, 
it is just a type expression - if (context.parent.parent instanceof GoParser.CompositeLitContext) { - enterContext(GoBlockContext.SLICE_BODY); - transformToken(SLICE_CONSTRUCTOR, context.getStart(), context.getStop()); - } - super.enterSliceType(context); - } - - @Override - public void exitCompositeLit(GoParser.CompositeLitContext context) { - expectAndLeave(GoBlockContext.MAP_BODY, GoBlockContext.SLICE_BODY, GoBlockContext.ARRAY_BODY, GoBlockContext.NAMED_TYPE_BODY); - super.exitCompositeLit(context); - } - - @Override - public void enterMapType(GoParser.MapTypeContext context) { - // otherwise, it is just a type expression - if (context.parent.parent instanceof GoParser.CompositeLitContext) { - enterContext(GoBlockContext.MAP_BODY); - transformToken(MAP_CONSTRUCTOR, context.getStart(), context.getStop()); - } - super.enterMapType(context); - } - - @Override - public void enterTypeName(GoParser.TypeNameContext context) { - if (context.parent.parent instanceof GoParser.CompositeLitContext) { - transformToken(NAMED_TYPE_CONSTRUCTOR, context.getStart()); - enterContext(GoBlockContext.NAMED_TYPE_BODY); - } else if (context.parent instanceof GoParser.InterfaceTypeContext) { - transformToken(TYPE_CONSTRAINT, context.getStart(), context.getStop()); - } - super.enterTypeName(context); - } - - @Override - public void enterTypeAssertion(GoParser.TypeAssertionContext context) { - transformToken(TYPE_ASSERTION, context.getStart(), context.getStop()); - super.enterTypeAssertion(context); - } - - @Override - public void enterMethodSpec(GoParser.MethodSpecContext context) { - transformToken(INTERFACE_METHOD, context.getStart(), context.getStop()); - super.enterMethodSpec(context); - } - - /* CONTROL FLOW KEYWORDS */ - - @Override - public void enterReturnStmt(GoParser.ReturnStmtContext context) { - transformToken(RETURN, context.getStart(), context.getStop()); - super.enterReturnStmt(context); - } - - @Override - public void enterBreakStmt(GoParser.BreakStmtContext context) { - 
transformToken(BREAK, context.getStart(), context.getStop()); - super.enterBreakStmt(context); - } - - @Override - public void enterContinueStmt(GoParser.ContinueStmtContext context) { - transformToken(CONTINUE, context.getStart(), context.getStop()); - super.enterContinueStmt(context); - } - - @Override - public void enterFallthroughStmt(GoParser.FallthroughStmtContext context) { - transformToken(FALLTHROUGH, context.getStart(), context.getStop()); - super.enterFallthroughStmt(context); - } - - @Override - public void enterGotoStmt(GoParser.GotoStmtContext context) { - transformToken(GOTO, context.getStart(), context.getStop()); - super.enterGotoStmt(context); - } - - @Override - public void enterGoStmt(GoParser.GoStmtContext context) { - transformToken(GO, context.getStart(), context.getStop()); - super.enterGoStmt(context); - } - - @Override - public void enterDeferStmt(GoParser.DeferStmtContext context) { - transformToken(DEFER, context.getStart(), context.getStop()); - super.enterDeferStmt(context); - } - - @Override - public void enterSendStmt(GoParser.SendStmtContext ctx) { - transformToken(SEND_STATEMENT, ctx.getStart(), ctx.getStop()); - super.enterSendStmt(ctx); - } - - @Override - public void enterRecvStmt(GoParser.RecvStmtContext ctx) { - transformToken(RECEIVE_STATEMENT, ctx.getStart(), ctx.getStop()); - super.enterRecvStmt(ctx); - } - - @Override - public void visitTerminal(TerminalNode node) { - Token token = node.getSymbol(); - switch (token.getText()) { - case "else" -> { - expectAndLeave(GoBlockContext.IF_BLOCK); - enterContext(GoBlockContext.ELSE_BLOCK); - } - case "{" -> transformToken(getCurrentContext().getBegin(), token); - case "}" -> transformToken(getCurrentContext().getEnd(), token); - default -> { - // do nothing. - } - } - super.visitTerminal(node); - } - - /** - * This enumeration provides sets of information regarding different types of nesting structures in Go. 
Each element is - * a tuple of a token for the beginning of a block, the end of the block, and optionally, for the elements contained. - *

- * As the Go parser does not differentiate between different kinds of blocks, a stack of these GoBlockContexts is - * required to be able to assign the correct token types for each block. - */ - private enum GoBlockContext { - ARRAY_BODY(ARRAY_BODY_BEGIN, ARRAY_BODY_END, Optional.of(ARRAY_ELEMENT)), - STRUCT_BODY(STRUCT_BODY_BEGIN, STRUCT_BODY_END, Optional.of(MEMBER_DECLARATION)), - MAP_BODY(MAP_BODY_BEGIN, MAP_BODY_END, Optional.of(MAP_ELEMENT)), - SLICE_BODY(SLICE_BODY_BEGIN, SLICE_BODY_END, Optional.of(SLICE_ELEMENT)), - NAMED_TYPE_BODY(NAMED_TYPE_BODY_BEGIN, NAMED_TYPE_BODY_END, Optional.of(NAMED_TYPE_ELEMENT)), - FUNCTION_BODY(FUNCTION_BODY_BEGIN, FUNCTION_BODY_END, Optional.empty()), - - IF_BLOCK(IF_BLOCK_BEGIN, IF_BLOCK_END, Optional.empty()), - ELSE_BLOCK(ELSE_BLOCK_BEGIN, ELSE_BLOCK_END, Optional.empty()), - FOR_BLOCK(FOR_BLOCK_BEGIN, FOR_BLOCK_END, Optional.empty()), - SWITCH_BLOCK(SWITCH_BLOCK_BEGIN, SWITCH_BLOCK_END, Optional.empty()), - SELECT_CONTEXT(SELECT_BLOCK_BEGIN, SELECT_BLOCK_END, Optional.empty()), - STATEMENT_BLOCK(STATEMENT_BLOCK_BEGIN, STATEMENT_BLOCK_END, Optional.empty()), - CASE_BLOCK(CASE_BLOCK_BEGIN, CASE_BLOCK_END, Optional.empty()), - INTERFACE_BODY(INTERFACE_BLOCK_BEGIN, INTERFACE_BLOCK_END, Optional.empty()); - - private final GoTokenType beginTokenType; - private final GoTokenType endTokenType; - private final Optional elementTokenType; - - GoBlockContext(GoTokenType beginTokenType, GoTokenType endTokenType, Optional elementTokenType) { - this.beginTokenType = beginTokenType; - this.endTokenType = endTokenType; - this.elementTokenType = elementTokenType; - } - - GoTokenType getBegin() { - return this.beginTokenType; - } - - GoTokenType getEnd() { - return this.endTokenType; - } - - public Optional getElement() { - return this.elementTokenType; - } - } -} diff --git a/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java b/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java index 0feafc7bc0..68d3553e63 
100644 --- a/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java +++ b/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java @@ -1,156 +1,48 @@ package de.jplag.golang; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import java.util.OptionalInt; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import de.jplag.ParsingException; -import de.jplag.SharedTokenType; -import de.jplag.Token; -import de.jplag.TokenPrinter; - -class GoLanguageTest { - /** - * Test source file that is supposed to produce a complete set of tokens, i.e. all types of tokens. 
- */ +import static de.jplag.golang.GoTokenType.ARGUMENT; +import static de.jplag.golang.GoTokenType.ASSIGNMENT; +import static de.jplag.golang.GoTokenType.FUNCTION_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.FUNCTION_BODY_END; +import static de.jplag.golang.GoTokenType.FUNCTION_DECLARATION; +import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE; +import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_BEGIN; +import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_END; +import static de.jplag.golang.GoTokenType.IMPORT_DECLARATION; +import static de.jplag.golang.GoTokenType.INVOCATION; +import static de.jplag.golang.GoTokenType.MEMBER_DECLARATION; +import static de.jplag.golang.GoTokenType.PACKAGE; +import static de.jplag.golang.GoTokenType.STRUCT_BODY_BEGIN; +import static de.jplag.golang.GoTokenType.STRUCT_BODY_END; +import static de.jplag.golang.GoTokenType.STRUCT_DECLARATION; +import static de.jplag.golang.GoTokenType.VARIABLE_DECLARATION; + +import de.jplag.testutils.LanguageModuleTest; +import de.jplag.testutils.datacollector.TestDataCollector; +import de.jplag.testutils.datacollector.TestSourceIgnoredLinesCollector; + +class GoLanguageTest extends LanguageModuleTest { private static final String COMPLETE_TEST_FILE = "Complete.go"; + // example files taken from antlr repo + private static final String CONSTANTS_TEST_FILE = "Constants.go"; + private static final String ARRAY_ELLIPSIS_DECLS_FILE = "ArrayEllipsisDecls.go"; - /** - * Regular expression that describes lines consisting only of whitespace and optionally a line comment. - */ - private static final String EMPTY_OR_SINGLE_LINE_COMMENT = "\\s*(//.*|/\\*.*\\*/)?"; - - /** - * Regular expression that describes lines containing the start of a multiline comment and no code before it. - */ - private static final String DELIMITED_COMMENT_START = "\\s*/\\*(?:(?!\\*/).)*$"; - - /** - * Regular expression that describes lines containing the end of a multiline comment and no more code after that. 
- */ - private static final String DELIMITED_COMMENT_END = ".*\\*/\\s*$"; - - private final Logger logger = LoggerFactory.getLogger(GoLanguageTest.class); - private final String[] testFiles = new String[] {COMPLETE_TEST_FILE}; - private final File testFileLocation = Path.of("src", "test", "resources", "de", "jplag", "golang").toFile(); - private GoLanguage language; - - @BeforeEach - void setup() { - language = new GoLanguage(); + public GoLanguageTest() { + super(new GoLanguage(), GoTokenType.class); } - @Test - void parseTestFiles() throws ParsingException { - for (String fileName : testFiles) { - List tokens = language.parse(Set.of(new File(testFileLocation, fileName))); - String output = TokenPrinter.printTokens(tokens, testFileLocation); - logger.info(output); + @Override + protected void collectTestData(TestDataCollector collector) { + collector.testFile(COMPLETE_TEST_FILE).testCoverages(); - testSourceCoverage(fileName, tokens); - if (fileName.equals(COMPLETE_TEST_FILE)) { - testTokenCoverage(tokens, fileName); - } - } + // Some basic tests, so we have at least some idea if the listener was changed + collector.testFile(CONSTANTS_TEST_FILE).testTokenSequence(PACKAGE, VARIABLE_DECLARATION, VARIABLE_DECLARATION); + collector.testFile(ARRAY_ELLIPSIS_DECLS_FILE).testSourceCoverage().testTokenSequence(PACKAGE, IMPORT_DECLARATION, IMPORT_CLAUSE_BEGIN, + IMPORT_CLAUSE, IMPORT_CLAUSE_END, STRUCT_DECLARATION, STRUCT_BODY_BEGIN, MEMBER_DECLARATION, STRUCT_BODY_END, FUNCTION_DECLARATION, + FUNCTION_BODY_BEGIN, VARIABLE_DECLARATION, ASSIGNMENT, INVOCATION, ARGUMENT, ARGUMENT, FUNCTION_BODY_END); } - /** - * Confirms that the code is covered to a basic extent, i.e. each line of code contains at least one token. 
- * @param fileName a code sample file name - * @param tokens the list of tokens generated from the sample - */ - private void testSourceCoverage(String fileName, List tokens) { - File testFile = new File(testFileLocation, fileName); - - List lines = null; - try { - lines = Files.readAllLines(testFile.toPath()); - } catch (IOException exception) { - logger.info("Error while reading test file %s".formatted(fileName), exception); - fail(); - } - - // All lines that contain code - var codeLines = getCodeLines(lines); - // All lines that contain a token - var tokenLines = tokens.stream().mapToInt(Token::getLine).distinct().boxed().toList(); - - if (codeLines.size() > tokenLines.size()) { - List missedLinesIndices = new ArrayList<>(codeLines); - missedLinesIndices.removeAll(tokenLines); - var missedLines = missedLinesIndices.stream().map(Object::toString).collect(Collectors.joining(", ")); - if (!missedLines.isBlank()) { - fail("Found lines in file '%s' that are not represented in the token list. \n\tMissed lines: %s".formatted(fileName, missedLines)); - } - } - OptionalInt differingLine = IntStream.range(0, codeLines.size()) - .dropWhile(index -> Objects.equals(codeLines.get(index), tokenLines.get(index))).findAny(); - differingLine.ifPresent( - i -> fail("Not all lines of code in '%s' are represented in tokens, starting with line %d.".formatted(fileName, codeLines.get(i)))); + @Override + protected void configureIgnoredLines(TestSourceIgnoredLinesCollector collector) { } - - /** - * Gets the line numbers of lines containing actual code, omitting empty lines and comment lines. 
- * @param lines lines of a code file - * @return a list of the line numbers of code lines - */ - private List getCodeLines(List lines) { - // This boxed boolean can be accessed from within the lambda method below - var state = new Object() { - boolean insideComment = false; - }; - - var codeLines = IntStream.rangeClosed(1, lines.size()).sequential().filter(idx -> { - String line = lines.get(idx - 1); - if (line.matches(EMPTY_OR_SINGLE_LINE_COMMENT)) { - return false; - } else if (line.matches(DELIMITED_COMMENT_START)) { - state.insideComment = true; - return false; - } else if (state.insideComment) { - // This fails if code follows after '*/'. If the code is formatted well, this should not happen. - if (line.matches(DELIMITED_COMMENT_END)) { - state.insideComment = false; - } - return false; - } - return true; - }); - - return codeLines.boxed().toList(); - - } - - /** - * Confirms that all Token types are 'reachable' with a complete code example. - * @param tokens list of tokens which is supposed to contain all types of tokens - * @param fileName The file name of the complete code example - */ - private void testTokenCoverage(List tokens, String fileName) { - var annotatedTokens = tokens.stream().map(Token::getType).collect(Collectors.toSet()); - assertTrue(annotatedTokens.contains(SharedTokenType.FILE_END)); - var annotatedGoTokens = annotatedTokens.stream().filter(GoTokenType.class::isInstance).collect(Collectors.toSet()); - var allGoTokens = GoTokenType.values(); - var missingGoTokens = Arrays.stream(allGoTokens).filter(token -> !annotatedGoTokens.contains(token)).toList(); - assertTrue(missingGoTokens.isEmpty(), "The following go tokens are missing in the code example '%s':\n".formatted(fileName) - + String.join("\n", missingGoTokens.stream().map(GoTokenType::getDescription).toList())); - } - } diff --git a/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go b/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go new file mode 
100644 index 0000000000..0a9b83bec5 --- /dev/null +++ b/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go @@ -0,0 +1,14 @@ +package samples + +import ( + "fmt" +) + +type Custom struct { + string +} + +func ArrayEllipsisDecls() { + stooges := [...]Custom{{"Moe"}, {"Larry"}, {"Curly"}} // len(stooges) == 3 + fmt.Println("Stooges: ", stooges) +} \ No newline at end of file diff --git a/languages/golang/src/test/resources/de/jplag/golang/Complete.go b/languages/golang/src/test/resources/de/jplag/go/Complete.go similarity index 100% rename from languages/golang/src/test/resources/de/jplag/golang/Complete.go rename to languages/golang/src/test/resources/de/jplag/go/Complete.go diff --git a/languages/golang/src/test/resources/de/jplag/go/Constants.go b/languages/golang/src/test/resources/de/jplag/go/Constants.go new file mode 100644 index 0000000000..1a3dab2879 --- /dev/null +++ b/languages/golang/src/test/resources/de/jplag/go/Constants.go @@ -0,0 +1,6 @@ +package A + +const ( + /*A*/ T = 1 << 0 + /*B*/ Ta = 1 << 1 +) \ No newline at end of file diff --git a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java index f1c5c2dc93..65f5ab89c8 100644 --- a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java +++ b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java @@ -52,7 +52,7 @@ public void parseFiles(Set files, final Parser parser) throws ParsingExcep ast.accept(scanner, null); parser.add(Token.semanticFileEnd(file)); } - } catch (IOException exception) { + } catch (Exception exception) { throw new ParsingException(null, exception.getMessage(), exception); } parsingExceptions.addAll(processErrors(listener)); diff --git a/languages/python-3/pom.xml b/languages/python-3/pom.xml index 92712c0b1a..fb6865e696 100644 --- a/languages/python-3/pom.xml +++ b/languages/python-3/pom.xml @@ -13,6 +13,11 @@ org.antlr antlr4-runtime + + de.jplag + language-antlr-utils + 
${revision} + diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 index 9b5fee1dc4..8b36564b96 100644 --- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 +++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 @@ -28,190 +28,159 @@ * https://github.com/bkiers/python3-parser * Developed by : Bart Kiers, bart@big-o.nl */ + +// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true + lexer grammar Python3Lexer; // All comments that start with "///" are copy-pasted from // The Python Language Reference -tokens { INDENT, DEDENT } +tokens { + INDENT, + DEDENT +} options { - superClass=Python3LexerBase; + superClass = Python3LexerBase; } +// Insert here @header for C++ lexer. 
+ /* * lexer rules */ -STRING - : STRING_LITERAL - | BYTES_LITERAL - ; - -NUMBER - : INTEGER - | FLOAT_NUMBER - | IMAG_NUMBER - ; - -INTEGER - : DECIMAL_INTEGER - | OCT_INTEGER - | HEX_INTEGER - | BIN_INTEGER - ; - -AND : 'and'; -AS : 'as'; -ASSERT : 'assert'; -ASYNC : 'async'; -AWAIT : 'await'; -BREAK : 'break'; -CASE : 'case' ; -CLASS : 'class'; -CONTINUE : 'continue'; -DEF : 'def'; -DEL : 'del'; -ELIF : 'elif'; -ELSE : 'else'; -EXCEPT : 'except'; -FALSE : 'False'; -FINALLY : 'finally'; -FOR : 'for'; -FROM : 'from'; -GLOBAL : 'global'; -IF : 'if'; -IMPORT : 'import'; -IN : 'in'; -IS : 'is'; -LAMBDA : 'lambda'; -MATCH : 'match' ; -NONE : 'None'; -NONLOCAL : 'nonlocal'; -NOT : 'not'; -OR : 'or'; -PASS : 'pass'; -RAISE : 'raise'; -RETURN : 'return'; -TRUE : 'True'; -TRY : 'try'; -UNDERSCORE : '_' ; -WHILE : 'while'; -WITH : 'with'; -YIELD : 'yield'; - -NEWLINE - : ( {this.atStartOfInput()}? SPACES - | ( '\r'? '\n' | '\r' | '\f' ) SPACES? - ) - {this.onNewLine();} - ; +STRING: STRING_LITERAL | BYTES_LITERAL; + +NUMBER: INTEGER | FLOAT_NUMBER | IMAG_NUMBER; + +INTEGER: DECIMAL_INTEGER | OCT_INTEGER | HEX_INTEGER | BIN_INTEGER; + +AND : 'and'; +AS : 'as'; +ASSERT : 'assert'; +ASYNC : 'async'; +AWAIT : 'await'; +BREAK : 'break'; +CASE : 'case'; +CLASS : 'class'; +CONTINUE : 'continue'; +DEF : 'def'; +DEL : 'del'; +ELIF : 'elif'; +ELSE : 'else'; +EXCEPT : 'except'; +FALSE : 'False'; +FINALLY : 'finally'; +FOR : 'for'; +FROM : 'from'; +GLOBAL : 'global'; +IF : 'if'; +IMPORT : 'import'; +IN : 'in'; +IS : 'is'; +LAMBDA : 'lambda'; +MATCH : 'match'; +NONE : 'None'; +NONLOCAL : 'nonlocal'; +NOT : 'not'; +OR : 'or'; +PASS : 'pass'; +RAISE : 'raise'; +RETURN : 'return'; +TRUE : 'True'; +TRY : 'try'; +UNDERSCORE : '_'; +WHILE : 'while'; +WITH : 'with'; +YIELD : 'yield'; + +NEWLINE: ({this.atStartOfInput()}? SPACES | ( '\r'? '\n' | '\r' | '\f') SPACES?) 
{this.onNewLine();}; /// identifier ::= id_start id_continue* -NAME - : ID_START ID_CONTINUE* - ; +NAME: ID_START ID_CONTINUE*; /// stringliteral ::= [stringprefix](shortstring | longstring) /// stringprefix ::= "r" | "u" | "R" | "U" | "f" | "F" /// | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" -STRING_LITERAL - : ( [rR] | [uU] | [fF] | ( [fF] [rR] ) | ( [rR] [fF] ) )? ( SHORT_STRING | LONG_STRING ) - ; +STRING_LITERAL: ( [rR] | [uU] | [fF] | ( [fF] [rR]) | ( [rR] [fF]))? ( SHORT_STRING | LONG_STRING); /// bytesliteral ::= bytesprefix(shortbytes | longbytes) /// bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" -BYTES_LITERAL - : ( [bB] | ( [bB] [rR] ) | ( [rR] [bB] ) ) ( SHORT_BYTES | LONG_BYTES ) - ; +BYTES_LITERAL: ( [bB] | ( [bB] [rR]) | ( [rR] [bB])) ( SHORT_BYTES | LONG_BYTES); /// decimalinteger ::= nonzerodigit digit* | "0"+ -DECIMAL_INTEGER - : NON_ZERO_DIGIT DIGIT* - | '0'+ - ; +DECIMAL_INTEGER: NON_ZERO_DIGIT DIGIT* | '0'+; /// octinteger ::= "0" ("o" | "O") octdigit+ -OCT_INTEGER - : '0' [oO] OCT_DIGIT+ - ; +OCT_INTEGER: '0' [oO] OCT_DIGIT+; /// hexinteger ::= "0" ("x" | "X") hexdigit+ -HEX_INTEGER - : '0' [xX] HEX_DIGIT+ - ; +HEX_INTEGER: '0' [xX] HEX_DIGIT+; /// bininteger ::= "0" ("b" | "B") bindigit+ -BIN_INTEGER - : '0' [bB] BIN_DIGIT+ - ; +BIN_INTEGER: '0' [bB] BIN_DIGIT+; /// floatnumber ::= pointfloat | exponentfloat -FLOAT_NUMBER - : POINT_FLOAT - | EXPONENT_FLOAT - ; +FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT; /// imagnumber ::= (floatnumber | intpart) ("j" | "J") -IMAG_NUMBER - : ( FLOAT_NUMBER | INT_PART ) [jJ] - ; - -DOT : '.'; -ELLIPSIS : '...'; -STAR : '*'; -OPEN_PAREN : '(' {this.openBrace();}; -CLOSE_PAREN : ')' {this.closeBrace();}; -COMMA : ','; -COLON : ':'; -SEMI_COLON : ';'; -POWER : '**'; -ASSIGN : '='; -OPEN_BRACK : '[' {this.openBrace();}; -CLOSE_BRACK : ']' {this.closeBrace();}; -OR_OP : '|'; -XOR : '^'; -AND_OP : '&'; -LEFT_SHIFT : '<<'; -RIGHT_SHIFT : '>>'; -ADD : '+'; -MINUS : '-'; 
-DIV : '/'; -MOD : '%'; -IDIV : '//'; -NOT_OP : '~'; -OPEN_BRACE : '{' {this.openBrace();}; -CLOSE_BRACE : '}' {this.closeBrace();}; -LESS_THAN : '<'; -GREATER_THAN : '>'; -EQUALS : '=='; -GT_EQ : '>='; -LT_EQ : '<='; -NOT_EQ_1 : '<>'; -NOT_EQ_2 : '!='; -AT : '@'; -ARROW : '->'; -ADD_ASSIGN : '+='; -SUB_ASSIGN : '-='; -MULT_ASSIGN : '*='; -AT_ASSIGN : '@='; -DIV_ASSIGN : '/='; -MOD_ASSIGN : '%='; -AND_ASSIGN : '&='; -OR_ASSIGN : '|='; -XOR_ASSIGN : '^='; -LEFT_SHIFT_ASSIGN : '<<='; +IMAG_NUMBER: ( FLOAT_NUMBER | INT_PART) [jJ]; + +DOT : '.'; +ELLIPSIS : '...'; +STAR : '*'; +OPEN_PAREN : '(' {this.openBrace();}; +CLOSE_PAREN : ')' {this.closeBrace();}; +COMMA : ','; +COLON : ':'; +SEMI_COLON : ';'; +POWER : '**'; +ASSIGN : '='; +OPEN_BRACK : '[' {this.openBrace();}; +CLOSE_BRACK : ']' {this.closeBrace();}; +OR_OP : '|'; +XOR : '^'; +AND_OP : '&'; +LEFT_SHIFT : '<<'; +RIGHT_SHIFT : '>>'; +ADD : '+'; +MINUS : '-'; +DIV : '/'; +MOD : '%'; +IDIV : '//'; +NOT_OP : '~'; +OPEN_BRACE : '{' {this.openBrace();}; +CLOSE_BRACE : '}' {this.closeBrace();}; +LESS_THAN : '<'; +GREATER_THAN : '>'; +EQUALS : '=='; +GT_EQ : '>='; +LT_EQ : '<='; +NOT_EQ_1 : '<>'; +NOT_EQ_2 : '!='; +AT : '@'; +ARROW : '->'; +ADD_ASSIGN : '+='; +SUB_ASSIGN : '-='; +MULT_ASSIGN : '*='; +AT_ASSIGN : '@='; +DIV_ASSIGN : '/='; +MOD_ASSIGN : '%='; +AND_ASSIGN : '&='; +OR_ASSIGN : '|='; +XOR_ASSIGN : '^='; +LEFT_SHIFT_ASSIGN : '<<='; RIGHT_SHIFT_ASSIGN : '>>='; -POWER_ASSIGN : '**='; -IDIV_ASSIGN : '//='; +POWER_ASSIGN : '**='; +IDIV_ASSIGN : '//='; -SKIP_ - : ( SPACES | COMMENT | LINE_JOINING ) -> skip - ; +SKIP_: ( SPACES | COMMENT | LINE_JOINING) -> skip; -UNKNOWN_CHAR - : . 
- ; +UNKNOWN_CHAR: .; /* * fragments @@ -220,143 +189,93 @@ UNKNOWN_CHAR /// shortstring ::= "'" shortstringitem* "'" | '"' shortstringitem* '"' /// shortstringitem ::= shortstringchar | stringescapeseq /// shortstringchar ::= -fragment SHORT_STRING - : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\'' - | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"' - ; +fragment SHORT_STRING: + '\'' (STRING_ESCAPE_SEQ | ~[\\\r\n\f'])* '\'' + | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"])* '"' +; /// longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""' -fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' - ; +fragment LONG_STRING: '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' | '"""' LONG_STRING_ITEM*? '"""'; /// longstringitem ::= longstringchar | stringescapeseq -fragment LONG_STRING_ITEM - : LONG_STRING_CHAR - | STRING_ESCAPE_SEQ - ; +fragment LONG_STRING_ITEM: LONG_STRING_CHAR | STRING_ESCAPE_SEQ; /// longstringchar ::= -fragment LONG_STRING_CHAR - : ~'\\' - ; +fragment LONG_STRING_CHAR: ~'\\'; /// stringescapeseq ::= "\" -fragment STRING_ESCAPE_SEQ - : '\\' . - | '\\' NEWLINE - ; +fragment STRING_ESCAPE_SEQ: '\\' . | '\\' NEWLINE; /// nonzerodigit ::= "1"..."9" -fragment NON_ZERO_DIGIT - : [1-9] - ; +fragment NON_ZERO_DIGIT: [1-9]; /// digit ::= "0"..."9" -fragment DIGIT - : [0-9] - ; +fragment DIGIT: [0-9]; /// octdigit ::= "0"..."7" -fragment OCT_DIGIT - : [0-7] - ; +fragment OCT_DIGIT: [0-7]; /// hexdigit ::= digit | "a"..."f" | "A"..."F" -fragment HEX_DIGIT - : [0-9a-fA-F] - ; +fragment HEX_DIGIT: [0-9a-fA-F]; /// bindigit ::= "0" | "1" -fragment BIN_DIGIT - : [01] - ; +fragment BIN_DIGIT: [01]; /// pointfloat ::= [intpart] fraction | intpart "." -fragment POINT_FLOAT - : INT_PART? FRACTION - | INT_PART '.' - ; +fragment POINT_FLOAT: INT_PART? 
FRACTION | INT_PART '.'; /// exponentfloat ::= (intpart | pointfloat) exponent -fragment EXPONENT_FLOAT - : ( INT_PART | POINT_FLOAT ) EXPONENT - ; +fragment EXPONENT_FLOAT: ( INT_PART | POINT_FLOAT) EXPONENT; /// intpart ::= digit+ -fragment INT_PART - : DIGIT+ - ; +fragment INT_PART: DIGIT+; /// fraction ::= "." digit+ -fragment FRACTION - : '.' DIGIT+ - ; +fragment FRACTION: '.' DIGIT+; /// exponent ::= ("e" | "E") ["+" | "-"] digit+ -fragment EXPONENT - : [eE] [+-]? DIGIT+ - ; +fragment EXPONENT: [eE] [+-]? DIGIT+; /// shortbytes ::= "'" shortbytesitem* "'" | '"' shortbytesitem* '"' /// shortbytesitem ::= shortbyteschar | bytesescapeseq -fragment SHORT_BYTES - : '\'' ( SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ )* '\'' - | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ )* '"' - ; +fragment SHORT_BYTES: + '\'' (SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ)* '\'' + | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ)* '"' +; /// longbytes ::= "'''" longbytesitem* "'''" | '"""' longbytesitem* '"""' -fragment LONG_BYTES - : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' - | '"""' LONG_BYTES_ITEM*? '"""' - ; +fragment LONG_BYTES: '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' | '"""' LONG_BYTES_ITEM*? 
'"""'; /// longbytesitem ::= longbyteschar | bytesescapeseq -fragment LONG_BYTES_ITEM - : LONG_BYTES_CHAR - | BYTES_ESCAPE_SEQ - ; +fragment LONG_BYTES_ITEM: LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; /// shortbyteschar ::= -fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0026] - | [\u0028-\u005B] - | [\u005D-\u007F] - ; - -fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0021] - | [\u0023-\u005B] - | [\u005D-\u007F] - ; +fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE: + [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0026] + | [\u0028-\u005B] + | [\u005D-\u007F] +; + +fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE: + [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0021] + | [\u0023-\u005B] + | [\u005D-\u007F] +; /// longbyteschar ::= -fragment LONG_BYTES_CHAR - : [\u0000-\u005B] - | [\u005D-\u007F] - ; +fragment LONG_BYTES_CHAR: [\u0000-\u005B] | [\u005D-\u007F]; /// bytesescapeseq ::= "\" -fragment BYTES_ESCAPE_SEQ - : '\\' [\u0000-\u007F] - ; - -fragment SPACES - : [ \t]+ - ; +fragment BYTES_ESCAPE_SEQ: '\\' [\u0000-\u007F]; -fragment COMMENT - : '#' ~[\r\n\f]* - ; +fragment SPACES: [ \t]+; -fragment LINE_JOINING - : '\\' SPACES? ( '\r'? '\n' | '\r' | '\f') - ; +fragment COMMENT: '#' ~[\r\n\f]*; +fragment LINE_JOINING: '\\' SPACES? ( '\r'? '\n' | '\r' | '\f'); // TODO: ANTLR seems lack of some Unicode property support... 
//$ curl https://www.unicode.org/Public/13.0.0/ucd/PropList.txt | grep Other_ID_ @@ -369,36 +288,26 @@ fragment LINE_JOINING //1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE //19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE -fragment UNICODE_OIDS - : '\u1885'..'\u1886' - | '\u2118' - | '\u212e' - | '\u309b'..'\u309c' - ; +fragment UNICODE_OIDS: '\u1885' ..'\u1886' | '\u2118' | '\u212e' | '\u309b' ..'\u309c'; -fragment UNICODE_OIDC - : '\u00b7' - | '\u0387' - | '\u1369'..'\u1371' - | '\u19da' - ; +fragment UNICODE_OIDC: '\u00b7' | '\u0387' | '\u1369' ..'\u1371' | '\u19da'; /// id_start ::= -fragment ID_START - : '_' - | [\p{L}] - | [\p{Nl}] - //| [\p{Other_ID_Start}] - | UNICODE_OIDS - ; +fragment ID_START: + '_' + | [\p{L}] + | [\p{Nl}] + //| [\p{Other_ID_Start}] + | UNICODE_OIDS +; /// id_continue ::= -fragment ID_CONTINUE - : ID_START - | [\p{Mn}] - | [\p{Mc}] - | [\p{Nd}] - | [\p{Pc}] - //| [\p{Other_ID_Continue}] - | UNICODE_OIDC - ; +fragment ID_CONTINUE: + ID_START + | [\p{Mn}] + | [\p{Mc}] + | [\p{Nd}] + | [\p{Pc}] + //| [\p{Other_ID_Continue}] + | UNICODE_OIDC +; \ No newline at end of file diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 index 8b0143de64..4c5a27cf2a 100644 --- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 +++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 @@ -31,186 +31,623 @@ // Scraping from https://docs.python.org/3/reference/grammar.html +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + parser grammar Python3Parser; options { superClass = Python3ParserBase; - tokenVocab=Python3Lexer; + 
tokenVocab = Python3Lexer; } +// Insert here @header for C++ parser. + // All comments that start with "///" are copy-pasted from // The Python Language Reference -single_input: NEWLINE | simple_stmts | compound_stmt NEWLINE; -file_input: (NEWLINE | stmt)* EOF; -eval_input: testlist NEWLINE* EOF; - -decorator: '@' dotted_name ( '(' arglist? ')' )? NEWLINE; -decorators: decorator+; -decorated: decorators (classdef | funcdef | async_funcdef); - -async_funcdef: ASYNC funcdef; -funcdef: 'def' name parameters ('->' test)? ':' block; - -parameters: '(' typedargslist? ')'; -typedargslist: (tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (',' ( - '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )? - | '**' tfpdef ','? )? )? - | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )? - | '**' tfpdef ','?); -tfpdef: name (':' test)?; -varargslist: (vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (',' ( - '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )? - | '**' vfpdef (',')?)?)? - | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )? - | '**' vfpdef ','? -); -vfpdef: name; - -stmt: simple_stmts | compound_stmt; -simple_stmts: simple_stmt (';' simple_stmt)* ';'? NEWLINE; -simple_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt); -expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*); -annassign: ':' test ('=' test)?; -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* ','?; -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//='); +single_input + : NEWLINE + | simple_stmts + | compound_stmt NEWLINE + ; + +file_input + : (NEWLINE | stmt)* EOF + ; + +eval_input + : testlist NEWLINE* EOF + ; + +decorator + : '@' dotted_name ('(' arglist? ')')? 
NEWLINE + ; + +decorators + : decorator+ + ; + +decorated + : decorators (classdef | funcdef | async_funcdef) + ; + +async_funcdef + : ASYNC funcdef + ; + +funcdef + : 'def' name parameters ('->' test)? ':' block + ; + +parameters + : '(' typedargslist? ')' + ; + +typedargslist + : ( + tfpdef ('=' test)? (',' tfpdef ('=' test)?)* ( + ',' ( + '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)? + | '**' tfpdef ','? + )? + )? + | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)? + | '**' tfpdef ','? + ) + ; + +tfpdef + : name (':' test)? + ; + +varargslist + : ( + vfpdef ('=' test)? (',' vfpdef ('=' test)?)* ( + ',' ( + '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)? + | '**' vfpdef (',')? + )? + )? + | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)? + | '**' vfpdef ','? + ) + ; + +vfpdef + : name + ; + +stmt + : simple_stmts + | compound_stmt + ; + +simple_stmts + : simple_stmt (';' simple_stmt)* ';'? NEWLINE + ; + +simple_stmt + : ( + expr_stmt + | del_stmt + | pass_stmt + | flow_stmt + | import_stmt + | global_stmt + | nonlocal_stmt + | assert_stmt + ) + ; + +expr_stmt + : testlist_star_expr ( + annassign + | augassign (yield_expr | testlist) + | ('=' (yield_expr | testlist_star_expr))* + ) + ; + +annassign + : ':' test ('=' test)? + ; + +testlist_star_expr + : (test | star_expr) (',' (test | star_expr))* ','? 
+ ; + +augassign + : ( + '+=' + | '-=' + | '*=' + | '@=' + | '/=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '**=' + | '//=' + ) + ; + // For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist; -pass_stmt: 'pass'; -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt; -break_stmt: 'break'; -continue_stmt: 'continue'; -return_stmt: 'return' testlist?; -yield_stmt: yield_expr; -raise_stmt: 'raise' (test ('from' test)?)?; -import_stmt: import_name | import_from; -import_name: 'import' dotted_as_names; +del_stmt + : 'del' exprlist + ; + +pass_stmt + : 'pass' + ; + +flow_stmt + : break_stmt + | continue_stmt + | return_stmt + | raise_stmt + | yield_stmt + ; + +break_stmt + : 'break' + ; + +continue_stmt + : 'continue' + ; + +return_stmt + : 'return' testlist? + ; + +yield_stmt + : yield_expr + ; + +raise_stmt + : 'raise' (test ('from' test)?)? + ; + +import_stmt + : import_name + | import_from + ; + +import_name + : 'import' dotted_as_names + ; + // note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)); -import_as_name: name ('as' name)?; -dotted_as_name: dotted_name ('as' name)?; -import_as_names: import_as_name (',' import_as_name)* ','?; -dotted_as_names: dotted_as_name (',' dotted_as_name)*; -dotted_name: name ('.' 
name)*; -global_stmt: 'global' name (',' name)*; -nonlocal_stmt: 'nonlocal' name (',' name)*; -assert_stmt: 'assert' test (',' test)?; - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt; -async_stmt: ASYNC (funcdef | with_stmt | for_stmt); -if_stmt: 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?; -while_stmt: 'while' test ':' block ('else' ':' block)?; -for_stmt: 'for' exprlist 'in' testlist ':' block ('else' ':' block)?; -try_stmt: ('try' ':' block - ((except_clause ':' block)+ - ('else' ':' block)? - ('finally' ':' block)? | - 'finally' ':' block)); -with_stmt: 'with' with_item (',' with_item)* ':' block; -with_item: test ('as' expr)?; +import_from + : ( + 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ( + '*' + | '(' import_as_names ')' + | import_as_names + ) + ) + ; + +import_as_name + : name ('as' name)? + ; + +dotted_as_name + : dotted_name ('as' name)? + ; + +import_as_names + : import_as_name (',' import_as_name)* ','? + ; + +dotted_as_names + : dotted_as_name (',' dotted_as_name)* + ; + +dotted_name + : name ('.' name)* + ; + +global_stmt + : 'global' name (',' name)* + ; + +nonlocal_stmt + : 'nonlocal' name (',' name)* + ; + +assert_stmt + : 'assert' test (',' test)? + ; + +compound_stmt + : if_stmt + | while_stmt + | for_stmt + | try_stmt + | with_stmt + | funcdef + | classdef + | decorated + | async_stmt + | match_stmt + ; + +async_stmt + : ASYNC (funcdef | with_stmt | for_stmt) + ; + +if_stmt + : 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)? + ; + +while_stmt + : 'while' test ':' block ('else' ':' block)? + ; + +for_stmt + : 'for' exprlist 'in' testlist ':' block ('else' ':' block)? + ; + +try_stmt + : ( + 'try' ':' block ( + (except_clause ':' block)+ ('else' ':' block)? ('finally' ':' block)? 
+ | 'finally' ':' block + ) + ) + ; + +with_stmt + : 'with' with_item (',' with_item)* ':' block + ; + +with_item + : test ('as' expr)? + ; + // NB compile.c makes sure that the default except clause is last -except_clause: 'except' (test ('as' name)?)?; -block: simple_stmts | NEWLINE INDENT stmt+ DEDENT; -match_stmt: 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT ; -subject_expr: star_named_expression ',' star_named_expressions? | test ; -star_named_expressions: ',' star_named_expression+ ','? ; -star_named_expression: '*' expr | test ; -case_block: 'case' patterns guard? ':' block ; -guard: 'if' test ; -patterns: open_sequence_pattern | pattern ; -pattern: as_pattern | or_pattern ; -as_pattern: or_pattern 'as' pattern_capture_target ; -or_pattern: closed_pattern ('|' closed_pattern)* ; -closed_pattern: literal_pattern | capture_pattern | wildcard_pattern | value_pattern | group_pattern | sequence_pattern | mapping_pattern | class_pattern ; -literal_pattern: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ; -literal_expr: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ; -complex_number: signed_real_number '+' imaginary_number +except_clause + : 'except' (test ('as' name)?)? + ; + +block + : simple_stmts + | NEWLINE INDENT stmt+ DEDENT + ; + +match_stmt + : 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT + ; + +subject_expr + : star_named_expression ',' star_named_expressions? + | test + ; + +star_named_expressions + : ',' star_named_expression+ ','? + ; + +star_named_expression + : '*' expr + | test + ; + +case_block + : 'case' patterns guard? 
':' block + ; + +guard + : 'if' test + ; + +patterns + : open_sequence_pattern + | pattern + ; + +pattern + : as_pattern + | or_pattern + ; + +as_pattern + : or_pattern 'as' pattern_capture_target + ; + +or_pattern + : closed_pattern ('|' closed_pattern)* + ; + +closed_pattern + : literal_pattern + | capture_pattern + | wildcard_pattern + | value_pattern + | group_pattern + | sequence_pattern + | mapping_pattern + | class_pattern + ; + +literal_pattern + : signed_number { this.CannotBePlusMinus() }? + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; + +literal_expr + : signed_number { this.CannotBePlusMinus() }? + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; + +complex_number + : signed_real_number '+' imaginary_number | signed_real_number '-' imaginary_number ; -signed_number: NUMBER | '-' NUMBER ; -signed_real_number: real_number | '-' real_number ; -real_number: NUMBER ; -imaginary_number: NUMBER ; -capture_pattern: pattern_capture_target ; -pattern_capture_target: /* cannot be '_' */ name { this.cannotBeDotLpEq() }? ; -wildcard_pattern: '_' ; -value_pattern: attr { this.cannotBeDotLpEq() }? ; -attr: name ('.' name)+ ; -name_or_attr: attr | name ; -group_pattern: '(' pattern ')' ; -sequence_pattern: - '[' maybe_sequence_pattern? ']' + +signed_number + : NUMBER + | '-' NUMBER + ; + +signed_real_number + : real_number + | '-' real_number + ; + +real_number + : NUMBER + ; + +imaginary_number + : NUMBER + ; + +capture_pattern + : pattern_capture_target + ; + +pattern_capture_target + : /* cannot be '_' */ name { this.CannotBeDotLpEq() }? + ; + +wildcard_pattern + : '_' + ; + +value_pattern + : attr { this.CannotBeDotLpEq() }? + ; + +attr + : name ('.' name)+ + ; + +name_or_attr + : attr + | name + ; + +group_pattern + : '(' pattern ')' + ; + +sequence_pattern + : '[' maybe_sequence_pattern? ']' | '(' open_sequence_pattern? ')' ; -open_sequence_pattern: maybe_star_pattern ',' maybe_sequence_pattern? 
; -maybe_sequence_pattern: maybe_star_pattern (',' maybe_star_pattern)* ','? ; -maybe_star_pattern: star_pattern | pattern ; -star_pattern: - '*' pattern_capture_target + +open_sequence_pattern + : maybe_star_pattern ',' maybe_sequence_pattern? + ; + +maybe_sequence_pattern + : maybe_star_pattern (',' maybe_star_pattern)* ','? + ; + +maybe_star_pattern + : star_pattern + | pattern + ; + +star_pattern + : '*' pattern_capture_target | '*' wildcard_pattern ; -mapping_pattern: '{' '}' + +mapping_pattern + : '{' '}' | '{' double_star_pattern ','? '}' | '{' items_pattern ',' double_star_pattern ','? '}' | '{' items_pattern ','? '}' ; -items_pattern: key_value_pattern (',' key_value_pattern)* ; -key_value_pattern: (literal_expr | attr) ':' pattern ; -double_star_pattern: '**' pattern_capture_target ; -class_pattern: name_or_attr '(' ')' + +items_pattern + : key_value_pattern (',' key_value_pattern)* + ; + +key_value_pattern + : (literal_expr | attr) ':' pattern + ; + +double_star_pattern + : '**' pattern_capture_target + ; + +class_pattern + : name_or_attr '(' ')' | name_or_attr '(' positional_patterns ','? ')' | name_or_attr '(' keyword_patterns ','? ')' | name_or_attr '(' positional_patterns ',' keyword_patterns ','? ')' ; -positional_patterns: pattern (',' pattern)* ; -keyword_patterns: keyword_pattern (',' keyword_pattern)* ; -keyword_pattern: name '=' pattern ; - -test: or_test ('if' or_test 'else' test)? | lambdef; -test_nocond: or_test | lambdef_nocond; -lambdef: 'lambda' varargslist? ':' test; -lambdef_nocond: 'lambda' varargslist? ':' test_nocond; -or_test: and_test ('or' and_test)*; -and_test: not_test ('and' not_test)*; -not_test: 'not' not_test | comparison; -comparison: expr (comp_op expr)*; + +positional_patterns + : pattern (',' pattern)* + ; + +keyword_patterns + : keyword_pattern (',' keyword_pattern)* + ; + +keyword_pattern + : name '=' pattern + ; + +test + : or_test ('if' or_test 'else' test)? 
+ | lambdef + ; + +test_nocond + : or_test + | lambdef_nocond + ; + +lambdef + : 'lambda' varargslist? ':' test + ; + +lambdef_nocond + : 'lambda' varargslist? ':' test_nocond + ; + +or_test + : and_test ('or' and_test)* + ; + +and_test + : not_test ('and' not_test)* + ; + +not_test + : 'not' not_test + | comparison + ; + +comparison + : expr (comp_op expr)* + ; + // <> isn't actually a valid comparison operator in Python. It's here for the // sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'; -star_expr: '*' expr; -expr: xor_expr ('|' xor_expr)*; -xor_expr: and_expr ('^' and_expr)*; -and_expr: shift_expr ('&' shift_expr)*; -shift_expr: arith_expr (('<<'|'>>') arith_expr)*; -arith_expr: term (('+'|'-') term)*; -term: factor (('*'|'@'|'/'|'%'|'//') factor)*; -factor: ('+'|'-'|'~') factor | power; -power: atom_expr ('**' factor)?; -atom_expr: AWAIT? atom trailer*; -atom: '(' (yield_expr|testlist_comp)? ')' - | '[' testlist_comp? ']' - | '{' dictorsetmaker? '}' - | name | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' ; -name : NAME | '_' | 'match' ; -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* ','? ); -trailer: '(' arglist? ')' | '[' subscriptlist ']' | '.' name ; -subscriptlist: subscript_ (',' subscript_)* ','?; -subscript_: test | test? ':' test? sliceop?; -sliceop: ':' test?; -exprlist: (expr|star_expr) (',' (expr|star_expr))* ','?; -testlist: test (',' test)* ','?; -dictorsetmaker: ( ((test ':' test | '**' expr) - (comp_for | (',' (test ':' test | '**' expr))* ','?)) | - ((test | star_expr) - (comp_for | (',' (test | star_expr))* ','?)) ); - -classdef: 'class' name ('(' arglist? ')')? 
':' block; - -arglist: argument (',' argument)* ','?; +comp_op + : '<' + | '>' + | '==' + | '>=' + | '<=' + | '<>' + | '!=' + | 'in' + | 'not' 'in' + | 'is' + | 'is' 'not' + ; + +star_expr + : '*' expr + ; + +expr + : atom_expr + | expr '**' expr + | ('+' | '-' | '~')+ expr + | expr ('*' | '@' | '/' | '%' | '//') expr + | expr ('+' | '-') expr + | expr ('<<' | '>>') expr + | expr '&' expr + | expr '^' expr + | expr '|' expr + ; + +//expr: xor_expr ('|' xor_expr)*; +//xor_expr: and_expr ('^' and_expr)*; +//and_expr: shift_expr ('&' shift_expr)*; +//shift_expr: arith_expr (('<<'|'>>') arith_expr)*; +//arith_expr: term (('+'|'-') term)*; +//term: factor (('*'|'@'|'/'|'%'|'//') factor)*; +//factor: ('+'|'-'|'~') factor | power; +//power: atom_expr ('**' factor)?; +atom_expr + : AWAIT? atom trailer* + ; + +atom + : '(' (yield_expr | testlist_comp)? ')' + | '[' testlist_comp? ']' + | '{' dictorsetmaker? '}' + | name + | NUMBER + | STRING+ + | '...' + | 'None' + | 'True' + | 'False' + ; + +name + : NAME + | '_' + | 'match' + ; + +testlist_comp + : (test | star_expr) (comp_for | (',' (test | star_expr))* ','?) + ; + +trailer + : '(' arglist? ')' + | '[' subscriptlist ']' + | '.' name + ; + +subscriptlist + : subscript_ (',' subscript_)* ','? + ; + +subscript_ + : test + | test? ':' test? sliceop? + ; + +sliceop + : ':' test? + ; + +exprlist + : (expr | star_expr) (',' (expr | star_expr))* ','? + ; + +testlist + : test (',' test)* ','? + ; + +dictorsetmaker + : ( + ((test ':' test | '**' expr) (comp_for | (',' (test ':' test | '**' expr))* ','?)) + | ((test | star_expr) (comp_for | (',' (test | star_expr))* ','?)) + ) + ; + +classdef + : 'class' name ('(' arglist? ')')? ':' block + ; + +arglist + : argument (',' argument)* ','? + ; // The reason that keywords are test nodes instead of NAME is that using NAME // results in an ambiguity. ast.c makes sure it's a NAME. 
@@ -221,19 +658,37 @@ arglist: argument (',' argument)* ','?; // Illegal combinations and orderings are blocked in ast.c: // multiple (test comp_for) arguments are blocked; keyword unpackings // that precede iterable unpackings are blocked; etc. -argument: ( test comp_for? | - test '=' test | - '**' test | - '*' test ); +argument + : (test comp_for? | test '=' test | '**' test | '*' test) + ; + +comp_iter + : comp_for + | comp_if + ; -comp_iter: comp_for | comp_if; -comp_for: ASYNC? 'for' exprlist 'in' or_test comp_iter?; -comp_if: 'if' test_nocond comp_iter?; +comp_for + : ASYNC? 'for' exprlist 'in' or_test comp_iter? + ; + +comp_if + : 'if' test_nocond comp_iter? + ; // not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: name; +encoding_decl + : name + ; -yield_expr: 'yield' yield_arg?; -yield_arg: 'from' test | testlist; +yield_expr + : 'yield' yield_arg? + ; + +yield_arg + : 'from' test + | testlist + ; -strings: STRING+ ; +strings + : STRING+ + ; \ No newline at end of file diff --git a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java b/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java deleted file mode 100644 index 695d07e40d..0000000000 --- a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java +++ /dev/null @@ -1,216 +0,0 @@ -package de.jplag.python3; - -import static de.jplag.python3.Python3TokenType.APPLY; -import static de.jplag.python3.Python3TokenType.ARRAY; -import static de.jplag.python3.Python3TokenType.ASSERT; -import static de.jplag.python3.Python3TokenType.ASSIGN; -import static de.jplag.python3.Python3TokenType.BREAK; -import static de.jplag.python3.Python3TokenType.CLASS_BEGIN; -import static de.jplag.python3.Python3TokenType.CLASS_END; -import static de.jplag.python3.Python3TokenType.CONTINUE; -import static de.jplag.python3.Python3TokenType.DEC_BEGIN; -import static de.jplag.python3.Python3TokenType.DEC_END; -import static 
de.jplag.python3.Python3TokenType.DEL; -import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN; -import static de.jplag.python3.Python3TokenType.EXCEPT_END; -import static de.jplag.python3.Python3TokenType.FINALLY; -import static de.jplag.python3.Python3TokenType.FOR_BEGIN; -import static de.jplag.python3.Python3TokenType.FOR_END; -import static de.jplag.python3.Python3TokenType.IF_BEGIN; -import static de.jplag.python3.Python3TokenType.IF_END; -import static de.jplag.python3.Python3TokenType.IMPORT; -import static de.jplag.python3.Python3TokenType.LAMBDA; -import static de.jplag.python3.Python3TokenType.METHOD_BEGIN; -import static de.jplag.python3.Python3TokenType.METHOD_END; -import static de.jplag.python3.Python3TokenType.RAISE; -import static de.jplag.python3.Python3TokenType.RETURN; -import static de.jplag.python3.Python3TokenType.TRY_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_END; -import static de.jplag.python3.Python3TokenType.WITH_BEGIN; -import static de.jplag.python3.Python3TokenType.WITH_END; -import static de.jplag.python3.Python3TokenType.YIELD; - -import org.antlr.v4.runtime.tree.TerminalNode; - -import de.jplag.python3.grammar.Python3Parser; -import de.jplag.python3.grammar.Python3ParserBaseListener; - -public class JplagPython3Listener extends Python3ParserBaseListener { - - private final Parser parser; - - public JplagPython3Listener(Parser parser) { - this.parser = parser; - } - - @Override - public void enterAssert_stmt(Python3Parser.Assert_stmtContext ctx) { - parser.add(ASSERT, ctx.getStart()); - } - - @Override - public void enterDecorated(Python3Parser.DecoratedContext ctx) { - parser.add(DEC_BEGIN, ctx.getStart()); - } - - @Override - public void exitDecorated(Python3Parser.DecoratedContext ctx) { - parser.addEnd(DEC_END, ctx.getStop()); - } - - @Override - public void enterRaise_stmt(Python3Parser.Raise_stmtContext ctx) { - parser.add(RAISE, 
ctx.getStart()); - } - - @Override - public void enterExcept_clause(Python3Parser.Except_clauseContext ctx) { - parser.add(EXCEPT_BEGIN, ctx.getStart()); - } - - @Override - public void exitExcept_clause(Python3Parser.Except_clauseContext ctx) { - parser.addEnd(EXCEPT_END, ctx.getStop()); - } - - @Override - public void enterDictorsetmaker(Python3Parser.DictorsetmakerContext ctx) { - parser.add(ARRAY, ctx.getStart()); - } - - @Override - public void enterReturn_stmt(Python3Parser.Return_stmtContext ctx) { - parser.add(RETURN, ctx.getStart()); - } - - @Override - public void enterWhile_stmt(Python3Parser.While_stmtContext ctx) { - parser.add(WHILE_BEGIN, ctx.getStart()); - } - - @Override - public void exitWhile_stmt(Python3Parser.While_stmtContext ctx) { - parser.addEnd(WHILE_END, ctx.getStop()); - } - - @Override - public void enterYield_arg(Python3Parser.Yield_argContext ctx) { - parser.add(YIELD, ctx.getStart()); - } - - @Override - public void enterImport_stmt(Python3Parser.Import_stmtContext ctx) { - parser.add(IMPORT, ctx.getStart()); - } - - @Override - public void enterLambdef(Python3Parser.LambdefContext ctx) { - parser.add(LAMBDA, ctx.getStart()); - } - - @Override - public void enterTry_stmt(Python3Parser.Try_stmtContext ctx) { - parser.add(TRY_BEGIN, ctx.getStart()); - } - - @Override - public void enterBreak_stmt(Python3Parser.Break_stmtContext ctx) { - parser.add(BREAK, ctx.getStart()); - } - - @Override - public void enterTestlist_comp(Python3Parser.Testlist_compContext ctx) { - if (ctx.getText().contains(",")) { - parser.add(ARRAY, ctx.getStart()); - } - } - - @Override - public void enterIf_stmt(Python3Parser.If_stmtContext ctx) { - parser.add(IF_BEGIN, ctx.getStart()); - } - - @Override - public void exitIf_stmt(Python3Parser.If_stmtContext ctx) { - parser.addEnd(IF_END, ctx.getStop()); - } - - @Override - public void enterWith_stmt(Python3Parser.With_stmtContext ctx) { - parser.add(WITH_BEGIN, ctx.getStart()); - } - - @Override - public void 
exitWith_stmt(Python3Parser.With_stmtContext ctx) { - parser.addEnd(WITH_END, ctx.getStop()); - } - - @Override - public void enterClassdef(Python3Parser.ClassdefContext ctx) { - parser.add(CLASS_BEGIN, ctx.getStart()); - } - - @Override - public void exitClassdef(Python3Parser.ClassdefContext ctx) { - parser.addEnd(CLASS_END, ctx.getStop()); - } - - @Override - public void enterTrailer(Python3Parser.TrailerContext ctx) { - if (ctx.getText().charAt(0) == '(') { - parser.add(APPLY, ctx.getStart()); - } else { - parser.add(ARRAY, ctx.getStart()); - } - } - - @Override - public void enterFuncdef(Python3Parser.FuncdefContext ctx) { - parser.add(METHOD_BEGIN, ctx.getStart()); - } - - @Override - public void exitFuncdef(Python3Parser.FuncdefContext ctx) { - parser.addEnd(METHOD_END, ctx.getStop()); - } - - @Override - public void enterAugassign(Python3Parser.AugassignContext ctx) { - parser.add(ASSIGN, ctx.getStart()); - } - - @Override - public void enterYield_stmt(Python3Parser.Yield_stmtContext ctx) { - parser.add(YIELD, ctx.getStart()); - } - - @Override - public void enterContinue_stmt(Python3Parser.Continue_stmtContext ctx) { - parser.add(CONTINUE, ctx.getStart()); - } - - @Override - public void enterFor_stmt(Python3Parser.For_stmtContext ctx) { - parser.add(FOR_BEGIN, ctx.getStart()); - } - - @Override - public void exitFor_stmt(Python3Parser.For_stmtContext ctx) { - parser.addEnd(FOR_END, ctx.getStop()); - } - - @Override - public void enterDel_stmt(Python3Parser.Del_stmtContext ctx) { - parser.add(DEL, ctx.getStart()); - } - - @Override - public void visitTerminal(TerminalNode node) { - if (node.getText().equals("=")) { - parser.add(ASSIGN, node.getSymbol()); - } else if (node.getText().equals("finally")) { - parser.add(FINALLY, node.getSymbol()); - } - } -} diff --git a/languages/python-3/src/main/java/de/jplag/python3/Parser.java b/languages/python-3/src/main/java/de/jplag/python3/Parser.java deleted file mode 100644 index 2dc352bfe2..0000000000 --- 
a/languages/python-3/src/main/java/de/jplag/python3/Parser.java +++ /dev/null @@ -1,78 +0,0 @@ -package de.jplag.python3; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.tree.ParseTree; -import org.antlr.v4.runtime.tree.ParseTreeWalker; - -import de.jplag.AbstractParser; -import de.jplag.ParsingException; -import de.jplag.Token; -import de.jplag.TokenType; -import de.jplag.python3.grammar.Python3Lexer; -import de.jplag.python3.grammar.Python3Parser; -import de.jplag.python3.grammar.Python3Parser.File_inputContext; -import de.jplag.util.FileUtils; - -public class Parser extends AbstractParser { - - private List tokens; - private File currentFile; - - /** - * Creates the parser. - */ - public Parser() { - super(); - } - - public List parse(Set files) throws ParsingException { - tokens = new ArrayList<>(); - for (File file : files) { - logger.trace("Parsing file {}", file.getName()); - parseFile(file); - tokens.add(Token.fileEnd(file)); - } - return tokens; - } - - private void parseFile(File file) throws ParsingException { - try (BufferedReader reader = FileUtils.openFileReader(file)) { - currentFile = file; - - // create a lexer that feeds off of input CharStream - Python3Lexer lexer = new Python3Lexer(CharStreams.fromReader(reader)); - - // create a buffer of tokens pulled from the lexer - CommonTokenStream tokens = new CommonTokenStream(lexer); - - // create a parser that feeds off the tokens buffer - Python3Parser parser = new Python3Parser(tokens); - File_inputContext in = parser.file_input(); - - ParseTreeWalker ptw = new ParseTreeWalker(); - for (int i = 0; i < in.getChildCount(); i++) { - ParseTree pt = in.getChild(i); - ptw.walk(new JplagPython3Listener(this), pt); - } - - } catch (IOException e) { - throw new 
ParsingException(file, e.getMessage(), e); - } - } - - public void add(TokenType type, org.antlr.v4.runtime.Token token) { - tokens.add(new Token(type, currentFile, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length())); - } - - public void addEnd(TokenType type, org.antlr.v4.runtime.Token token) { - tokens.add(new Token(type, currentFile, token.getLine(), tokens.get(tokens.size() - 1).getColumn() + 1, 0)); - } -} diff --git a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java index e4a684c9b9..8505224702 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java +++ b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java @@ -32,7 +32,9 @@ public enum Python3TokenType implements TokenType { YIELD("YIELD"), DEL("DEL"), WITH_BEGIN("WITH}"), - WITH_END("}WITH"); + WITH_END("}WITH"), + ASYNC("ASYNC"), + AWAIT("AWAIT"); private final String description; diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java index b5a8fd73f4..3df6587284 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java @@ -1,23 +1,16 @@ package de.jplag.python3; -import java.io.File; -import java.util.List; -import java.util.Set; - import org.kohsuke.MetaInfServices; -import de.jplag.ParsingException; -import de.jplag.Token; +import de.jplag.antlr.AbstractAntlrLanguage; @MetaInfServices(de.jplag.Language.class) -public class PythonLanguage implements de.jplag.Language { +public class PythonLanguage extends AbstractAntlrLanguage { private static final String IDENTIFIER = "python3"; - private final Parser parser; - public PythonLanguage() { - parser = new Parser(); + super(new PythonParserAdapter()); } @Override @@ -39,9 +32,4 @@ public 
String getIdentifier() { public int minimumTokenMatch() { return 12; } - - @Override - public List parse(Set files, boolean normalize) throws ParsingException { - return this.parser.parse(files); - } } diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java new file mode 100644 index 0000000000..aa0dabb18f --- /dev/null +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java @@ -0,0 +1,81 @@ +package de.jplag.python3; + +import static de.jplag.python3.Python3TokenType.*; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.python3.grammar.Python3Parser; +import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext; +import de.jplag.python3.grammar.Python3Parser.AugassignContext; +import de.jplag.python3.grammar.Python3Parser.Break_stmtContext; +import de.jplag.python3.grammar.Python3Parser.ClassdefContext; +import de.jplag.python3.grammar.Python3Parser.Continue_stmtContext; +import de.jplag.python3.grammar.Python3Parser.DecoratedContext; +import de.jplag.python3.grammar.Python3Parser.Del_stmtContext; +import de.jplag.python3.grammar.Python3Parser.DictorsetmakerContext; +import de.jplag.python3.grammar.Python3Parser.Except_clauseContext; +import de.jplag.python3.grammar.Python3Parser.For_stmtContext; +import de.jplag.python3.grammar.Python3Parser.FuncdefContext; +import de.jplag.python3.grammar.Python3Parser.If_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Import_stmtContext; +import de.jplag.python3.grammar.Python3Parser.LambdefContext; +import de.jplag.python3.grammar.Python3Parser.Raise_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Return_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Testlist_compContext; +import de.jplag.python3.grammar.Python3Parser.TrailerContext; +import de.jplag.python3.grammar.Python3Parser.Try_stmtContext; +import 
de.jplag.python3.grammar.Python3Parser.While_stmtContext; +import de.jplag.python3.grammar.Python3Parser.With_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Yield_argContext; +import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext; + +public class PythonListener extends AbstractAntlrListener { + public PythonListener() { + statements(); + controlStructures(); + contexts(); + values(); + } + + private void statements() { + visit(Assert_stmtContext.class).map(ASSERT); + visit(Raise_stmtContext.class).map(RAISE); + visit(Return_stmtContext.class).map(RETURN); + visit(Yield_argContext.class).map(YIELD); + visit(Yield_stmtContext.class).map(YIELD); + visit(Import_stmtContext.class).map(IMPORT); + visit(Break_stmtContext.class).map(BREAK); + visit(Continue_stmtContext.class).map(CONTINUE); + visit(Del_stmtContext.class).map(DEL); + visit(Python3Parser.FINALLY).map(FINALLY); + + visit(Python3Parser.ASYNC).map(ASYNC); + visit(Python3Parser.AWAIT).map(AWAIT); + + visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END); + } + + private void controlStructures() { + visit(While_stmtContext.class).map(WHILE_BEGIN, WHILE_END); + visit(Try_stmtContext.class).map(TRY_BEGIN); + visit(If_stmtContext.class).map(IF_BEGIN, IF_END); + visit(With_stmtContext.class).map(WITH_BEGIN, WITH_END); + visit(For_stmtContext.class).map(FOR_BEGIN, FOR_END); + } + + private void contexts() { + visit(DecoratedContext.class).map(DEC_BEGIN, DEC_END); + visit(LambdefContext.class).map(LAMBDA); + visit(ClassdefContext.class).map(CLASS_BEGIN, CLASS_END); + visit(FuncdefContext.class).map(METHOD_BEGIN, METHOD_END); + } + + private void values() { + visit(DictorsetmakerContext.class).map(ARRAY); + visit(Testlist_compContext.class, context -> context.getText().contains(",")).map(ARRAY); + visit(AugassignContext.class).map(ASSIGN); + visit(Python3Parser.ASSIGN).map(ASSIGN); + + visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) == '(').map(APPLY); + 
visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) != '(').map(ARRAY); + } +} diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java new file mode 100644 index 0000000000..8d99920f05 --- /dev/null +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java @@ -0,0 +1,33 @@ +package de.jplag.python3; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.ParserRuleContext; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.antlr.AbstractAntlrParserAdapter; +import de.jplag.python3.grammar.Python3Lexer; +import de.jplag.python3.grammar.Python3Parser; + +public class PythonParserAdapter extends AbstractAntlrParserAdapter { + @Override + protected Lexer createLexer(CharStream input) { + return new Python3Lexer(input); + } + + @Override + protected Python3Parser createParser(CommonTokenStream tokenStream) { + return new Python3Parser(tokenStream); + } + + @Override + protected ParserRuleContext getEntryContext(Python3Parser parser) { + return parser.file_input(); + } + + @Override + protected AbstractAntlrListener getListener() { + return new PythonListener(); + } +} diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java index 0e24adf203..b5a0e55011 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java @@ -1,17 +1,18 @@ package de.jplag.python3.grammar; +import java.util.ArrayDeque; import java.util.Deque; -import java.util.LinkedList; -import org.antlr.v4.runtime.CharStream; -import org.antlr.v4.runtime.CommonToken; -import org.antlr.v4.runtime.Lexer; -import 
org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.*; abstract class Python3LexerBase extends Lexer { - private LinkedList tokens = new LinkedList<>(); - private Deque indents = new LinkedList<>(); + // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). + private java.util.LinkedList tokens = new java.util.LinkedList<>(); + // The stack that keeps track of the indentation level. + private Deque indents = new ArrayDeque<>(); + // The amount of opened braces, brackets and parenthesis. private int opened = 0; + // The most recently produced token. private Token lastToken = null; protected Python3LexerBase(CharStream input) { @@ -26,35 +27,38 @@ public void emit(Token t) { @Override public Token nextToken() { + // Check if the end-of-file is ahead and there are still some DEDENTS expected. if (_input.LA(1) == EOF && !this.indents.isEmpty()) { + // Remove any trailing EOF tokens from our buffer. + for (int i = tokens.size() - 1; i >= 0; i--) { + if (tokens.get(i).getType() == EOF) { + tokens.remove(i); + } + } + + // First emit an extra line break that serves as the end of the statement. this.emit(commonToken(Python3Lexer.NEWLINE, "\n")); - this.removeTrailingEofTokens(); + // Now emit as much DEDENT tokens as needed. while (!indents.isEmpty()) { this.emit(createDedent()); indents.pop(); } - this.emit(commonToken(EOF, "")); + // Put the EOF back on the token stream. + this.emit(commonToken(Python3Lexer.EOF, "")); } Token next = super.nextToken(); if (next.getChannel() == Token.DEFAULT_CHANNEL) { + // Keep track of the last token on the default channel. this.lastToken = next; } return tokens.isEmpty() ? 
next : tokens.poll(); } - private void removeTrailingEofTokens() { - for (int i = tokens.size() - 1; i >= 0; i--) { - if (tokens.get(i).getType() == EOF) { - tokens.remove(i); - } - } - } - private Token createDedent() { CommonToken dedent = commonToken(Python3Lexer.DEDENT, ""); dedent.setLine(this.lastToken.getLine()); @@ -67,21 +71,24 @@ private CommonToken commonToken(int type, String text) { return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); } - /** - * Calculates the indentation of the provided spaces, taking the following rules into account: - *

- * "Tabs are replaced (from left to right) by one to eight spaces such that the total number of characters up to and - * including the replacement is a multiple of eight [...]" - *

- * -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation - **/ + // Calculates the indentation of the provided spaces, taking the + // following rules into account: + // + // "Tabs are replaced (from left to right) by one to eight spaces + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" + // + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation static int getIndentationCount(String spaces) { int count = 0; for (char ch : spaces.toCharArray()) { - if (ch == '\t') { - count += 8 - (count % 8); - } else { - count++; + switch (ch) { + case '\t': + count += 8 - (count % 8); + break; + default: + // A normal space char. + count++; } } @@ -104,21 +111,26 @@ void onNewLine() { String newLine = getText().replaceAll("[^\r\n\f]+", ""); String spaces = getText().replaceAll("[\r\n\f]+", ""); + // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to + // satisfy the final newline needed by the single_put rule used by the REPL. int next = _input.LA(1); int nextnext = _input.LA(2); if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { + // If we're inside a list or on a blank line, ignore all indents, + // dedents and line breaks. skip(); } else { emit(commonToken(Python3Lexer.NEWLINE, newLine)); int indent = getIndentationCount(spaces); int previous = indents.isEmpty() ? 0 : indents.peek(); - if (indent == previous) { + // skip indents of the same size as the present indent-size skip(); } else if (indent > previous) { indents.push(indent); emit(commonToken(Python3Lexer.INDENT, spaces)); } else { + // Possibly emit more than 1 DEDENT token. 
while (!indents.isEmpty() && indents.peek() > indent) { this.emit(createDedent()); indents.pop(); @@ -129,10 +141,10 @@ void onNewLine() { @Override public void reset() { - tokens = new LinkedList<>(); - indents = new LinkedList<>(); + tokens = new java.util.LinkedList<>(); + indents = new ArrayDeque<>(); opened = 0; lastToken = null; super.reset(); } -} +} \ No newline at end of file diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java index 44b5926a45..713af92c1e 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java @@ -1,18 +1,17 @@ package de.jplag.python3.grammar; -import org.antlr.v4.runtime.Parser; -import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.*; public abstract class Python3ParserBase extends Parser { protected Python3ParserBase(TokenStream input) { super(input); } - public boolean cannotBePlusMinus() { + public boolean CannotBePlusMinus() { return true; } - public boolean cannotBeDotLpEq() { + public boolean CannotBeDotLpEq() { return true; } -} +} \ No newline at end of file diff --git a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py index 34d92252cc..bfd3e8bf46 100644 --- a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py +++ b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py @@ -500,4 +500,9 @@ def force_legacy_ssl_support(): def switchWithBreak(): while True: - break \ No newline at end of file + break + +async def x(): + return "" + +x = await x() \ No newline at end of file diff --git a/languages/scala/pom.xml b/languages/scala/pom.xml index 7c43463dc5..b9300f543c 100644 --- a/languages/scala/pom.xml +++ b/languages/scala/pom.xml @@ -10,7 +10,7 @@ 
scala - 2.13.12 + 2.13.14 2.13 @@ -25,7 +25,7 @@ org.scalameta scalameta_${scala.compat.version} - 4.8.15 + 4.9.5 @@ -35,7 +35,7 @@ net.alchim31.maven scala-maven-plugin - 4.8.1 + 4.9.1 diff --git a/pom.xml b/pom.xml index dba35b8c0e..2209f238ef 100644 --- a/pom.xml +++ b/pom.xml @@ -75,18 +75,18 @@ 21 21 2.43.0 - 2.0.12 + 2.0.13 5.10.2 2.7.7 4.13.1 - 2.35.0 - 2.29.0 - 2.36.0 + 2.36.0 + 2.30.0 + 2.37.0 1.0.0 - 5.0.0 + 5.1.0 @@ -117,7 +117,7 @@ edu.stanford.nlp stanford-corenlp - 4.5.6 + 4.5.7 @@ -140,7 +140,7 @@ com.fasterxml.jackson.core jackson-databind - 2.16.1 + 2.17.1 @@ -167,7 +167,7 @@ org.mockito mockito-core - 5.10.0 + 5.12.0 test @@ -203,7 +203,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.3.0 + 3.4.1 @@ -223,7 +223,7 @@ org.apache.maven.plugins maven-assembly-plugin - 3.6.0 + 3.7.1 jar-with-dependencies @@ -244,7 +244,7 @@ org.jacoco jacoco-maven-plugin - 0.8.11 + 0.8.12 prepare-agent @@ -268,12 +268,12 @@ org.apache.maven.plugins maven-gpg-plugin - 3.1.0 + 3.2.4 org.apache.maven.plugins maven-deploy-plugin - 3.1.1 + 3.1.2 @@ -310,7 +310,7 @@ org.codehaus.mojo build-helper-maven-plugin - 3.5.0 + 3.6.0 add-source @@ -343,7 +343,7 @@ org.apache.maven.plugins maven-source-plugin - 3.3.0 + 3.3.1 attach-sources diff --git a/report-viewer/.gitignore b/report-viewer/.gitignore index 719bf30c14..9d12524399 100644 --- a/report-viewer/.gitignore +++ b/report-viewer/.gitignore @@ -29,3 +29,4 @@ coverage test-results/ playwright-report/ +tests/e2e/assets \ No newline at end of file diff --git a/report-viewer/index.html b/report-viewer/index.html index f4fedd2bd1..24ca69e3c6 100644 --- a/report-viewer/index.html +++ b/report-viewer/index.html @@ -6,7 +6,7 @@ JPlag Report Viewer - +