tokenType = getCurrentContext().getElement();
- tokenType.ifPresent(type -> transformToken(type, context.getStart(), context.getStop()));
- super.enterKeyedElement(context);
- }
-
- @Override
- public void enterArrayType(GoParser.ArrayTypeContext context) {
- // otherwise, it is just a type expression
- if (context.parent.parent instanceof GoParser.CompositeLitContext) {
- enterContext(GoBlockContext.ARRAY_BODY);
- transformToken(ARRAY_CONSTRUCTOR, context.getStart(), context.getStop());
- }
- super.enterArrayType(context);
- }
-
- @Override
- public void enterSliceType(GoParser.SliceTypeContext context) {
- // otherwise, it is just a type expression
- if (context.parent.parent instanceof GoParser.CompositeLitContext) {
- enterContext(GoBlockContext.SLICE_BODY);
- transformToken(SLICE_CONSTRUCTOR, context.getStart(), context.getStop());
- }
- super.enterSliceType(context);
- }
-
- @Override
- public void exitCompositeLit(GoParser.CompositeLitContext context) {
- expectAndLeave(GoBlockContext.MAP_BODY, GoBlockContext.SLICE_BODY, GoBlockContext.ARRAY_BODY, GoBlockContext.NAMED_TYPE_BODY);
- super.exitCompositeLit(context);
- }
-
- @Override
- public void enterMapType(GoParser.MapTypeContext context) {
- // otherwise, it is just a type expression
- if (context.parent.parent instanceof GoParser.CompositeLitContext) {
- enterContext(GoBlockContext.MAP_BODY);
- transformToken(MAP_CONSTRUCTOR, context.getStart(), context.getStop());
- }
- super.enterMapType(context);
- }
-
- @Override
- public void enterTypeName(GoParser.TypeNameContext context) {
- if (context.parent.parent instanceof GoParser.CompositeLitContext) {
- transformToken(NAMED_TYPE_CONSTRUCTOR, context.getStart());
- enterContext(GoBlockContext.NAMED_TYPE_BODY);
- } else if (context.parent instanceof GoParser.InterfaceTypeContext) {
- transformToken(TYPE_CONSTRAINT, context.getStart(), context.getStop());
- }
- super.enterTypeName(context);
- }
-
- @Override
- public void enterTypeAssertion(GoParser.TypeAssertionContext context) {
- transformToken(TYPE_ASSERTION, context.getStart(), context.getStop());
- super.enterTypeAssertion(context);
- }
-
- @Override
- public void enterMethodSpec(GoParser.MethodSpecContext context) {
- transformToken(INTERFACE_METHOD, context.getStart(), context.getStop());
- super.enterMethodSpec(context);
- }
-
- /* CONTROL FLOW KEYWORDS */
-
- @Override
- public void enterReturnStmt(GoParser.ReturnStmtContext context) {
- transformToken(RETURN, context.getStart(), context.getStop());
- super.enterReturnStmt(context);
- }
-
- @Override
- public void enterBreakStmt(GoParser.BreakStmtContext context) {
- transformToken(BREAK, context.getStart(), context.getStop());
- super.enterBreakStmt(context);
- }
-
- @Override
- public void enterContinueStmt(GoParser.ContinueStmtContext context) {
- transformToken(CONTINUE, context.getStart(), context.getStop());
- super.enterContinueStmt(context);
- }
-
- @Override
- public void enterFallthroughStmt(GoParser.FallthroughStmtContext context) {
- transformToken(FALLTHROUGH, context.getStart(), context.getStop());
- super.enterFallthroughStmt(context);
- }
-
- @Override
- public void enterGotoStmt(GoParser.GotoStmtContext context) {
- transformToken(GOTO, context.getStart(), context.getStop());
- super.enterGotoStmt(context);
- }
-
- @Override
- public void enterGoStmt(GoParser.GoStmtContext context) {
- transformToken(GO, context.getStart(), context.getStop());
- super.enterGoStmt(context);
- }
-
- @Override
- public void enterDeferStmt(GoParser.DeferStmtContext context) {
- transformToken(DEFER, context.getStart(), context.getStop());
- super.enterDeferStmt(context);
- }
-
- @Override
- public void enterSendStmt(GoParser.SendStmtContext ctx) {
- transformToken(SEND_STATEMENT, ctx.getStart(), ctx.getStop());
- super.enterSendStmt(ctx);
- }
-
- @Override
- public void enterRecvStmt(GoParser.RecvStmtContext ctx) {
- transformToken(RECEIVE_STATEMENT, ctx.getStart(), ctx.getStop());
- super.enterRecvStmt(ctx);
- }
-
- @Override
- public void visitTerminal(TerminalNode node) {
- Token token = node.getSymbol();
- switch (token.getText()) {
- case "else" -> {
- expectAndLeave(GoBlockContext.IF_BLOCK);
- enterContext(GoBlockContext.ELSE_BLOCK);
- }
- case "{" -> transformToken(getCurrentContext().getBegin(), token);
- case "}" -> transformToken(getCurrentContext().getEnd(), token);
- default -> {
- // do nothing.
- }
- }
- super.visitTerminal(node);
- }
-
- /**
- * This enumeration provides sets of information regarding different types of nesting structures in Go. Each element is
- * a tuple of a token for the beginning of a block, the end of the block, and optionally, for the elements contained.
- *
- * As the Go parser does not differentiate between different kinds of blocks, a stack of these GoBlockContexts is
- * required to be able to assign the correct token types for each block.
- */
- private enum GoBlockContext {
- ARRAY_BODY(ARRAY_BODY_BEGIN, ARRAY_BODY_END, Optional.of(ARRAY_ELEMENT)),
- STRUCT_BODY(STRUCT_BODY_BEGIN, STRUCT_BODY_END, Optional.of(MEMBER_DECLARATION)),
- MAP_BODY(MAP_BODY_BEGIN, MAP_BODY_END, Optional.of(MAP_ELEMENT)),
- SLICE_BODY(SLICE_BODY_BEGIN, SLICE_BODY_END, Optional.of(SLICE_ELEMENT)),
- NAMED_TYPE_BODY(NAMED_TYPE_BODY_BEGIN, NAMED_TYPE_BODY_END, Optional.of(NAMED_TYPE_ELEMENT)),
- FUNCTION_BODY(FUNCTION_BODY_BEGIN, FUNCTION_BODY_END, Optional.empty()),
-
- IF_BLOCK(IF_BLOCK_BEGIN, IF_BLOCK_END, Optional.empty()),
- ELSE_BLOCK(ELSE_BLOCK_BEGIN, ELSE_BLOCK_END, Optional.empty()),
- FOR_BLOCK(FOR_BLOCK_BEGIN, FOR_BLOCK_END, Optional.empty()),
- SWITCH_BLOCK(SWITCH_BLOCK_BEGIN, SWITCH_BLOCK_END, Optional.empty()),
- SELECT_CONTEXT(SELECT_BLOCK_BEGIN, SELECT_BLOCK_END, Optional.empty()),
- STATEMENT_BLOCK(STATEMENT_BLOCK_BEGIN, STATEMENT_BLOCK_END, Optional.empty()),
- CASE_BLOCK(CASE_BLOCK_BEGIN, CASE_BLOCK_END, Optional.empty()),
- INTERFACE_BODY(INTERFACE_BLOCK_BEGIN, INTERFACE_BLOCK_END, Optional.empty());
-
- private final GoTokenType beginTokenType;
- private final GoTokenType endTokenType;
- private final Optional<GoTokenType> elementTokenType;
-
- GoBlockContext(GoTokenType beginTokenType, GoTokenType endTokenType, Optional<GoTokenType> elementTokenType) {
- this.beginTokenType = beginTokenType;
- this.endTokenType = endTokenType;
- this.elementTokenType = elementTokenType;
- }
-
- GoTokenType getBegin() {
- return this.beginTokenType;
- }
-
- GoTokenType getEnd() {
- return this.endTokenType;
- }
-
- public Optional<GoTokenType> getElement() {
- return this.elementTokenType;
- }
- }
-}
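Aside (not part of the patch): the Javadoc on GoBlockContext above describes the key mechanism of the removed Go listener: a stack of block contexts decides which begin/end token type a `{` or `}` terminal receives in visitTerminal. Below is a minimal sketch of that idea, using simplified, hypothetical names rather than the actual JPlag classes.

```java
// Sketch only: simplified stand-ins for the JPlag types, assuming a Deque-based context stack.
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;

class BlockContextSketch {
    // The real enum additionally carries begin/end (and optional element) token types per context.
    enum BlockContext {
        IF_BLOCK,
        ELSE_BLOCK,
        FUNCTION_BODY
    }

    private final Deque<BlockContext> contexts = new ArrayDeque<>();

    /** Pushes a context when a block construct (if, for, composite literal, ...) is entered. */
    void enterContext(BlockContext context) {
        contexts.push(context);
    }

    /** Pops the current context, but only if it is one of the expected kinds. */
    void expectAndLeave(BlockContext... expected) {
        if (!contexts.isEmpty() && List.of(expected).contains(contexts.peek())) {
            contexts.pop();
        }
    }

    /** The top of the stack decides which begin/end token a '{' or '}' terminal maps to. */
    BlockContext getCurrentContext() {
        return contexts.peek();
    }
}
```

The enterContext, expectAndLeave, and getCurrentContext calls mirror those visible in the hunk above.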
diff --git a/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java b/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java
index 0feafc7bc0..68d3553e63 100644
--- a/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java
+++ b/languages/golang/src/test/java/de/jplag/golang/GoLanguageTest.java
@@ -1,156 +1,48 @@
package de.jplag.golang;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-import java.util.OptionalInt;
-import java.util.Set;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import de.jplag.ParsingException;
-import de.jplag.SharedTokenType;
-import de.jplag.Token;
-import de.jplag.TokenPrinter;
-
-class GoLanguageTest {
- /**
- * Test source file that is supposed to produce a complete set of tokens, i.e. all types of tokens.
- */
+import static de.jplag.golang.GoTokenType.ARGUMENT;
+import static de.jplag.golang.GoTokenType.ASSIGNMENT;
+import static de.jplag.golang.GoTokenType.FUNCTION_BODY_BEGIN;
+import static de.jplag.golang.GoTokenType.FUNCTION_BODY_END;
+import static de.jplag.golang.GoTokenType.FUNCTION_DECLARATION;
+import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE;
+import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_BEGIN;
+import static de.jplag.golang.GoTokenType.IMPORT_CLAUSE_END;
+import static de.jplag.golang.GoTokenType.IMPORT_DECLARATION;
+import static de.jplag.golang.GoTokenType.INVOCATION;
+import static de.jplag.golang.GoTokenType.MEMBER_DECLARATION;
+import static de.jplag.golang.GoTokenType.PACKAGE;
+import static de.jplag.golang.GoTokenType.STRUCT_BODY_BEGIN;
+import static de.jplag.golang.GoTokenType.STRUCT_BODY_END;
+import static de.jplag.golang.GoTokenType.STRUCT_DECLARATION;
+import static de.jplag.golang.GoTokenType.VARIABLE_DECLARATION;
+
+import de.jplag.testutils.LanguageModuleTest;
+import de.jplag.testutils.datacollector.TestDataCollector;
+import de.jplag.testutils.datacollector.TestSourceIgnoredLinesCollector;
+
+class GoLanguageTest extends LanguageModuleTest {
private static final String COMPLETE_TEST_FILE = "Complete.go";
+ // example files taken from antlr repo
+ private static final String CONSTANTS_TEST_FILE = "Constants.go";
+ private static final String ARRAY_ELLIPSIS_DECLS_FILE = "ArrayEllipsisDecls.go";
- /**
- * Regular expression that describes lines consisting only of whitespace and optionally a line comment.
- */
- private static final String EMPTY_OR_SINGLE_LINE_COMMENT = "\\s*(//.*|/\\*.*\\*/)?";
-
- /**
- * Regular expression that describes lines containing the start of a multiline comment and no code before it.
- */
- private static final String DELIMITED_COMMENT_START = "\\s*/\\*(?:(?!\\*/).)*$";
-
- /**
- * Regular expression that describes lines containing the end of a multiline comment and no more code after that.
- */
- private static final String DELIMITED_COMMENT_END = ".*\\*/\\s*$";
-
- private final Logger logger = LoggerFactory.getLogger(GoLanguageTest.class);
- private final String[] testFiles = new String[] {COMPLETE_TEST_FILE};
- private final File testFileLocation = Path.of("src", "test", "resources", "de", "jplag", "golang").toFile();
- private GoLanguage language;
-
- @BeforeEach
- void setup() {
- language = new GoLanguage();
+ public GoLanguageTest() {
+ super(new GoLanguage(), GoTokenType.class);
}
- @Test
- void parseTestFiles() throws ParsingException {
- for (String fileName : testFiles) {
- List<Token> tokens = language.parse(Set.of(new File(testFileLocation, fileName)));
- String output = TokenPrinter.printTokens(tokens, testFileLocation);
- logger.info(output);
+ @Override
+ protected void collectTestData(TestDataCollector collector) {
+ collector.testFile(COMPLETE_TEST_FILE).testCoverages();
- testSourceCoverage(fileName, tokens);
- if (fileName.equals(COMPLETE_TEST_FILE)) {
- testTokenCoverage(tokens, fileName);
- }
- }
+ // Some basic tests, so we have at least some idea if the listener was changed
+ collector.testFile(CONSTANTS_TEST_FILE).testTokenSequence(PACKAGE, VARIABLE_DECLARATION, VARIABLE_DECLARATION);
+ collector.testFile(ARRAY_ELLIPSIS_DECLS_FILE).testSourceCoverage().testTokenSequence(PACKAGE, IMPORT_DECLARATION, IMPORT_CLAUSE_BEGIN,
+ IMPORT_CLAUSE, IMPORT_CLAUSE_END, STRUCT_DECLARATION, STRUCT_BODY_BEGIN, MEMBER_DECLARATION, STRUCT_BODY_END, FUNCTION_DECLARATION,
+ FUNCTION_BODY_BEGIN, VARIABLE_DECLARATION, ASSIGNMENT, INVOCATION, ARGUMENT, ARGUMENT, FUNCTION_BODY_END);
}
- /**
- * Confirms that the code is covered to a basic extent, i.e. each line of code contains at least one token.
- * @param fileName a code sample file name
- * @param tokens the list of tokens generated from the sample
- */
- private void testSourceCoverage(String fileName, List<Token> tokens) {
- File testFile = new File(testFileLocation, fileName);
-
- List<String> lines = null;
- try {
- lines = Files.readAllLines(testFile.toPath());
- } catch (IOException exception) {
- logger.info("Error while reading test file %s".formatted(fileName), exception);
- fail();
- }
-
- // All lines that contain code
- var codeLines = getCodeLines(lines);
- // All lines that contain a token
- var tokenLines = tokens.stream().mapToInt(Token::getLine).distinct().boxed().toList();
-
- if (codeLines.size() > tokenLines.size()) {
- List<Integer> missedLinesIndices = new ArrayList<>(codeLines);
- missedLinesIndices.removeAll(tokenLines);
- var missedLines = missedLinesIndices.stream().map(Object::toString).collect(Collectors.joining(", "));
- if (!missedLines.isBlank()) {
- fail("Found lines in file '%s' that are not represented in the token list. \n\tMissed lines: %s".formatted(fileName, missedLines));
- }
- }
- OptionalInt differingLine = IntStream.range(0, codeLines.size())
- .dropWhile(index -> Objects.equals(codeLines.get(index), tokenLines.get(index))).findAny();
- differingLine.ifPresent(
- i -> fail("Not all lines of code in '%s' are represented in tokens, starting with line %d.".formatted(fileName, codeLines.get(i))));
+ @Override
+ protected void configureIgnoredLines(TestSourceIgnoredLinesCollector collector) {
}
-
- /**
- * Gets the line numbers of lines containing actual code, omitting empty lines and comment lines.
- * @param lines lines of a code file
- * @return a list of the line numbers of code lines
- */
- private List<Integer> getCodeLines(List<String> lines) {
- // This boxed boolean can be accessed from within the lambda method below
- var state = new Object() {
- boolean insideComment = false;
- };
-
- var codeLines = IntStream.rangeClosed(1, lines.size()).sequential().filter(idx -> {
- String line = lines.get(idx - 1);
- if (line.matches(EMPTY_OR_SINGLE_LINE_COMMENT)) {
- return false;
- } else if (line.matches(DELIMITED_COMMENT_START)) {
- state.insideComment = true;
- return false;
- } else if (state.insideComment) {
- // This fails if code follows after '*/'. If the code is formatted well, this should not happen.
- if (line.matches(DELIMITED_COMMENT_END)) {
- state.insideComment = false;
- }
- return false;
- }
- return true;
- });
-
- return codeLines.boxed().toList();
-
- }
-
- /**
- * Confirms that all Token types are 'reachable' with a complete code example.
- * @param tokens list of tokens which is supposed to contain all types of tokens
- * @param fileName The file name of the complete code example
- */
- private void testTokenCoverage(List<Token> tokens, String fileName) {
- var annotatedTokens = tokens.stream().map(Token::getType).collect(Collectors.toSet());
- assertTrue(annotatedTokens.contains(SharedTokenType.FILE_END));
- var annotatedGoTokens = annotatedTokens.stream().filter(GoTokenType.class::isInstance).collect(Collectors.toSet());
- var allGoTokens = GoTokenType.values();
- var missingGoTokens = Arrays.stream(allGoTokens).filter(token -> !annotatedGoTokens.contains(token)).toList();
- assertTrue(missingGoTokens.isEmpty(), "The following go tokens are missing in the code example '%s':\n".formatted(fileName)
- + String.join("\n", missingGoTokens.stream().map(GoTokenType::getDescription).toList()));
- }
-
}
diff --git a/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go b/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go
new file mode 100644
index 0000000000..0a9b83bec5
--- /dev/null
+++ b/languages/golang/src/test/resources/de/jplag/go/ArrayEllipsisDecls.go
@@ -0,0 +1,14 @@
+package samples
+
+import (
+ "fmt"
+)
+
+type Custom struct {
+ string
+}
+
+func ArrayEllipsisDecls() {
+ stooges := [...]Custom{{"Moe"}, {"Larry"}, {"Curly"}} // len(stooges) == 3
+ fmt.Println("Stooges: ", stooges)
+}
\ No newline at end of file
diff --git a/languages/golang/src/test/resources/de/jplag/golang/Complete.go b/languages/golang/src/test/resources/de/jplag/go/Complete.go
similarity index 100%
rename from languages/golang/src/test/resources/de/jplag/golang/Complete.go
rename to languages/golang/src/test/resources/de/jplag/go/Complete.go
diff --git a/languages/golang/src/test/resources/de/jplag/go/Constants.go b/languages/golang/src/test/resources/de/jplag/go/Constants.go
new file mode 100644
index 0000000000..1a3dab2879
--- /dev/null
+++ b/languages/golang/src/test/resources/de/jplag/go/Constants.go
@@ -0,0 +1,6 @@
+package A
+
+const (
+ /*A*/ T = 1 << 0
+ /*B*/ Ta = 1 << 1
+)
\ No newline at end of file
diff --git a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java
index f1c5c2dc93..65f5ab89c8 100644
--- a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java
+++ b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java
@@ -52,7 +52,7 @@ public void parseFiles(Set<File> files, final Parser parser) throws ParsingExcep
ast.accept(scanner, null);
parser.add(Token.semanticFileEnd(file));
}
- } catch (IOException exception) {
+ } catch (Exception exception) {
throw new ParsingException(null, exception.getMessage(), exception);
}
parsingExceptions.addAll(processErrors(listener));
diff --git a/languages/python-3/pom.xml b/languages/python-3/pom.xml
index 92712c0b1a..fb6865e696 100644
--- a/languages/python-3/pom.xml
+++ b/languages/python-3/pom.xml
@@ -13,6 +13,11 @@
 <groupId>org.antlr</groupId>
 <artifactId>antlr4-runtime</artifactId>
+ <dependency>
+ <groupId>de.jplag</groupId>
+ <artifactId>language-antlr-utils</artifactId>
+ <version>${revision}</version>
+ </dependency>
diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
index 9b5fee1dc4..8b36564b96 100644
--- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
+++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
@@ -28,190 +28,159 @@
* https://github.com/bkiers/python3-parser
* Developed by : Bart Kiers, bart@big-o.nl
*/
+
+// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
+// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
+
lexer grammar Python3Lexer;
// All comments that start with "///" are copy-pasted from
// The Python Language Reference
-tokens { INDENT, DEDENT }
+tokens {
+ INDENT,
+ DEDENT
+}
options {
- superClass=Python3LexerBase;
+ superClass = Python3LexerBase;
}
+// Insert here @header for C++ lexer.
+
/*
* lexer rules
*/
-STRING
- : STRING_LITERAL
- | BYTES_LITERAL
- ;
-
-NUMBER
- : INTEGER
- | FLOAT_NUMBER
- | IMAG_NUMBER
- ;
-
-INTEGER
- : DECIMAL_INTEGER
- | OCT_INTEGER
- | HEX_INTEGER
- | BIN_INTEGER
- ;
-
-AND : 'and';
-AS : 'as';
-ASSERT : 'assert';
-ASYNC : 'async';
-AWAIT : 'await';
-BREAK : 'break';
-CASE : 'case' ;
-CLASS : 'class';
-CONTINUE : 'continue';
-DEF : 'def';
-DEL : 'del';
-ELIF : 'elif';
-ELSE : 'else';
-EXCEPT : 'except';
-FALSE : 'False';
-FINALLY : 'finally';
-FOR : 'for';
-FROM : 'from';
-GLOBAL : 'global';
-IF : 'if';
-IMPORT : 'import';
-IN : 'in';
-IS : 'is';
-LAMBDA : 'lambda';
-MATCH : 'match' ;
-NONE : 'None';
-NONLOCAL : 'nonlocal';
-NOT : 'not';
-OR : 'or';
-PASS : 'pass';
-RAISE : 'raise';
-RETURN : 'return';
-TRUE : 'True';
-TRY : 'try';
-UNDERSCORE : '_' ;
-WHILE : 'while';
-WITH : 'with';
-YIELD : 'yield';
-
-NEWLINE
- : ( {this.atStartOfInput()}? SPACES
- | ( '\r'? '\n' | '\r' | '\f' ) SPACES?
- )
- {this.onNewLine();}
- ;
+STRING: STRING_LITERAL | BYTES_LITERAL;
+
+NUMBER: INTEGER | FLOAT_NUMBER | IMAG_NUMBER;
+
+INTEGER: DECIMAL_INTEGER | OCT_INTEGER | HEX_INTEGER | BIN_INTEGER;
+
+AND : 'and';
+AS : 'as';
+ASSERT : 'assert';
+ASYNC : 'async';
+AWAIT : 'await';
+BREAK : 'break';
+CASE : 'case';
+CLASS : 'class';
+CONTINUE : 'continue';
+DEF : 'def';
+DEL : 'del';
+ELIF : 'elif';
+ELSE : 'else';
+EXCEPT : 'except';
+FALSE : 'False';
+FINALLY : 'finally';
+FOR : 'for';
+FROM : 'from';
+GLOBAL : 'global';
+IF : 'if';
+IMPORT : 'import';
+IN : 'in';
+IS : 'is';
+LAMBDA : 'lambda';
+MATCH : 'match';
+NONE : 'None';
+NONLOCAL : 'nonlocal';
+NOT : 'not';
+OR : 'or';
+PASS : 'pass';
+RAISE : 'raise';
+RETURN : 'return';
+TRUE : 'True';
+TRY : 'try';
+UNDERSCORE : '_';
+WHILE : 'while';
+WITH : 'with';
+YIELD : 'yield';
+
+NEWLINE: ({this.atStartOfInput()}? SPACES | ( '\r'? '\n' | '\r' | '\f') SPACES?) {this.onNewLine();};
/// identifier ::= id_start id_continue*
-NAME
- : ID_START ID_CONTINUE*
- ;
+NAME: ID_START ID_CONTINUE*;
/// stringliteral ::= [stringprefix](shortstring | longstring)
/// stringprefix ::= "r" | "u" | "R" | "U" | "f" | "F"
/// | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
-STRING_LITERAL
- : ( [rR] | [uU] | [fF] | ( [fF] [rR] ) | ( [rR] [fF] ) )? ( SHORT_STRING | LONG_STRING )
- ;
+STRING_LITERAL: ( [rR] | [uU] | [fF] | ( [fF] [rR]) | ( [rR] [fF]))? ( SHORT_STRING | LONG_STRING);
/// bytesliteral ::= bytesprefix(shortbytes | longbytes)
/// bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
-BYTES_LITERAL
- : ( [bB] | ( [bB] [rR] ) | ( [rR] [bB] ) ) ( SHORT_BYTES | LONG_BYTES )
- ;
+BYTES_LITERAL: ( [bB] | ( [bB] [rR]) | ( [rR] [bB])) ( SHORT_BYTES | LONG_BYTES);
/// decimalinteger ::= nonzerodigit digit* | "0"+
-DECIMAL_INTEGER
- : NON_ZERO_DIGIT DIGIT*
- | '0'+
- ;
+DECIMAL_INTEGER: NON_ZERO_DIGIT DIGIT* | '0'+;
/// octinteger ::= "0" ("o" | "O") octdigit+
-OCT_INTEGER
- : '0' [oO] OCT_DIGIT+
- ;
+OCT_INTEGER: '0' [oO] OCT_DIGIT+;
/// hexinteger ::= "0" ("x" | "X") hexdigit+
-HEX_INTEGER
- : '0' [xX] HEX_DIGIT+
- ;
+HEX_INTEGER: '0' [xX] HEX_DIGIT+;
/// bininteger ::= "0" ("b" | "B") bindigit+
-BIN_INTEGER
- : '0' [bB] BIN_DIGIT+
- ;
+BIN_INTEGER: '0' [bB] BIN_DIGIT+;
/// floatnumber ::= pointfloat | exponentfloat
-FLOAT_NUMBER
- : POINT_FLOAT
- | EXPONENT_FLOAT
- ;
+FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT;
/// imagnumber ::= (floatnumber | intpart) ("j" | "J")
-IMAG_NUMBER
- : ( FLOAT_NUMBER | INT_PART ) [jJ]
- ;
-
-DOT : '.';
-ELLIPSIS : '...';
-STAR : '*';
-OPEN_PAREN : '(' {this.openBrace();};
-CLOSE_PAREN : ')' {this.closeBrace();};
-COMMA : ',';
-COLON : ':';
-SEMI_COLON : ';';
-POWER : '**';
-ASSIGN : '=';
-OPEN_BRACK : '[' {this.openBrace();};
-CLOSE_BRACK : ']' {this.closeBrace();};
-OR_OP : '|';
-XOR : '^';
-AND_OP : '&';
-LEFT_SHIFT : '<<';
-RIGHT_SHIFT : '>>';
-ADD : '+';
-MINUS : '-';
-DIV : '/';
-MOD : '%';
-IDIV : '//';
-NOT_OP : '~';
-OPEN_BRACE : '{' {this.openBrace();};
-CLOSE_BRACE : '}' {this.closeBrace();};
-LESS_THAN : '<';
-GREATER_THAN : '>';
-EQUALS : '==';
-GT_EQ : '>=';
-LT_EQ : '<=';
-NOT_EQ_1 : '<>';
-NOT_EQ_2 : '!=';
-AT : '@';
-ARROW : '->';
-ADD_ASSIGN : '+=';
-SUB_ASSIGN : '-=';
-MULT_ASSIGN : '*=';
-AT_ASSIGN : '@=';
-DIV_ASSIGN : '/=';
-MOD_ASSIGN : '%=';
-AND_ASSIGN : '&=';
-OR_ASSIGN : '|=';
-XOR_ASSIGN : '^=';
-LEFT_SHIFT_ASSIGN : '<<=';
+IMAG_NUMBER: ( FLOAT_NUMBER | INT_PART) [jJ];
+
+DOT : '.';
+ELLIPSIS : '...';
+STAR : '*';
+OPEN_PAREN : '(' {this.openBrace();};
+CLOSE_PAREN : ')' {this.closeBrace();};
+COMMA : ',';
+COLON : ':';
+SEMI_COLON : ';';
+POWER : '**';
+ASSIGN : '=';
+OPEN_BRACK : '[' {this.openBrace();};
+CLOSE_BRACK : ']' {this.closeBrace();};
+OR_OP : '|';
+XOR : '^';
+AND_OP : '&';
+LEFT_SHIFT : '<<';
+RIGHT_SHIFT : '>>';
+ADD : '+';
+MINUS : '-';
+DIV : '/';
+MOD : '%';
+IDIV : '//';
+NOT_OP : '~';
+OPEN_BRACE : '{' {this.openBrace();};
+CLOSE_BRACE : '}' {this.closeBrace();};
+LESS_THAN : '<';
+GREATER_THAN : '>';
+EQUALS : '==';
+GT_EQ : '>=';
+LT_EQ : '<=';
+NOT_EQ_1 : '<>';
+NOT_EQ_2 : '!=';
+AT : '@';
+ARROW : '->';
+ADD_ASSIGN : '+=';
+SUB_ASSIGN : '-=';
+MULT_ASSIGN : '*=';
+AT_ASSIGN : '@=';
+DIV_ASSIGN : '/=';
+MOD_ASSIGN : '%=';
+AND_ASSIGN : '&=';
+OR_ASSIGN : '|=';
+XOR_ASSIGN : '^=';
+LEFT_SHIFT_ASSIGN : '<<=';
RIGHT_SHIFT_ASSIGN : '>>=';
-POWER_ASSIGN : '**=';
-IDIV_ASSIGN : '//=';
+POWER_ASSIGN : '**=';
+IDIV_ASSIGN : '//=';
-SKIP_
- : ( SPACES | COMMENT | LINE_JOINING ) -> skip
- ;
+SKIP_: ( SPACES | COMMENT | LINE_JOINING) -> skip;
-UNKNOWN_CHAR
- : .
- ;
+UNKNOWN_CHAR: .;
/*
* fragments
@@ -220,143 +189,93 @@ UNKNOWN_CHAR
/// shortstring ::= "'" shortstringitem* "'" | '"' shortstringitem* '"'
/// shortstringitem ::= shortstringchar | stringescapeseq
/// shortstringchar ::= <any source character except "\" or newline or the quote>
-fragment SHORT_STRING
- : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\''
- | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"'
- ;
+fragment SHORT_STRING:
+ '\'' (STRING_ESCAPE_SEQ | ~[\\\r\n\f'])* '\''
+ | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"])* '"'
+;
/// longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""'
-fragment LONG_STRING
- : '\'\'\'' LONG_STRING_ITEM*? '\'\'\''
- | '"""' LONG_STRING_ITEM*? '"""'
- ;
+fragment LONG_STRING: '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' | '"""' LONG_STRING_ITEM*? '"""';
/// longstringitem ::= longstringchar | stringescapeseq
-fragment LONG_STRING_ITEM
- : LONG_STRING_CHAR
- | STRING_ESCAPE_SEQ
- ;
+fragment LONG_STRING_ITEM: LONG_STRING_CHAR | STRING_ESCAPE_SEQ;
/// longstringchar ::= <any source character except "\">
-fragment LONG_STRING_CHAR
- : ~'\\'
- ;
+fragment LONG_STRING_CHAR: ~'\\';
/// stringescapeseq ::= "\" <any source character>
-fragment STRING_ESCAPE_SEQ
- : '\\' .
- | '\\' NEWLINE
- ;
+fragment STRING_ESCAPE_SEQ: '\\' . | '\\' NEWLINE;
/// nonzerodigit ::= "1"..."9"
-fragment NON_ZERO_DIGIT
- : [1-9]
- ;
+fragment NON_ZERO_DIGIT: [1-9];
/// digit ::= "0"..."9"
-fragment DIGIT
- : [0-9]
- ;
+fragment DIGIT: [0-9];
/// octdigit ::= "0"..."7"
-fragment OCT_DIGIT
- : [0-7]
- ;
+fragment OCT_DIGIT: [0-7];
/// hexdigit ::= digit | "a"..."f" | "A"..."F"
-fragment HEX_DIGIT
- : [0-9a-fA-F]
- ;
+fragment HEX_DIGIT: [0-9a-fA-F];
/// bindigit ::= "0" | "1"
-fragment BIN_DIGIT
- : [01]
- ;
+fragment BIN_DIGIT: [01];
/// pointfloat ::= [intpart] fraction | intpart "."
-fragment POINT_FLOAT
- : INT_PART? FRACTION
- | INT_PART '.'
- ;
+fragment POINT_FLOAT: INT_PART? FRACTION | INT_PART '.';
/// exponentfloat ::= (intpart | pointfloat) exponent
-fragment EXPONENT_FLOAT
- : ( INT_PART | POINT_FLOAT ) EXPONENT
- ;
+fragment EXPONENT_FLOAT: ( INT_PART | POINT_FLOAT) EXPONENT;
/// intpart ::= digit+
-fragment INT_PART
- : DIGIT+
- ;
+fragment INT_PART: DIGIT+;
/// fraction ::= "." digit+
-fragment FRACTION
- : '.' DIGIT+
- ;
+fragment FRACTION: '.' DIGIT+;
/// exponent ::= ("e" | "E") ["+" | "-"] digit+
-fragment EXPONENT
- : [eE] [+-]? DIGIT+
- ;
+fragment EXPONENT: [eE] [+-]? DIGIT+;
/// shortbytes ::= "'" shortbytesitem* "'" | '"' shortbytesitem* '"'
/// shortbytesitem ::= shortbyteschar | bytesescapeseq
-fragment SHORT_BYTES
- : '\'' ( SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ )* '\''
- | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ )* '"'
- ;
+fragment SHORT_BYTES:
+ '\'' (SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ)* '\''
+ | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ)* '"'
+;
/// longbytes ::= "'''" longbytesitem* "'''" | '"""' longbytesitem* '"""'
-fragment LONG_BYTES
- : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\''
- | '"""' LONG_BYTES_ITEM*? '"""'
- ;
+fragment LONG_BYTES: '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' | '"""' LONG_BYTES_ITEM*? '"""';
/// longbytesitem ::= longbyteschar | bytesescapeseq
-fragment LONG_BYTES_ITEM
- : LONG_BYTES_CHAR
- | BYTES_ESCAPE_SEQ
- ;
+fragment LONG_BYTES_ITEM: LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ;
/// shortbyteschar ::= <any ASCII character except "\" or newline or the quote>
-fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE
- : [\u0000-\u0009]
- | [\u000B-\u000C]
- | [\u000E-\u0026]
- | [\u0028-\u005B]
- | [\u005D-\u007F]
- ;
-
-fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE
- : [\u0000-\u0009]
- | [\u000B-\u000C]
- | [\u000E-\u0021]
- | [\u0023-\u005B]
- | [\u005D-\u007F]
- ;
+fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE:
+ [\u0000-\u0009]
+ | [\u000B-\u000C]
+ | [\u000E-\u0026]
+ | [\u0028-\u005B]
+ | [\u005D-\u007F]
+;
+
+fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE:
+ [\u0000-\u0009]
+ | [\u000B-\u000C]
+ | [\u000E-\u0021]
+ | [\u0023-\u005B]
+ | [\u005D-\u007F]
+;
/// longbyteschar ::= <any ASCII character except "\">
-fragment LONG_BYTES_CHAR
- : [\u0000-\u005B]
- | [\u005D-\u007F]
- ;
+fragment LONG_BYTES_CHAR: [\u0000-\u005B] | [\u005D-\u007F];
/// bytesescapeseq ::= "\" <any ASCII character>
-fragment BYTES_ESCAPE_SEQ
- : '\\' [\u0000-\u007F]
- ;
-
-fragment SPACES
- : [ \t]+
- ;
+fragment BYTES_ESCAPE_SEQ: '\\' [\u0000-\u007F];
-fragment COMMENT
- : '#' ~[\r\n\f]*
- ;
+fragment SPACES: [ \t]+;
-fragment LINE_JOINING
- : '\\' SPACES? ( '\r'? '\n' | '\r' | '\f')
- ;
+fragment COMMENT: '#' ~[\r\n\f]*;
+fragment LINE_JOINING: '\\' SPACES? ( '\r'? '\n' | '\r' | '\f');
// TODO: ANTLR seems lack of some Unicode property support...
//$ curl https://www.unicode.org/Public/13.0.0/ucd/PropList.txt | grep Other_ID_
@@ -369,36 +288,26 @@ fragment LINE_JOINING
//1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
//19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE
-fragment UNICODE_OIDS
- : '\u1885'..'\u1886'
- | '\u2118'
- | '\u212e'
- | '\u309b'..'\u309c'
- ;
+fragment UNICODE_OIDS: '\u1885' ..'\u1886' | '\u2118' | '\u212e' | '\u309b' ..'\u309c';
-fragment UNICODE_OIDC
- : '\u00b7'
- | '\u0387'
- | '\u1369'..'\u1371'
- | '\u19da'
- ;
+fragment UNICODE_OIDC: '\u00b7' | '\u0387' | '\u1369' ..'\u1371' | '\u19da';
/// id_start ::= <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property>
-fragment ID_START
- : '_'
- | [\p{L}]
- | [\p{Nl}]
- //| [\p{Other_ID_Start}]
- | UNICODE_OIDS
- ;
+fragment ID_START:
+ '_'
+ | [\p{L}]
+ | [\p{Nl}]
+ //| [\p{Other_ID_Start}]
+ | UNICODE_OIDS
+;
/// id_continue ::= <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property>
-fragment ID_CONTINUE
- : ID_START
- | [\p{Mn}]
- | [\p{Mc}]
- | [\p{Nd}]
- | [\p{Pc}]
- //| [\p{Other_ID_Continue}]
- | UNICODE_OIDC
- ;
+fragment ID_CONTINUE:
+ ID_START
+ | [\p{Mn}]
+ | [\p{Mc}]
+ | [\p{Nd}]
+ | [\p{Pc}]
+ //| [\p{Other_ID_Continue}]
+ | UNICODE_OIDC
+;
\ No newline at end of file
diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
index 8b0143de64..4c5a27cf2a 100644
--- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
+++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
@@ -31,186 +31,623 @@
// Scraping from https://docs.python.org/3/reference/grammar.html
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
parser grammar Python3Parser;
options {
superClass = Python3ParserBase;
- tokenVocab=Python3Lexer;
+ tokenVocab = Python3Lexer;
}
+// Insert here @header for C++ parser.
+
// All comments that start with "///" are copy-pasted from
// The Python Language Reference
-single_input: NEWLINE | simple_stmts | compound_stmt NEWLINE;
-file_input: (NEWLINE | stmt)* EOF;
-eval_input: testlist NEWLINE* EOF;
-
-decorator: '@' dotted_name ( '(' arglist? ')' )? NEWLINE;
-decorators: decorator+;
-decorated: decorators (classdef | funcdef | async_funcdef);
-
-async_funcdef: ASYNC funcdef;
-funcdef: 'def' name parameters ('->' test)? ':' block;
-
-parameters: '(' typedargslist? ')';
-typedargslist: (tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (',' (
- '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )?
- | '**' tfpdef ','? )? )?
- | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )?
- | '**' tfpdef ','?);
-tfpdef: name (':' test)?;
-varargslist: (vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (',' (
- '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )?
- | '**' vfpdef (',')?)?)?
- | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )?
- | '**' vfpdef ','?
-);
-vfpdef: name;
-
-stmt: simple_stmts | compound_stmt;
-simple_stmts: simple_stmt (';' simple_stmt)* ';'? NEWLINE;
-simple_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
- import_stmt | global_stmt | nonlocal_stmt | assert_stmt);
-expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
- ('=' (yield_expr|testlist_star_expr))*);
-annassign: ':' test ('=' test)?;
-testlist_star_expr: (test|star_expr) (',' (test|star_expr))* ','?;
-augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
- '<<=' | '>>=' | '**=' | '//=');
+single_input
+ : NEWLINE
+ | simple_stmts
+ | compound_stmt NEWLINE
+ ;
+
+file_input
+ : (NEWLINE | stmt)* EOF
+ ;
+
+eval_input
+ : testlist NEWLINE* EOF
+ ;
+
+decorator
+ : '@' dotted_name ('(' arglist? ')')? NEWLINE
+ ;
+
+decorators
+ : decorator+
+ ;
+
+decorated
+ : decorators (classdef | funcdef | async_funcdef)
+ ;
+
+async_funcdef
+ : ASYNC funcdef
+ ;
+
+funcdef
+ : 'def' name parameters ('->' test)? ':' block
+ ;
+
+parameters
+ : '(' typedargslist? ')'
+ ;
+
+typedargslist
+ : (
+ tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (
+ ',' (
+ '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)?
+ | '**' tfpdef ','?
+ )?
+ )?
+ | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)?
+ | '**' tfpdef ','?
+ )
+ ;
+
+tfpdef
+ : name (':' test)?
+ ;
+
+varargslist
+ : (
+ vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (
+ ',' (
+ '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)?
+ | '**' vfpdef (',')?
+ )?
+ )?
+ | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)?
+ | '**' vfpdef ','?
+ )
+ ;
+
+vfpdef
+ : name
+ ;
+
+stmt
+ : simple_stmts
+ | compound_stmt
+ ;
+
+simple_stmts
+ : simple_stmt (';' simple_stmt)* ';'? NEWLINE
+ ;
+
+simple_stmt
+ : (
+ expr_stmt
+ | del_stmt
+ | pass_stmt
+ | flow_stmt
+ | import_stmt
+ | global_stmt
+ | nonlocal_stmt
+ | assert_stmt
+ )
+ ;
+
+expr_stmt
+ : testlist_star_expr (
+ annassign
+ | augassign (yield_expr | testlist)
+ | ('=' (yield_expr | testlist_star_expr))*
+ )
+ ;
+
+annassign
+ : ':' test ('=' test)?
+ ;
+
+testlist_star_expr
+ : (test | star_expr) (',' (test | star_expr))* ','?
+ ;
+
+augassign
+ : (
+ '+='
+ | '-='
+ | '*='
+ | '@='
+ | '/='
+ | '%='
+ | '&='
+ | '|='
+ | '^='
+ | '<<='
+ | '>>='
+ | '**='
+ | '//='
+ )
+ ;
+
// For normal and annotated assignments, additional restrictions enforced by the interpreter
-del_stmt: 'del' exprlist;
-pass_stmt: 'pass';
-flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt;
-break_stmt: 'break';
-continue_stmt: 'continue';
-return_stmt: 'return' testlist?;
-yield_stmt: yield_expr;
-raise_stmt: 'raise' (test ('from' test)?)?;
-import_stmt: import_name | import_from;
-import_name: 'import' dotted_as_names;
+del_stmt
+ : 'del' exprlist
+ ;
+
+pass_stmt
+ : 'pass'
+ ;
+
+flow_stmt
+ : break_stmt
+ | continue_stmt
+ | return_stmt
+ | raise_stmt
+ | yield_stmt
+ ;
+
+break_stmt
+ : 'break'
+ ;
+
+continue_stmt
+ : 'continue'
+ ;
+
+return_stmt
+ : 'return' testlist?
+ ;
+
+yield_stmt
+ : yield_expr
+ ;
+
+raise_stmt
+ : 'raise' (test ('from' test)?)?
+ ;
+
+import_stmt
+ : import_name
+ | import_from
+ ;
+
+import_name
+ : 'import' dotted_as_names
+ ;
+
// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
-import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
- 'import' ('*' | '(' import_as_names ')' | import_as_names));
-import_as_name: name ('as' name)?;
-dotted_as_name: dotted_name ('as' name)?;
-import_as_names: import_as_name (',' import_as_name)* ','?;
-dotted_as_names: dotted_as_name (',' dotted_as_name)*;
-dotted_name: name ('.' name)*;
-global_stmt: 'global' name (',' name)*;
-nonlocal_stmt: 'nonlocal' name (',' name)*;
-assert_stmt: 'assert' test (',' test)?;
-
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt;
-async_stmt: ASYNC (funcdef | with_stmt | for_stmt);
-if_stmt: 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?;
-while_stmt: 'while' test ':' block ('else' ':' block)?;
-for_stmt: 'for' exprlist 'in' testlist ':' block ('else' ':' block)?;
-try_stmt: ('try' ':' block
- ((except_clause ':' block)+
- ('else' ':' block)?
- ('finally' ':' block)? |
- 'finally' ':' block));
-with_stmt: 'with' with_item (',' with_item)* ':' block;
-with_item: test ('as' expr)?;
+import_from
+ : (
+ 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' (
+ '*'
+ | '(' import_as_names ')'
+ | import_as_names
+ )
+ )
+ ;
+
+import_as_name
+ : name ('as' name)?
+ ;
+
+dotted_as_name
+ : dotted_name ('as' name)?
+ ;
+
+import_as_names
+ : import_as_name (',' import_as_name)* ','?
+ ;
+
+dotted_as_names
+ : dotted_as_name (',' dotted_as_name)*
+ ;
+
+dotted_name
+ : name ('.' name)*
+ ;
+
+global_stmt
+ : 'global' name (',' name)*
+ ;
+
+nonlocal_stmt
+ : 'nonlocal' name (',' name)*
+ ;
+
+assert_stmt
+ : 'assert' test (',' test)?
+ ;
+
+compound_stmt
+ : if_stmt
+ | while_stmt
+ | for_stmt
+ | try_stmt
+ | with_stmt
+ | funcdef
+ | classdef
+ | decorated
+ | async_stmt
+ | match_stmt
+ ;
+
+async_stmt
+ : ASYNC (funcdef | with_stmt | for_stmt)
+ ;
+
+if_stmt
+ : 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?
+ ;
+
+while_stmt
+ : 'while' test ':' block ('else' ':' block)?
+ ;
+
+for_stmt
+ : 'for' exprlist 'in' testlist ':' block ('else' ':' block)?
+ ;
+
+try_stmt
+ : (
+ 'try' ':' block (
+ (except_clause ':' block)+ ('else' ':' block)? ('finally' ':' block)?
+ | 'finally' ':' block
+ )
+ )
+ ;
+
+with_stmt
+ : 'with' with_item (',' with_item)* ':' block
+ ;
+
+with_item
+ : test ('as' expr)?
+ ;
+
// NB compile.c makes sure that the default except clause is last
-except_clause: 'except' (test ('as' name)?)?;
-block: simple_stmts | NEWLINE INDENT stmt+ DEDENT;
-match_stmt: 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT ;
-subject_expr: star_named_expression ',' star_named_expressions? | test ;
-star_named_expressions: ',' star_named_expression+ ','? ;
-star_named_expression: '*' expr | test ;
-case_block: 'case' patterns guard? ':' block ;
-guard: 'if' test ;
-patterns: open_sequence_pattern | pattern ;
-pattern: as_pattern | or_pattern ;
-as_pattern: or_pattern 'as' pattern_capture_target ;
-or_pattern: closed_pattern ('|' closed_pattern)* ;
-closed_pattern: literal_pattern | capture_pattern | wildcard_pattern | value_pattern | group_pattern | sequence_pattern | mapping_pattern | class_pattern ;
-literal_pattern: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ;
-literal_expr: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ;
-complex_number: signed_real_number '+' imaginary_number
+except_clause
+ : 'except' (test ('as' name)?)?
+ ;
+
+block
+ : simple_stmts
+ | NEWLINE INDENT stmt+ DEDENT
+ ;
+
+match_stmt
+ : 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT
+ ;
+
+subject_expr
+ : star_named_expression ',' star_named_expressions?
+ | test
+ ;
+
+star_named_expressions
+ : ',' star_named_expression+ ','?
+ ;
+
+star_named_expression
+ : '*' expr
+ | test
+ ;
+
+case_block
+ : 'case' patterns guard? ':' block
+ ;
+
+guard
+ : 'if' test
+ ;
+
+patterns
+ : open_sequence_pattern
+ | pattern
+ ;
+
+pattern
+ : as_pattern
+ | or_pattern
+ ;
+
+as_pattern
+ : or_pattern 'as' pattern_capture_target
+ ;
+
+or_pattern
+ : closed_pattern ('|' closed_pattern)*
+ ;
+
+closed_pattern
+ : literal_pattern
+ | capture_pattern
+ | wildcard_pattern
+ | value_pattern
+ | group_pattern
+ | sequence_pattern
+ | mapping_pattern
+ | class_pattern
+ ;
+
+literal_pattern
+ : signed_number { this.CannotBePlusMinus() }?
+ | complex_number
+ | strings
+ | 'None'
+ | 'True'
+ | 'False'
+ ;
+
+literal_expr
+ : signed_number { this.CannotBePlusMinus() }?
+ | complex_number
+ | strings
+ | 'None'
+ | 'True'
+ | 'False'
+ ;
+
+complex_number
+ : signed_real_number '+' imaginary_number
| signed_real_number '-' imaginary_number
;
-signed_number: NUMBER | '-' NUMBER ;
-signed_real_number: real_number | '-' real_number ;
-real_number: NUMBER ;
-imaginary_number: NUMBER ;
-capture_pattern: pattern_capture_target ;
-pattern_capture_target: /* cannot be '_' */ name { this.cannotBeDotLpEq() }? ;
-wildcard_pattern: '_' ;
-value_pattern: attr { this.cannotBeDotLpEq() }? ;
-attr: name ('.' name)+ ;
-name_or_attr: attr | name ;
-group_pattern: '(' pattern ')' ;
-sequence_pattern:
- '[' maybe_sequence_pattern? ']'
+
+signed_number
+ : NUMBER
+ | '-' NUMBER
+ ;
+
+signed_real_number
+ : real_number
+ | '-' real_number
+ ;
+
+real_number
+ : NUMBER
+ ;
+
+imaginary_number
+ : NUMBER
+ ;
+
+capture_pattern
+ : pattern_capture_target
+ ;
+
+pattern_capture_target
+ : /* cannot be '_' */ name { this.CannotBeDotLpEq() }?
+ ;
+
+wildcard_pattern
+ : '_'
+ ;
+
+value_pattern
+ : attr { this.CannotBeDotLpEq() }?
+ ;
+
+attr
+ : name ('.' name)+
+ ;
+
+name_or_attr
+ : attr
+ | name
+ ;
+
+group_pattern
+ : '(' pattern ')'
+ ;
+
+sequence_pattern
+ : '[' maybe_sequence_pattern? ']'
| '(' open_sequence_pattern? ')'
;
-open_sequence_pattern: maybe_star_pattern ',' maybe_sequence_pattern? ;
-maybe_sequence_pattern: maybe_star_pattern (',' maybe_star_pattern)* ','? ;
-maybe_star_pattern: star_pattern | pattern ;
-star_pattern:
- '*' pattern_capture_target
+
+open_sequence_pattern
+ : maybe_star_pattern ',' maybe_sequence_pattern?
+ ;
+
+maybe_sequence_pattern
+ : maybe_star_pattern (',' maybe_star_pattern)* ','?
+ ;
+
+maybe_star_pattern
+ : star_pattern
+ | pattern
+ ;
+
+star_pattern
+ : '*' pattern_capture_target
| '*' wildcard_pattern
;
-mapping_pattern: '{' '}'
+
+mapping_pattern
+ : '{' '}'
| '{' double_star_pattern ','? '}'
| '{' items_pattern ',' double_star_pattern ','? '}'
| '{' items_pattern ','? '}'
;
-items_pattern: key_value_pattern (',' key_value_pattern)* ;
-key_value_pattern: (literal_expr | attr) ':' pattern ;
-double_star_pattern: '**' pattern_capture_target ;
-class_pattern: name_or_attr '(' ')'
+
+items_pattern
+ : key_value_pattern (',' key_value_pattern)*
+ ;
+
+key_value_pattern
+ : (literal_expr | attr) ':' pattern
+ ;
+
+double_star_pattern
+ : '**' pattern_capture_target
+ ;
+
+class_pattern
+ : name_or_attr '(' ')'
| name_or_attr '(' positional_patterns ','? ')'
| name_or_attr '(' keyword_patterns ','? ')'
| name_or_attr '(' positional_patterns ',' keyword_patterns ','? ')'
;
-positional_patterns: pattern (',' pattern)* ;
-keyword_patterns: keyword_pattern (',' keyword_pattern)* ;
-keyword_pattern: name '=' pattern ;
-
-test: or_test ('if' or_test 'else' test)? | lambdef;
-test_nocond: or_test | lambdef_nocond;
-lambdef: 'lambda' varargslist? ':' test;
-lambdef_nocond: 'lambda' varargslist? ':' test_nocond;
-or_test: and_test ('or' and_test)*;
-and_test: not_test ('and' not_test)*;
-not_test: 'not' not_test | comparison;
-comparison: expr (comp_op expr)*;
+
+positional_patterns
+ : pattern (',' pattern)*
+ ;
+
+keyword_patterns
+ : keyword_pattern (',' keyword_pattern)*
+ ;
+
+keyword_pattern
+ : name '=' pattern
+ ;
+
+test
+ : or_test ('if' or_test 'else' test)?
+ | lambdef
+ ;
+
+test_nocond
+ : or_test
+ | lambdef_nocond
+ ;
+
+lambdef
+ : 'lambda' varargslist? ':' test
+ ;
+
+lambdef_nocond
+ : 'lambda' varargslist? ':' test_nocond
+ ;
+
+or_test
+ : and_test ('or' and_test)*
+ ;
+
+and_test
+ : not_test ('and' not_test)*
+ ;
+
+not_test
+ : 'not' not_test
+ | comparison
+ ;
+
+comparison
+ : expr (comp_op expr)*
+ ;
+
// <> isn't actually a valid comparison operator in Python. It's here for the
// sake of a __future__ import described in PEP 401 (which really works :-)
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not';
-star_expr: '*' expr;
-expr: xor_expr ('|' xor_expr)*;
-xor_expr: and_expr ('^' and_expr)*;
-and_expr: shift_expr ('&' shift_expr)*;
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*;
-arith_expr: term (('+'|'-') term)*;
-term: factor (('*'|'@'|'/'|'%'|'//') factor)*;
-factor: ('+'|'-'|'~') factor | power;
-power: atom_expr ('**' factor)?;
-atom_expr: AWAIT? atom trailer*;
-atom: '(' (yield_expr|testlist_comp)? ')'
- | '[' testlist_comp? ']'
- | '{' dictorsetmaker? '}'
- | name | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' ;
-name : NAME | '_' | 'match' ;
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* ','? );
-trailer: '(' arglist? ')' | '[' subscriptlist ']' | '.' name ;
-subscriptlist: subscript_ (',' subscript_)* ','?;
-subscript_: test | test? ':' test? sliceop?;
-sliceop: ':' test?;
-exprlist: (expr|star_expr) (',' (expr|star_expr))* ','?;
-testlist: test (',' test)* ','?;
-dictorsetmaker: ( ((test ':' test | '**' expr)
- (comp_for | (',' (test ':' test | '**' expr))* ','?)) |
- ((test | star_expr)
- (comp_for | (',' (test | star_expr))* ','?)) );
-
-classdef: 'class' name ('(' arglist? ')')? ':' block;
-
-arglist: argument (',' argument)* ','?;
+comp_op
+ : '<'
+ | '>'
+ | '=='
+ | '>='
+ | '<='
+ | '<>'
+ | '!='
+ | 'in'
+ | 'not' 'in'
+ | 'is'
+ | 'is' 'not'
+ ;
+
+star_expr
+ : '*' expr
+ ;
+
+expr
+ : atom_expr
+ | expr '**' expr
+ | ('+' | '-' | '~')+ expr
+ | expr ('*' | '@' | '/' | '%' | '//') expr
+ | expr ('+' | '-') expr
+ | expr ('<<' | '>>') expr
+ | expr '&' expr
+ | expr '^' expr
+ | expr '|' expr
+ ;
+
+//expr: xor_expr ('|' xor_expr)*;
+//xor_expr: and_expr ('^' and_expr)*;
+//and_expr: shift_expr ('&' shift_expr)*;
+//shift_expr: arith_expr (('<<'|'>>') arith_expr)*;
+//arith_expr: term (('+'|'-') term)*;
+//term: factor (('*'|'@'|'/'|'%'|'//') factor)*;
+//factor: ('+'|'-'|'~') factor | power;
+//power: atom_expr ('**' factor)?;
+atom_expr
+ : AWAIT? atom trailer*
+ ;
+
+atom
+ : '(' (yield_expr | testlist_comp)? ')'
+ | '[' testlist_comp? ']'
+ | '{' dictorsetmaker? '}'
+ | name
+ | NUMBER
+ | STRING+
+ | '...'
+ | 'None'
+ | 'True'
+ | 'False'
+ ;
+
+name
+ : NAME
+ | '_'
+ | 'match'
+ ;
+
+testlist_comp
+ : (test | star_expr) (comp_for | (',' (test | star_expr))* ','?)
+ ;
+
+trailer
+ : '(' arglist? ')'
+ | '[' subscriptlist ']'
+ | '.' name
+ ;
+
+subscriptlist
+ : subscript_ (',' subscript_)* ','?
+ ;
+
+subscript_
+ : test
+ | test? ':' test? sliceop?
+ ;
+
+sliceop
+ : ':' test?
+ ;
+
+exprlist
+ : (expr | star_expr) (',' (expr | star_expr))* ','?
+ ;
+
+testlist
+ : test (',' test)* ','?
+ ;
+
+dictorsetmaker
+ : (
+ ((test ':' test | '**' expr) (comp_for | (',' (test ':' test | '**' expr))* ','?))
+ | ((test | star_expr) (comp_for | (',' (test | star_expr))* ','?))
+ )
+ ;
+
+classdef
+ : 'class' name ('(' arglist? ')')? ':' block
+ ;
+
+arglist
+ : argument (',' argument)* ','?
+ ;
// The reason that keywords are test nodes instead of NAME is that using NAME
// results in an ambiguity. ast.c makes sure it's a NAME.
@@ -221,19 +658,37 @@ arglist: argument (',' argument)* ','?;
// Illegal combinations and orderings are blocked in ast.c:
// multiple (test comp_for) arguments are blocked; keyword unpackings
// that precede iterable unpackings are blocked; etc.
-argument: ( test comp_for? |
- test '=' test |
- '**' test |
- '*' test );
+argument
+ : (test comp_for? | test '=' test | '**' test | '*' test)
+ ;
+
+comp_iter
+ : comp_for
+ | comp_if
+ ;
-comp_iter: comp_for | comp_if;
-comp_for: ASYNC? 'for' exprlist 'in' or_test comp_iter?;
-comp_if: 'if' test_nocond comp_iter?;
+comp_for
+ : ASYNC? 'for' exprlist 'in' or_test comp_iter?
+ ;
+
+comp_if
+ : 'if' test_nocond comp_iter?
+ ;
// not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: name;
+encoding_decl
+ : name
+ ;
-yield_expr: 'yield' yield_arg?;
-yield_arg: 'from' test | testlist;
+yield_expr
+ : 'yield' yield_arg?
+ ;
+
+yield_arg
+ : 'from' test
+ | testlist
+ ;
-strings: STRING+ ;
+strings
+ : STRING+
+ ;
\ No newline at end of file
diff --git a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java b/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java
deleted file mode 100644
index 695d07e40d..0000000000
--- a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package de.jplag.python3;
-
-import static de.jplag.python3.Python3TokenType.APPLY;
-import static de.jplag.python3.Python3TokenType.ARRAY;
-import static de.jplag.python3.Python3TokenType.ASSERT;
-import static de.jplag.python3.Python3TokenType.ASSIGN;
-import static de.jplag.python3.Python3TokenType.BREAK;
-import static de.jplag.python3.Python3TokenType.CLASS_BEGIN;
-import static de.jplag.python3.Python3TokenType.CLASS_END;
-import static de.jplag.python3.Python3TokenType.CONTINUE;
-import static de.jplag.python3.Python3TokenType.DEC_BEGIN;
-import static de.jplag.python3.Python3TokenType.DEC_END;
-import static de.jplag.python3.Python3TokenType.DEL;
-import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN;
-import static de.jplag.python3.Python3TokenType.EXCEPT_END;
-import static de.jplag.python3.Python3TokenType.FINALLY;
-import static de.jplag.python3.Python3TokenType.FOR_BEGIN;
-import static de.jplag.python3.Python3TokenType.FOR_END;
-import static de.jplag.python3.Python3TokenType.IF_BEGIN;
-import static de.jplag.python3.Python3TokenType.IF_END;
-import static de.jplag.python3.Python3TokenType.IMPORT;
-import static de.jplag.python3.Python3TokenType.LAMBDA;
-import static de.jplag.python3.Python3TokenType.METHOD_BEGIN;
-import static de.jplag.python3.Python3TokenType.METHOD_END;
-import static de.jplag.python3.Python3TokenType.RAISE;
-import static de.jplag.python3.Python3TokenType.RETURN;
-import static de.jplag.python3.Python3TokenType.TRY_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_END;
-import static de.jplag.python3.Python3TokenType.WITH_BEGIN;
-import static de.jplag.python3.Python3TokenType.WITH_END;
-import static de.jplag.python3.Python3TokenType.YIELD;
-
-import org.antlr.v4.runtime.tree.TerminalNode;
-
-import de.jplag.python3.grammar.Python3Parser;
-import de.jplag.python3.grammar.Python3ParserBaseListener;
-
-public class JplagPython3Listener extends Python3ParserBaseListener {
-
- private final Parser parser;
-
- public JplagPython3Listener(Parser parser) {
- this.parser = parser;
- }
-
- @Override
- public void enterAssert_stmt(Python3Parser.Assert_stmtContext ctx) {
- parser.add(ASSERT, ctx.getStart());
- }
-
- @Override
- public void enterDecorated(Python3Parser.DecoratedContext ctx) {
- parser.add(DEC_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitDecorated(Python3Parser.DecoratedContext ctx) {
- parser.addEnd(DEC_END, ctx.getStop());
- }
-
- @Override
- public void enterRaise_stmt(Python3Parser.Raise_stmtContext ctx) {
- parser.add(RAISE, ctx.getStart());
- }
-
- @Override
- public void enterExcept_clause(Python3Parser.Except_clauseContext ctx) {
- parser.add(EXCEPT_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitExcept_clause(Python3Parser.Except_clauseContext ctx) {
- parser.addEnd(EXCEPT_END, ctx.getStop());
- }
-
- @Override
- public void enterDictorsetmaker(Python3Parser.DictorsetmakerContext ctx) {
- parser.add(ARRAY, ctx.getStart());
- }
-
- @Override
- public void enterReturn_stmt(Python3Parser.Return_stmtContext ctx) {
- parser.add(RETURN, ctx.getStart());
- }
-
- @Override
- public void enterWhile_stmt(Python3Parser.While_stmtContext ctx) {
- parser.add(WHILE_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitWhile_stmt(Python3Parser.While_stmtContext ctx) {
- parser.addEnd(WHILE_END, ctx.getStop());
- }
-
- @Override
- public void enterYield_arg(Python3Parser.Yield_argContext ctx) {
- parser.add(YIELD, ctx.getStart());
- }
-
- @Override
- public void enterImport_stmt(Python3Parser.Import_stmtContext ctx) {
- parser.add(IMPORT, ctx.getStart());
- }
-
- @Override
- public void enterLambdef(Python3Parser.LambdefContext ctx) {
- parser.add(LAMBDA, ctx.getStart());
- }
-
- @Override
- public void enterTry_stmt(Python3Parser.Try_stmtContext ctx) {
- parser.add(TRY_BEGIN, ctx.getStart());
- }
-
- @Override
- public void enterBreak_stmt(Python3Parser.Break_stmtContext ctx) {
- parser.add(BREAK, ctx.getStart());
- }
-
- @Override
- public void enterTestlist_comp(Python3Parser.Testlist_compContext ctx) {
- if (ctx.getText().contains(",")) {
- parser.add(ARRAY, ctx.getStart());
- }
- }
-
- @Override
- public void enterIf_stmt(Python3Parser.If_stmtContext ctx) {
- parser.add(IF_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitIf_stmt(Python3Parser.If_stmtContext ctx) {
- parser.addEnd(IF_END, ctx.getStop());
- }
-
- @Override
- public void enterWith_stmt(Python3Parser.With_stmtContext ctx) {
- parser.add(WITH_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitWith_stmt(Python3Parser.With_stmtContext ctx) {
- parser.addEnd(WITH_END, ctx.getStop());
- }
-
- @Override
- public void enterClassdef(Python3Parser.ClassdefContext ctx) {
- parser.add(CLASS_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitClassdef(Python3Parser.ClassdefContext ctx) {
- parser.addEnd(CLASS_END, ctx.getStop());
- }
-
- @Override
- public void enterTrailer(Python3Parser.TrailerContext ctx) {
- if (ctx.getText().charAt(0) == '(') {
- parser.add(APPLY, ctx.getStart());
- } else {
- parser.add(ARRAY, ctx.getStart());
- }
- }
-
- @Override
- public void enterFuncdef(Python3Parser.FuncdefContext ctx) {
- parser.add(METHOD_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitFuncdef(Python3Parser.FuncdefContext ctx) {
- parser.addEnd(METHOD_END, ctx.getStop());
- }
-
- @Override
- public void enterAugassign(Python3Parser.AugassignContext ctx) {
- parser.add(ASSIGN, ctx.getStart());
- }
-
- @Override
- public void enterYield_stmt(Python3Parser.Yield_stmtContext ctx) {
- parser.add(YIELD, ctx.getStart());
- }
-
- @Override
- public void enterContinue_stmt(Python3Parser.Continue_stmtContext ctx) {
- parser.add(CONTINUE, ctx.getStart());
- }
-
- @Override
- public void enterFor_stmt(Python3Parser.For_stmtContext ctx) {
- parser.add(FOR_BEGIN, ctx.getStart());
- }
-
- @Override
- public void exitFor_stmt(Python3Parser.For_stmtContext ctx) {
- parser.addEnd(FOR_END, ctx.getStop());
- }
-
- @Override
- public void enterDel_stmt(Python3Parser.Del_stmtContext ctx) {
- parser.add(DEL, ctx.getStart());
- }
-
- @Override
- public void visitTerminal(TerminalNode node) {
- if (node.getText().equals("=")) {
- parser.add(ASSIGN, node.getSymbol());
- } else if (node.getText().equals("finally")) {
- parser.add(FINALLY, node.getSymbol());
- }
- }
-}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/Parser.java b/languages/python-3/src/main/java/de/jplag/python3/Parser.java
deleted file mode 100644
index 2dc352bfe2..0000000000
--- a/languages/python-3/src/main/java/de/jplag/python3/Parser.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package de.jplag.python3;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CommonTokenStream;
-import org.antlr.v4.runtime.tree.ParseTree;
-import org.antlr.v4.runtime.tree.ParseTreeWalker;
-
-import de.jplag.AbstractParser;
-import de.jplag.ParsingException;
-import de.jplag.Token;
-import de.jplag.TokenType;
-import de.jplag.python3.grammar.Python3Lexer;
-import de.jplag.python3.grammar.Python3Parser;
-import de.jplag.python3.grammar.Python3Parser.File_inputContext;
-import de.jplag.util.FileUtils;
-
-public class Parser extends AbstractParser {
-
- private List<Token> tokens;
- private File currentFile;
-
- /**
- * Creates the parser.
- */
- public Parser() {
- super();
- }
-
- public List<Token> parse(Set<File> files) throws ParsingException {
- tokens = new ArrayList<>();
- for (File file : files) {
- logger.trace("Parsing file {}", file.getName());
- parseFile(file);
- tokens.add(Token.fileEnd(file));
- }
- return tokens;
- }
-
- private void parseFile(File file) throws ParsingException {
- try (BufferedReader reader = FileUtils.openFileReader(file)) {
- currentFile = file;
-
- // create a lexer that feeds off of input CharStream
- Python3Lexer lexer = new Python3Lexer(CharStreams.fromReader(reader));
-
- // create a buffer of tokens pulled from the lexer
- CommonTokenStream tokens = new CommonTokenStream(lexer);
-
- // create a parser that feeds off the tokens buffer
- Python3Parser parser = new Python3Parser(tokens);
- File_inputContext in = parser.file_input();
-
- ParseTreeWalker ptw = new ParseTreeWalker();
- for (int i = 0; i < in.getChildCount(); i++) {
- ParseTree pt = in.getChild(i);
- ptw.walk(new JplagPython3Listener(this), pt);
- }
-
- } catch (IOException e) {
- throw new ParsingException(file, e.getMessage(), e);
- }
- }
-
- public void add(TokenType type, org.antlr.v4.runtime.Token token) {
- tokens.add(new Token(type, currentFile, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length()));
- }
-
- public void addEnd(TokenType type, org.antlr.v4.runtime.Token token) {
- tokens.add(new Token(type, currentFile, token.getLine(), tokens.get(tokens.size() - 1).getColumn() + 1, 0));
- }
-}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
index e4a684c9b9..8505224702 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
@@ -32,7 +32,9 @@ public enum Python3TokenType implements TokenType {
YIELD("YIELD"),
DEL("DEL"),
WITH_BEGIN("WITH}"),
- WITH_END("}WITH");
+ WITH_END("}WITH"),
+ ASYNC("ASYNC"),
+ AWAIT("AWAIT");
private final String description;
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
index b5a8fd73f4..3df6587284 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
@@ -1,23 +1,16 @@
package de.jplag.python3;
-import java.io.File;
-import java.util.List;
-import java.util.Set;
-
import org.kohsuke.MetaInfServices;
-import de.jplag.ParsingException;
-import de.jplag.Token;
+import de.jplag.antlr.AbstractAntlrLanguage;
@MetaInfServices(de.jplag.Language.class)
-public class PythonLanguage implements de.jplag.Language {
+public class PythonLanguage extends AbstractAntlrLanguage {
private static final String IDENTIFIER = "python3";
- private final Parser parser;
-
public PythonLanguage() {
- parser = new Parser();
+ super(new PythonParserAdapter());
}
@Override
@@ -39,9 +32,4 @@ public String getIdentifier() {
public int minimumTokenMatch() {
return 12;
}
-
- @Override
- public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
- return this.parser.parse(files);
- }
}
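A minimal usage sketch of the migrated front-end, not part of the change set itself. It assumes only what the hunks above show: PythonLanguage now extends AbstractAntlrLanguage and hands it a PythonParserAdapter, and the Language interface still declares parse(Set<File>, boolean) as in the override removed above. The file name, the demo class name, and the printed property are illustrative.

    import java.io.File;
    import java.util.List;
    import java.util.Set;

    import de.jplag.ParsingException;
    import de.jplag.Token;
    import de.jplag.python3.PythonLanguage;

    public class PythonLanguageDemo {
        public static void main(String[] args) throws ParsingException {
            // The new PythonLanguage delegates parsing to PythonParserAdapter via AbstractAntlrLanguage.
            PythonLanguage language = new PythonLanguage();
            // parse(Set<File>, boolean normalize) matches the interface method formerly overridden above.
            List<Token> tokens = language.parse(Set.of(new File("example.py")), false);
            // Print one token type per line; getType() is assumed to exist on de.jplag.Token.
            tokens.forEach(token -> System.out.println(token.getType()));
        }
    }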
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
new file mode 100644
index 0000000000..aa0dabb18f
--- /dev/null
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
@@ -0,0 +1,81 @@
+package de.jplag.python3;
+
+import static de.jplag.python3.Python3TokenType.*;
+
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.python3.grammar.Python3Parser;
+import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.AugassignContext;
+import de.jplag.python3.grammar.Python3Parser.Break_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.ClassdefContext;
+import de.jplag.python3.grammar.Python3Parser.Continue_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.DecoratedContext;
+import de.jplag.python3.grammar.Python3Parser.Del_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.DictorsetmakerContext;
+import de.jplag.python3.grammar.Python3Parser.Except_clauseContext;
+import de.jplag.python3.grammar.Python3Parser.For_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.FuncdefContext;
+import de.jplag.python3.grammar.Python3Parser.If_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Import_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.LambdefContext;
+import de.jplag.python3.grammar.Python3Parser.Raise_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Return_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Testlist_compContext;
+import de.jplag.python3.grammar.Python3Parser.TrailerContext;
+import de.jplag.python3.grammar.Python3Parser.Try_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.While_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.With_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Yield_argContext;
+import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext;
+
+public class PythonListener extends AbstractAntlrListener {
+ public PythonListener() {
+ statements();
+ controlStructures();
+ contexts();
+ values();
+ }
+
+ private void statements() {
+ visit(Assert_stmtContext.class).map(ASSERT);
+ visit(Raise_stmtContext.class).map(RAISE);
+ visit(Return_stmtContext.class).map(RETURN);
+ visit(Yield_argContext.class).map(YIELD);
+ visit(Yield_stmtContext.class).map(YIELD);
+ visit(Import_stmtContext.class).map(IMPORT);
+ visit(Break_stmtContext.class).map(BREAK);
+ visit(Continue_stmtContext.class).map(CONTINUE);
+ visit(Del_stmtContext.class).map(DEL);
+ visit(Python3Parser.FINALLY).map(FINALLY);
+
+ visit(Python3Parser.ASYNC).map(ASYNC);
+ visit(Python3Parser.AWAIT).map(AWAIT);
+
+ visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END);
+ }
+
+ private void controlStructures() {
+ visit(While_stmtContext.class).map(WHILE_BEGIN, WHILE_END);
+ visit(Try_stmtContext.class).map(TRY_BEGIN);
+ visit(If_stmtContext.class).map(IF_BEGIN, IF_END);
+ visit(With_stmtContext.class).map(WITH_BEGIN, WITH_END);
+ visit(For_stmtContext.class).map(FOR_BEGIN, FOR_END);
+ }
+
+ private void contexts() {
+ visit(DecoratedContext.class).map(DEC_BEGIN, DEC_END);
+ visit(LambdefContext.class).map(LAMBDA);
+ visit(ClassdefContext.class).map(CLASS_BEGIN, CLASS_END);
+ visit(FuncdefContext.class).map(METHOD_BEGIN, METHOD_END);
+ }
+
+ private void values() {
+ visit(DictorsetmakerContext.class).map(ARRAY);
+ visit(Testlist_compContext.class, context -> context.getText().contains(",")).map(ARRAY);
+ visit(AugassignContext.class).map(ASSIGN);
+ visit(Python3Parser.ASSIGN).map(ASSIGN);
+
+ visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) == '(').map(APPLY);
+ visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) != '(').map(ARRAY);
+ }
+}
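The listener above configures token extraction declaratively instead of overriding enter/exit callbacks as the old JplagPython3Listener did. The sketch below, not part of the change set, restates the three mapping shapes it uses, with the concrete rules copied from the file above; ExampleListener is a hypothetical class name used only for illustration.

    package de.jplag.python3;

    import static de.jplag.python3.Python3TokenType.APPLY;
    import static de.jplag.python3.Python3TokenType.DEL;
    import static de.jplag.python3.Python3TokenType.WHILE_BEGIN;
    import static de.jplag.python3.Python3TokenType.WHILE_END;

    import de.jplag.antlr.AbstractAntlrListener;
    import de.jplag.python3.grammar.Python3Parser.Del_stmtContext;
    import de.jplag.python3.grammar.Python3Parser.TrailerContext;
    import de.jplag.python3.grammar.Python3Parser.While_stmtContext;

    // Hypothetical class for illustration only; PythonListener above is the real implementation.
    public class ExampleListener extends AbstractAntlrListener {
        public ExampleListener() {
            // One token per matched rule: DEL is emitted for every del statement.
            visit(Del_stmtContext.class).map(DEL);
            // A begin/end pair: one token when the while statement is entered, one when it is left.
            visit(While_stmtContext.class).map(WHILE_BEGIN, WHILE_END);
            // Predicate-guarded mapping: only call-style trailers "(...)" become APPLY tokens.
            visit(TrailerContext.class, context -> context.getText().charAt(0) == '(').map(APPLY);
        }
    }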
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java
new file mode 100644
index 0000000000..8d99920f05
--- /dev/null
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java
@@ -0,0 +1,33 @@
+package de.jplag.python3;
+
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.ParserRuleContext;
+
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.antlr.AbstractAntlrParserAdapter;
+import de.jplag.python3.grammar.Python3Lexer;
+import de.jplag.python3.grammar.Python3Parser;
+
+public class PythonParserAdapter extends AbstractAntlrParserAdapter<Python3Parser> {
+ @Override
+ protected Lexer createLexer(CharStream input) {
+ return new Python3Lexer(input);
+ }
+
+ @Override
+ protected Python3Parser createParser(CommonTokenStream tokenStream) {
+ return new Python3Parser(tokenStream);
+ }
+
+ @Override
+ protected ParserRuleContext getEntryContext(Python3Parser parser) {
+ return parser.file_input();
+ }
+
+ @Override
+ protected AbstractAntlrListener getListener() {
+ return new PythonListener();
+ }
+}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
index 0e24adf203..b5a0e55011 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
@@ -1,17 +1,18 @@
package de.jplag.python3.grammar;
+import java.util.ArrayDeque;
import java.util.Deque;
-import java.util.LinkedList;
-import org.antlr.v4.runtime.CharStream;
-import org.antlr.v4.runtime.CommonToken;
-import org.antlr.v4.runtime.Lexer;
-import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.*;
abstract class Python3LexerBase extends Lexer {
- private LinkedList<Token> tokens = new LinkedList<>();
- private Deque<Integer> indents = new LinkedList<>();
+ // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
+ private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
+ // The stack that keeps track of the indentation level.
+ private Deque<Integer> indents = new ArrayDeque<>();
+ // The amount of opened braces, brackets and parenthesis.
private int opened = 0;
+ // The most recently produced token.
private Token lastToken = null;
protected Python3LexerBase(CharStream input) {
@@ -26,35 +27,38 @@ public void emit(Token t) {
@Override
public Token nextToken() {
+ // Check if the end-of-file is ahead and there are still some DEDENTS expected.
if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
+ // Remove any trailing EOF tokens from our buffer.
+ for (int i = tokens.size() - 1; i >= 0; i--) {
+ if (tokens.get(i).getType() == EOF) {
+ tokens.remove(i);
+ }
+ }
+
+ // First emit an extra line break that serves as the end of the statement.
this.emit(commonToken(Python3Lexer.NEWLINE, "\n"));
- this.removeTrailingEofTokens();
+ // Now emit as much DEDENT tokens as needed.
while (!indents.isEmpty()) {
this.emit(createDedent());
indents.pop();
}
- this.emit(commonToken(EOF, ""));
+ // Put the EOF back on the token stream.
+ this.emit(commonToken(Python3Lexer.EOF, ""));
}
Token next = super.nextToken();
if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+ // Keep track of the last token on the default channel.
this.lastToken = next;
}
return tokens.isEmpty() ? next : tokens.poll();
}
- private void removeTrailingEofTokens() {
- for (int i = tokens.size() - 1; i >= 0; i--) {
- if (tokens.get(i).getType() == EOF) {
- tokens.remove(i);
- }
- }
- }
-
private Token createDedent() {
CommonToken dedent = commonToken(Python3Lexer.DEDENT, "");
dedent.setLine(this.lastToken.getLine());
@@ -67,21 +71,24 @@ private CommonToken commonToken(int type, String text) {
return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
}
- /**
- * Calculates the indentation of the provided spaces, taking the following rules into account:
- *
- * "Tabs are replaced (from left to right) by one to eight spaces such that the total number of characters up to and
- * including the replacement is a multiple of eight [...]"
- *
- * -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
- **/
+ // Calculates the indentation of the provided spaces, taking the
+ // following rules into account:
+ //
+ // "Tabs are replaced (from left to right) by one to eight spaces
+ // such that the total number of characters up to and including
+ // the replacement is a multiple of eight [...]"
+ //
+ // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
static int getIndentationCount(String spaces) {
int count = 0;
for (char ch : spaces.toCharArray()) {
- if (ch == '\t') {
- count += 8 - (count % 8);
- } else {
- count++;
+ switch (ch) {
+ case '\t':
+ count += 8 - (count % 8);
+ break;
+ default:
+ // A normal space char.
+ count++;
}
}
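A worked example of the indentation rule implemented above, not part of the change set. The values follow directly from the switch: a tab pads the count to the next multiple of eight, any other character adds one. Because getIndentationCount is package-private and static, the snippet assumes it lives in the same package; the class name is illustrative.

    package de.jplag.python3.grammar;

    public class IndentationCountExamples {
        public static void main(String[] args) {
            // "\t"  : 0 + (8 - 0 % 8) = 8
            // " \t" : 1, then 1 + (8 - 1 % 8) = 8  (the tab pads to the next multiple of eight)
            // "\t " : 8, then 8 + 1 = 9
            System.out.println(Python3LexerBase.getIndentationCount("\t"));   // 8
            System.out.println(Python3LexerBase.getIndentationCount(" \t"));  // 8
            System.out.println(Python3LexerBase.getIndentationCount("\t "));  // 9
        }
    }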
@@ -104,21 +111,26 @@ void onNewLine() {
String newLine = getText().replaceAll("[^\r\n\f]+", "");
String spaces = getText().replaceAll("[\r\n\f]+", "");
+ // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to
+ // satisfy the final newline needed by the single_put rule used by the REPL.
int next = _input.LA(1);
int nextnext = _input.LA(2);
if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) {
+ // If we're inside a list or on a blank line, ignore all indents,
+ // dedents and line breaks.
skip();
} else {
emit(commonToken(Python3Lexer.NEWLINE, newLine));
int indent = getIndentationCount(spaces);
int previous = indents.isEmpty() ? 0 : indents.peek();
-
if (indent == previous) {
+ // skip indents of the same size as the present indent-size
skip();
} else if (indent > previous) {
indents.push(indent);
emit(commonToken(Python3Lexer.INDENT, spaces));
} else {
+ // Possibly emit more than 1 DEDENT token.
while (!indents.isEmpty() && indents.peek() > indent) {
this.emit(createDedent());
indents.pop();
@@ -129,10 +141,10 @@ void onNewLine() {
@Override
public void reset() {
- tokens = new LinkedList<>();
- indents = new LinkedList<>();
+ tokens = new java.util.LinkedList<>();
+ indents = new ArrayDeque<>();
opened = 0;
lastToken = null;
super.reset();
}
-}
+}
\ No newline at end of file
diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
index 44b5926a45..713af92c1e 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
@@ -1,18 +1,17 @@
package de.jplag.python3.grammar;
-import org.antlr.v4.runtime.Parser;
-import org.antlr.v4.runtime.TokenStream;
+import org.antlr.v4.runtime.*;
public abstract class Python3ParserBase extends Parser {
protected Python3ParserBase(TokenStream input) {
super(input);
}
- public boolean cannotBePlusMinus() {
+ public boolean CannotBePlusMinus() {
return true;
}
- public boolean cannotBeDotLpEq() {
+ public boolean CannotBeDotLpEq() {
return true;
}
-}
+}
\ No newline at end of file
diff --git a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
index 34d92252cc..bfd3e8bf46 100644
--- a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
+++ b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
@@ -500,4 +500,9 @@ def force_legacy_ssl_support():
def switchWithBreak():
while True:
- break
\ No newline at end of file
+ break
+
+async def x():
+ return ""
+
+x = await x()
\ No newline at end of file
diff --git a/languages/scala/pom.xml b/languages/scala/pom.xml
index 7c43463dc5..b9300f543c 100644
--- a/languages/scala/pom.xml
+++ b/languages/scala/pom.xml
@@ -10,7 +10,7 @@
scala
- 2.13.12
+ 2.13.14
2.13
@@ -25,7 +25,7 @@
<groupId>org.scalameta</groupId>
<artifactId>scalameta_${scala.compat.version}</artifactId>
- <version>4.8.15</version>
+ <version>4.9.5</version>
@@ -35,7 +35,7 @@
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
- <version>4.8.1</version>
+ <version>4.9.1</version>
diff --git a/pom.xml b/pom.xml
index dba35b8c0e..2209f238ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,18 +75,18 @@
21
21
2.43.0
- 2.0.12
+ 2.0.13
5.10.2
2.7.7
4.13.1
- 2.35.0
- 2.29.0
- 2.36.0
+ 2.36.0
+ 2.30.0
+ 2.37.0
1.0.0
- 5.0.0
+ 5.1.0
@@ -117,7 +117,7 @@
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
- <version>4.5.6</version>
+ <version>4.5.7</version>
@@ -140,7 +140,7 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
- <version>2.16.1</version>
+ <version>2.17.1</version>
@@ -167,7 +167,7 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
- <version>5.10.0</version>
+ <version>5.12.0</version>
<scope>test</scope>
@@ -203,7 +203,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
- <version>3.3.0</version>
+ <version>3.4.1</version>
@@ -223,7 +223,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
- <version>3.6.0</version>
+ <version>3.7.1</version>
<descriptorRef>jar-with-dependencies</descriptorRef>
@@ -244,7 +244,7 @@
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
- <version>0.8.11</version>
+ <version>0.8.12</version>
prepare-agent
@@ -268,12 +268,12 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
- <version>3.1.0</version>
+ <version>3.2.4</version>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
- <version>3.1.1</version>
+ <version>3.1.2</version>
@@ -310,7 +310,7 @@
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
- <version>3.5.0</version>
+ <version>3.6.0</version>
add-source
@@ -343,7 +343,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
- <version>3.3.0</version>
+ <version>3.3.1</version>
attach-sources
diff --git a/report-viewer/.gitignore b/report-viewer/.gitignore
index 719bf30c14..9d12524399 100644
--- a/report-viewer/.gitignore
+++ b/report-viewer/.gitignore
@@ -29,3 +29,4 @@ coverage
test-results/
playwright-report/
+tests/e2e/assets
\ No newline at end of file
diff --git a/report-viewer/index.html b/report-viewer/index.html
index f4fedd2bd1..24ca69e3c6 100644
--- a/report-viewer/index.html
+++ b/report-viewer/index.html
@@ -6,7 +6,7 @@
JPlag Report Viewer
-
+