Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-write/Fix antlr-utils #1263

Merged
merged 13 commits into from
Sep 25, 2023
Original file line number Diff line number Diff line change
@@ -1,218 +1,85 @@
package de.jplag.antlr;

import java.io.File;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeListener;
import org.antlr.v4.runtime.tree.TerminalNode;

import de.jplag.TokenType;
import de.jplag.semantics.VariableRegistry;

/**
* Base class for Antlr listeners. You can use the create*Mapping functions to map antlr tokens to jplag tokens.
* <p>
* You should create a constructor matching one of the constructors and create your mapping after calling super.
* Base class for Antlr listeners. This is a quasi-static class that is only created once per language. To use it,
* declare a constructor in the subclass that calls super() and then calls the visit methods.
*/
@SuppressWarnings("unused")
public class AbstractAntlrListener implements ParseTreeListener {
private final List<ContextTokenBuilder<ParserRuleContext>> startMappings;
private final List<ContextTokenBuilder<ParserRuleContext>> endMappings;

private final List<TerminalTokenBuilder> terminalMapping;

private final TokenCollector collector;
private final File currentFile;

private VariableRegistry variableRegistry;

/**
 * Creates a listener bound to the given collector and file.
 * <p>
 * The start, end and terminal mapping lists all start out empty. A {@link VariableRegistry} is only instantiated when
 * semantics are requested; otherwise the registry field stays {@code null}.
 * @param collector The token collector
 * @param currentFile The currently processed file
 * @param extractsSemantics If true, the listener will extract semantics along with every token
 */
public AbstractAntlrListener(TokenCollector collector, File currentFile, boolean extractsSemantics) {
    this.startMappings = new ArrayList<>();
    this.endMappings = new ArrayList<>();
    this.terminalMapping = new ArrayList<>();

    this.collector = collector;
    this.currentFile = currentFile;

    // no registry is created unless semantics extraction was requested
    this.variableRegistry = extractsSemantics ? new VariableRegistry() : null;
}

/**
* Creates a new AbstractAntlrListener that does not collect semantics information.
* <p>
* Delegates to the three-argument constructor with {@code extractsSemantics} set to {@code false}.
* @param collector The collector, obtained by the parser
* @param currentFile The current file, obtained by the parser
*/
public AbstractAntlrListener(TokenCollector collector, File currentFile) {
this(collector, currentFile, false);
}

@Override
public void visitTerminal(TerminalNode terminalNode) {
    // every registered terminal mapping whose predicate accepts the node's symbol creates a token, in registration order
    for (TerminalTokenBuilder mapping : this.terminalMapping) {
        if (mapping.matches(terminalNode.getSymbol())) {
            mapping.createToken(terminalNode.getSymbol(), this.variableRegistry);
        }
    }
}

/**
* Intentionally a no-op: no token is created for error nodes.
* @param errorNode The error node reported by the tree walker (ignored)
*/
@Override
public void visitErrorNode(ErrorNode errorNode) {
// does nothing, because we do not handle error nodes right now.
}

@Override
public void enterEveryRule(ParserRuleContext rule) {
    // every registered start mapping that matches the entered rule creates a token, in registration order
    for (ContextTokenBuilder<ParserRuleContext> mapping : this.startMappings) {
        if (mapping.matches(rule)) {
            mapping.createToken(rule, this.variableRegistry);
        }
    }
}
public abstract class AbstractAntlrListener {
private final List<ContextVisitor<ParserRuleContext>> contextVisitors;
private final List<TerminalVisitor> terminalVisitors;

@Override
public void exitEveryRule(ParserRuleContext rule) {
this.endMappings.stream().filter(mapping -> mapping.matches(rule)).forEach(mapping -> mapping.createToken(rule, variableRegistry));
protected AbstractAntlrListener() {
brodmo marked this conversation as resolved.
Show resolved Hide resolved
contextVisitors = new ArrayList<>();
terminalVisitors = new ArrayList<>();
}

/**
 * Creates an unconditional mapping using the start token from antlr as the location.
 * <p>
 * Shorthand for {@link #mapEnter(Class, TokenType, Predicate)} with a condition that always holds.
 * @param antlrType The antlr context type
 * @param jplagType The JPlag token type
 * @param <T> The type of {@link ParserRuleContext}
 * @return The builder for the token
 */
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapEnter(Class<T> antlrType, TokenType jplagType) {
    return mapEnter(antlrType, jplagType, unused -> true);
}

/**
 * Creates a conditional mapping using the start token from antlr as the location.
 * <p>
 * The builder is created with {@code ContextTokenBuilderType.START} and registered in the list of start mappings.
 * @param antlrType The antlr context type
 * @param jplagType The JPlag token type
 * @param condition The condition under which the mapping applies
 * @param <T> The type of {@link ParserRuleContext}
 * @return The builder for the token
 */
@SuppressWarnings("unchecked")
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapEnter(Class<T> antlrType, TokenType jplagType, Predicate<T> condition) {
    ContextTokenBuilder<T> startBuilder = initTypeBuilder(antlrType, jplagType, condition, ContextTokenBuilderType.START);
    // the list is typed on ParserRuleContext, hence the unchecked cast
    startMappings.add((ContextTokenBuilder<ParserRuleContext>) startBuilder);
    return startBuilder;
}

/**
 * Creates an unconditional mapping using the stop token from antlr as the location.
 * <p>
 * Shorthand for {@link #mapExit(Class, TokenType, Predicate)} with a condition that always holds.
 * @param antlrType The antlr context type
 * @param jplagType The JPlag token type
 * @param <T> The type of {@link ParserRuleContext}
 * @return The builder for the token
 */
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapExit(Class<T> antlrType, TokenType jplagType) {
    return mapExit(antlrType, jplagType, unused -> true);
}

/**
* Creates a mapping using the stop token from antlr as the location
* @param antlrType The antlr context type
* @param jplagType The Jplag token type
* @param condition The condition under which the mapping applies
* @param <T> The type of {@link ParserRuleContext}
* @return The builder for the token
* Visit the given node.
* @param antlrType The antlr type of the node.
* @param condition An additional condition for the visit.
* @return A visitor for the node.
* @param <T> The class of the node.
*/
@SuppressWarnings("unchecked")
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapExit(Class<T> antlrType, TokenType jplagType, Predicate<T> condition) {
ContextTokenBuilder<T> builder = initTypeBuilder(antlrType, jplagType, condition, ContextTokenBuilderType.STOP);
this.endMappings.add((ContextTokenBuilder<ParserRuleContext>) builder);
return builder;
public <T extends ParserRuleContext> ContextVisitor<T> visit(Class<T> antlrType, Predicate<T> condition) {
Predicate<T> typeCheck = rule -> rule.getClass() == antlrType;
ContextVisitor<T> visitor = new ContextVisitor<>(typeCheck.and(condition));
contextVisitors.add((ContextVisitor<ParserRuleContext>) visitor);
return visitor;
}

/**
* Creates a mapping using the beginning of the start token as the start location and the distance from the start to the
* stop token as the length
* @param antlrType The antlr context type
* @param jplagType The Jplag token type
* @param <T> The type of {@link ParserRuleContext}
* @return The builder for the token
* Visit the given node.
* @param antlrType The antlr type of the node.
* @return A visitor for the node.
* @param <T> The class of the node.
*/
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapRange(Class<T> antlrType, TokenType jplagType) {
return this.mapRange(antlrType, jplagType, it -> true);
public <T extends ParserRuleContext> ContextVisitor<T> visit(Class<T> antlrType) {
return visit(antlrType, ignore -> true);
}

/**
* Creates a mapping using the beginning of the start token as the start location and the distance from the start to the
* stop token as the length
* @param antlrType The antlr context type
* @param jplagType The Jplag token type
* @param condition The condition under which the mapping applies
* @param <T> The type of {@link ParserRuleContext}
* @return The builder for the token
* Visit the given terminal.
* @param terminalType The type of the terminal.
* @param condition An additional condition for the visit.
* @return A visitor for the node.
*/
@SuppressWarnings("unchecked")
protected <T extends ParserRuleContext> ContextTokenBuilder<T> mapRange(Class<T> antlrType, TokenType jplagType, Predicate<T> condition) {
ContextTokenBuilder<T> builder = initTypeBuilder(antlrType, jplagType, condition, ContextTokenBuilderType.RANGE);
this.startMappings.add((ContextTokenBuilder<ParserRuleContext>) builder);
return builder;
public TerminalVisitor visit(int terminalType, Predicate<Token> condition) {
Predicate<Token> typeCheck = rule -> rule.getType() == terminalType;
TerminalVisitor visitor = new TerminalVisitor(typeCheck.and(condition));
terminalVisitors.add(visitor);
return visitor;
}

/**
* Creates a start mapping from antlrType to startType and a stop mapping from antlrType to stopType.
* @param antlrType The antlr token type
* @param startType The token type for the start mapping
* @param stopType The token type for the stop mapping
* @param <T> The type of {@link ParserRuleContext}
* @return The builder for the token
* Visit the given terminal.
* @param terminalType The type of the terminal.
* @return A visitor for the node.
*/
protected <T extends ParserRuleContext> RangeBuilder<T> mapEnterExit(Class<T> antlrType, TokenType startType, TokenType stopType) {
return mapEnterExit(antlrType, startType, stopType, it -> true);
public TerminalVisitor visit(int terminalType) {
return visit(terminalType, ignore -> true);
}

/**
* Creates a start mapping from antlrType to startType and a stop mapping from antlrType to stopType.
* @param antlrType The antlr token type
* @param startType The token type for the start mapping
* @param stopType The token type for the stop mapping
* @param condition The condition under which the mapping applies
* @param <T> The type of {@link ParserRuleContext}
* @return The builder for the token
*/
protected <T extends ParserRuleContext> RangeBuilder<T> mapEnterExit(Class<T> antlrType, TokenType startType, TokenType stopType,
Predicate<T> condition) {
ContextTokenBuilder<T> start = this.mapEnter(antlrType, startType, condition);
ContextTokenBuilder<T> end = this.mapExit(antlrType, stopType, condition);
return new RangeBuilder<>(start, end);
void visitTerminal(HandlerData<Token> data) {
brodmo marked this conversation as resolved.
Show resolved Hide resolved
this.terminalVisitors.stream().filter(visitor -> visitor.matches(data.entity())).forEach(visitor -> visitor.enter(data));
}

/**
* Creates a mapping for terminal tokens
* @param terminalType The type of the terminal node
* @param jplagType The jplag token type
* @return The builder for the token
*/
protected TerminalTokenBuilder mapTerminal(int terminalType, TokenType jplagType) {
return this.mapTerminal(terminalType, jplagType, it -> true);
void enterEveryRule(HandlerData<ParserRuleContext> data) {
this.contextVisitors.stream().filter(visitor -> visitor.matches(data.entity())).forEach(visitor -> visitor.enter(data));
}

/**
* Creates a mapping for terminal tokens
* @param terminalType The type of the terminal node
* @param jplagType The jplag token type
* @param condition The condition under which the mapping applies
* @return The builder for the token
*/
protected TerminalTokenBuilder mapTerminal(int terminalType, TokenType jplagType, Predicate<org.antlr.v4.runtime.Token> condition) {
TerminalTokenBuilder builder = new TerminalTokenBuilder(jplagType, token -> token.getType() == terminalType && condition.test(token),
this.collector, this.currentFile);
this.terminalMapping.add(builder);
return builder;
void exitEveryRule(HandlerData<ParserRuleContext> data) {
this.contextVisitors.stream().filter(visitor -> visitor.matches(data.entity())).forEach(visitor -> visitor.exit(data));
}

/**
Expand All @@ -224,7 +91,7 @@ protected TerminalTokenBuilder mapTerminal(int terminalType, TokenType jplagType
* @return an ancestor of the specified type, or null if not found.
*/
@SafeVarargs
protected final <T extends ParserRuleContext> T getAncestor(ParserRuleContext context, Class<T> ancestor,
protected static <T extends ParserRuleContext> T getAncestor(ParserRuleContext context, Class<T> ancestor,
Class<? extends ParserRuleContext>... stops) {
ParserRuleContext currentContext = context;
Set<Class<? extends ParserRuleContext>> forbidden = Set.of(stops);
Expand All @@ -251,7 +118,7 @@ protected final <T extends ParserRuleContext> T getAncestor(ParserRuleContext co
* @see #getAncestor(ParserRuleContext, Class, Class[])
*/
@SafeVarargs
protected final boolean hasAncestor(ParserRuleContext context, Class<? extends ParserRuleContext> parent,
protected static boolean hasAncestor(ParserRuleContext context, Class<? extends ParserRuleContext> parent,
Class<? extends ParserRuleContext>... stops) {
return getAncestor(context, parent, stops) != null;
}
Expand All @@ -263,7 +130,7 @@ protected final boolean hasAncestor(ParserRuleContext context, Class<? extends P
* @param <T> the type to search for.
* @return the first appearance of an element of the given type in the subtree, or null if no such element exists.
*/
protected final <T extends ParserRuleContext> T getDescendant(ParserRuleContext context, Class<T> descendant) {
protected static <T extends ParserRuleContext> T getDescendant(ParserRuleContext context, Class<T> descendant) {
// simple iterative bfs
ArrayDeque<ParserRuleContext> queue = new ArrayDeque<>();
queue.add(context);
Expand All @@ -280,10 +147,4 @@ protected final <T extends ParserRuleContext> T getDescendant(ParserRuleContext
}
return null;
}

/**
 * Builds a {@link ContextTokenBuilder} whose predicate accepts a rule only if its runtime class is exactly
 * {@code antlrType} (subclasses do not match) and the given condition holds for it.
 * @param antlrType The antlr context type
 * @param jplagType The JPlag token type
 * @param condition The condition under which the mapping applies
 * @param type The kind of builder to create
 * @param <T> The type of {@link ParserRuleContext}
 * @return The new builder, wired to this listener's collector and current file
 */
private <T extends ParserRuleContext> ContextTokenBuilder<T> initTypeBuilder(Class<T> antlrType, TokenType jplagType, Predicate<T> condition,
        ContextTokenBuilderType type) {
    return new ContextTokenBuilder<>(jplagType, rule -> {
        // the condition is only consulted once the exact class check has passed
        if (rule.getClass() != antlrType) {
            return false;
        }
        return condition.test(antlrType.cast(rule));
    }, this.collector, this.currentFile, type);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

Expand All @@ -25,19 +26,43 @@
* @param <T> The type of the antlr parser
*/
public abstract class AbstractAntlrParserAdapter<T extends Parser> extends AbstractParser {

private final boolean extractsSemantics;

/**
* New instance
* @param extractsSemantics If true, the listener will extract semantics along with every token
*/
protected AbstractAntlrParserAdapter(boolean extractsSemantics) {
super();
brodmo marked this conversation as resolved.
Show resolved Hide resolved
this.extractsSemantics = extractsSemantics;
}

/**
* New instance that does not extract semantics; delegates to the boolean constructor with {@code false}.
*/
protected AbstractAntlrParserAdapter() {
this(false);
}

/**
* Parses the set of files
* @param files The files
* @return The extracted tokens
* @throws ParsingException If anything goes wrong
*/
public List<Token> parse(Set<File> files) throws ParsingException {
TokenCollector collector = new TokenCollector();

for (File file : files) {
List<File> filesList = new ArrayList<>(files);
if (files.isEmpty())
brodmo marked this conversation as resolved.
Show resolved Hide resolved
return new ArrayList<>();
File firstFile = filesList.remove(0);
TokenCollector collector = new TokenCollector(extractsSemantics, firstFile);
parseFile(firstFile, collector);
for (File file : filesList) {
collector.addFileEndToken(file); // takes the NEXT file
brodmo marked this conversation as resolved.
Show resolved Hide resolved
parseFile(file, collector);
}

collector.addFileEndToken(null);
return collector.getTokens();
}

Expand All @@ -46,16 +71,12 @@ private void parseFile(File file, TokenCollector collector) throws ParsingExcept
Lexer lexer = this.createLexer(CharStreams.fromReader(reader));
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
T parser = this.createParser(tokenStream);

ParserRuleContext entryContext = this.getEntryContext(parser);
ParseTreeWalker treeWalker = new ParseTreeWalker();

AbstractAntlrListener listener = this.createListener(collector, file);
InternalListener listener = new InternalListener(this.getListener(), collector);
for (ParseTree child : entryContext.children) {
treeWalker.walk(listener, child);
}

collector.addToken(Token.fileEnd(file));
} catch (IOException exception) {
throw new ParsingException(file, exception.getMessage(), exception);
}
Expand Down Expand Up @@ -83,10 +104,7 @@ private void parseFile(File file, TokenCollector collector) throws ParsingExcept
protected abstract ParserRuleContext getEntryContext(T parser);

/**
* Creates the listener
* @param collector The token collector
* @param currentFile The current file
* @return The parser
* @return The listener. Should be created once statically since it never changes.
*/
protected abstract AbstractAntlrListener createListener(TokenCollector collector, File currentFile);
protected abstract AbstractAntlrListener getListener();
}
Loading