Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add normalization info to cpp tokens #1371

Merged
merged 10 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion core/src/main/java/de/jplag/Submission.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,11 @@ private static File createErrorDirectory(String... subdirectoryNames) {

try {
tokenList = language.parse(new HashSet<>(files));
if (logger.isDebugEnabled()) {
for (Token token : tokenList) {
logger.debug(String.join(" | ", token.getType().toString(), Integer.toString(token.getLine()), token.getSemantics().toString()));
}
}
} catch (ParsingException e) {
logger.warn("Failed to parse submission {} with error {}", this, e.getMessage(), e);
tokenList = null;
Expand All @@ -272,7 +277,7 @@ private static File createErrorDirectory(String... subdirectoryNames) {
*/
void normalize() {
List<Integer> originalOrder = getOrder(tokenList);
TokenStringNormalizer.normalize(tokenList);
tokenList = TokenStringNormalizer.normalize(tokenList);
List<Integer> normalizedOrder = getOrder(tokenList);

logger.debug("original line order: {}", originalOrder);
Expand Down
1 change: 1 addition & 0 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ private void parseSubmissions(List<Submission> submissions) {

int tooShort = 0;
for (Submission submission : submissions) {
logger.info("Parsing submission {}", submission.getName());
boolean ok;

logger.trace("------ Parsing submission: " + submission.getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.jplag.normalization;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
Expand All @@ -20,13 +21,15 @@ private TokenStringNormalizer() {
}

/**
* Normalizes the token string it receives in place. Tokens representing dead code have been eliminated and tokens
* representing subsequent independent statements have been put in a fixed order. Works by first constructing a
* Normalization Graph and then turning it back into a token string.
* Performs token string normalization. Tokens representing dead code have been eliminated and tokens representing
* subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph
* and then turning it back into a token string.
* @param tokens The original token string, remains unaltered.
* @return The normalized token string.
*/
public static void normalize(List<Token> tokens) {
public static List<Token> normalize(List<Token> tokens) {
SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph = new NormalizationGraphConstructor(tokens).get();
tokens.clear();
List<Token> normalizedTokens = new ArrayList<>(tokens.size());
spreadKeep(normalizationGraph);
PriorityQueue<Statement> roots = normalizationGraph.vertexSet().stream() //
.filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) //
Expand All @@ -36,7 +39,7 @@ public static void normalize(List<Token> tokens) {
do {
Statement statement = roots.poll();
if (statement.semantics().keep()) {
tokens.addAll(statement.tokens());
normalizedTokens.addAll(statement.tokens());
}
for (Statement successor : Graphs.successorListOf(normalizationGraph, statement)) {
normalizationGraph.removeEdge(statement, successor);
Expand All @@ -47,6 +50,7 @@ public static void normalize(List<Token> tokens) {
} while (!roots.isEmpty());
roots = newRoots;
}
return normalizedTokens;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,14 @@ protected static <T extends ParserRuleContext> T getDescendant(ParserRuleContext
queue.add(context);
while (!queue.isEmpty()) {
ParserRuleContext next = queue.removeFirst();
for (ParseTree tree : next.children) {
if (tree.getClass() == descendant) {
return descendant.cast(tree);
}
if (tree instanceof ParserRuleContext parserRuleContext) {
queue.addLast(parserRuleContext);
if (next.children != null) {
for (ParseTree tree : next.children) {
if (tree.getClass() == descendant) {
return descendant.cast(tree);
}
if (tree instanceof ParserRuleContext parserRuleContext) {
queue.addLast(parserRuleContext);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.function.*;

import org.antlr.v4.runtime.Token;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.TokenType;
import de.jplag.semantics.CodeSemantics;
Expand All @@ -15,6 +17,8 @@
* @param <T> The type of the visited entity.
*/
public abstract class AbstractVisitor<T> {
private static final Logger logger = LoggerFactory.getLogger(AbstractVisitor.class);

private final Predicate<T> condition;
private final List<Consumer<HandlerData<T>>> entryHandlers;
private TokenType entryTokenType;
Expand Down Expand Up @@ -85,7 +89,7 @@ public AbstractVisitor<T> withSemantics(Function<T, CodeSemantics> semanticsSupp
* @return Self
*/
public AbstractVisitor<T> withSemantics(Supplier<CodeSemantics> semanticsSupplier) {
// Adapts the no-argument supplier to the Function-based form by ignoring the visited entity.
this.entrySemantics = ignore -> semanticsSupplier.get();
// NOTE(review): the call below passes an equivalent adapter to the Function overload. The two
// statements look like the before/after variants of one change; if the Function overload already
// stores entrySemantics, the direct assignment above is redundant — confirm and keep only one.
withSemantics(ignore -> semanticsSupplier.get());
return this;
}

Expand All @@ -110,7 +114,10 @@ boolean matches(T entity) {
* Enter a given entity, injecting the needed dependencies.
*/
void enter(HandlerData<T> data) {
    // Semantics were configured but there is no token type to attach them to: report it here
    // instead of letting the semantics be dropped without notice.
    if (entryTokenType == null && entrySemantics != null) {
        logger.warn("Received semantics, but no token type, so no token was generated and the semantics discarded");
    }
    // Emit the entry token exactly once; a second identical call would emit the token twice.
    addToken(data, entryTokenType, entrySemantics, this::extractEnterToken); // addToken takes null token types
    // Run the registered entry handlers after the token has been handled.
    entryHandlers.forEach(handler -> handler.accept(data));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/
public class ContextVisitor<T extends ParserRuleContext> extends AbstractVisitor<T> {
private final List<Consumer<HandlerData<T>>> exitHandlers;
private TokenType exitToken;
private TokenType exitTokenType;
private Function<T, CodeSemantics> exitSemantics;

ContextVisitor(Predicate<T> condition) {
Expand Down Expand Up @@ -51,7 +51,7 @@ public AbstractVisitor<T> onExit(Consumer<T> handler) {
* @return Self
*/
public ContextVisitor<T> mapExit(TokenType tokenType) {
// NOTE(review): the same value is stored under two field names (exitToken and exitTokenType).
// This looks like the before/after pair of a field rename — confirm which field is still
// needed and keep only that assignment.
exitToken = tokenType;
exitTokenType = tokenType;
return this;
}

Expand Down Expand Up @@ -89,8 +89,7 @@ public ContextVisitor<T> withSemantics(Function<T, CodeSemantics> semantics) {

@Override
public ContextVisitor<T> withSemantics(Supplier<CodeSemantics> semantics) {
// Hands the supplier to the parent class implementation.
super.withSemantics(semantics);
// Uses the same supplier for the exit semantics; the visited entity is ignored.
this.exitSemantics = ignore -> semantics.get();
// NOTE(review): the call below passes an equivalent adapter to the Function overload of this class.
// If that overload already sets both entry and exit semantics, the two statements above are
// redundant — they look like the removed lines of the same change; confirm and keep one variant.
withSemantics(ignore -> semantics.get());
return this;
}

Expand Down Expand Up @@ -129,7 +128,7 @@ public ContextVisitor<T> addClassScope() {
* Exit a given entity, injecting the needed dependencies.
*/
void exit(HandlerData<T> data) {
// NOTE(review): addToken is invoked twice for the stop token, once per field name
// (exitToken / exitTokenType). If both fields hold a token type, two exit tokens would be
// emitted — this looks like the before/after pair of a field rename; confirm and keep one call.
addToken(data, exitToken, exitSemantics, ParserRuleContext::getStop);
addToken(data, exitTokenType, exitSemantics, ParserRuleContext::getStop);
// Run the registered exit handlers after the token calls.
exitHandlers.forEach(handler -> handler.accept(data));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.logging.Logger;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.Token;
import de.jplag.TokenType;
Expand All @@ -16,7 +18,8 @@
* Collects the tokens during parsing.
*/
public class TokenCollector {
private static final Logger logger = Logger.getLogger(TokenCollector.class.getName());
private static final Logger logger = LoggerFactory.getLogger(TokenCollector.class);

private final List<Token> collected;
private final boolean extractsSemantics;
private File file;
Expand All @@ -39,9 +42,6 @@ List<Token> getTokens() {
<T> void addToken(TokenType jplagType, Function<T, CodeSemantics> semanticsSupplier, T entity,
Function<T, org.antlr.v4.runtime.Token> extractToken, VariableRegistry variableRegistry) {
if (jplagType == null) {
if (semanticsSupplier != null) {
logger.warning("Received semantics, but no token type, so no token was generated and the semantics discarded");
}
return;
}
org.antlr.v4.runtime.Token antlrToken = extractToken.apply(entity);
Expand All @@ -58,7 +58,7 @@ <T> void addToken(TokenType jplagType, Function<T, CodeSemantics> semanticsSuppl
variableRegistry.updateSemantics(semantics);
} else {
if (semanticsSupplier != null) {
logger.warning(() -> String.format("Received semantics for token %s despite not expecting any", jplagType.getDescription()));
logger.warn("Received semantics for token {} despite not expecting any", jplagType.getDescription());
}
token = new Token(jplagType, this.file, line, column, length);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Registry of variables to assist in generating token semantics.
*/
public class VariableRegistry {
private static final Logger logger = LoggerFactory.getLogger(VariableRegistry.class);

private CodeSemantics semantics;
private Map<String, Variable> fileVariables;
private Deque<Map<String, Variable>> classVariables; // map class name to map of variable names to variables
Expand Down Expand Up @@ -113,6 +118,7 @@ public void updateSemantics(CodeSemantics semantics) {
* @param mutable Whether the variable is mutable.
*/
public void registerVariable(String variableName, VariableScope scope, boolean mutable) {
logger.debug("Register variable {}", variableName);
Variable variable = new Variable(variableName, scope, mutable);
switch (scope) {
case FILE -> fileVariables.put(variableName, variable);
Expand All @@ -133,6 +139,7 @@ public void registerVariable(String variableName, VariableScope scope, boolean m
* "this" keyword in Java, for example.
*/
public void registerVariableAccess(String variableName, boolean isClassVariable) {
logger.debug("{} {}", variableName, nextVariableAccessType);
if (ignoreNextVariableAccess) {
ignoreNextVariableAccess = false;
return;
Expand All @@ -152,9 +159,12 @@ public void registerVariableAccess(String variableName, boolean isClassVariable)
*/
public void addAllNonLocalVariablesAsReads() {
    // Start from the file-level variables.
    Set<Variable> nonLocalVariables = new HashSet<>(fileVariables.values());
    // Deque.getFirst() throws NoSuchElementException on an empty deque, so the first class-variable
    // map is only merged in when the deque actually has an entry.
    if (!classVariables.isEmpty()) {
        nonLocalVariables.addAll(classVariables.getFirst().values());
    }
    // Register every collected variable as read exactly once. This loop sits outside the guard so
    // file-level variables are still recorded when classVariables is empty.
    for (Variable variable : nonLocalVariables) {
        semantics.addRead(variable);
    }
}

private Variable getVariable(String variableName) {
Expand Down
5 changes: 5 additions & 0 deletions languages/cpp2/src/main/java/de/jplag/cpp2/CPPLanguage.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,9 @@ public String getIdentifier() {
public int minimumTokenMatch() {
// Fixed value for this language module: always 12.
return 12;
}

/**
 * Declares that the tokens of this language carry semantics.
 * @return always {@code true}.
 */
@Override
public boolean tokensHaveSemantics() {
return true;
}
}
Loading
Loading