Skip to content

Commit

Permalink
Merge pull request #1371 from mbrdl/cpp-normalize-new
Browse files Browse the repository at this point in the history
Add normalization info to cpp tokens
  • Loading branch information
tsaglam authored Nov 27, 2023
2 parents acd9651 + 3f7f6d9 commit e2d8bb7
Show file tree
Hide file tree
Showing 12 changed files with 198 additions and 78 deletions.
7 changes: 6 additions & 1 deletion core/src/main/java/de/jplag/Submission.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,11 @@ private static File createErrorDirectory(String... subdirectoryNames) {

try {
tokenList = language.parse(new HashSet<>(files));
if (logger.isDebugEnabled()) {
for (Token token : tokenList) {
logger.debug(String.join(" | ", token.getType().toString(), Integer.toString(token.getLine()), token.getSemantics().toString()));
}
}
} catch (ParsingException e) {
logger.warn("Failed to parse submission {} with error {}", this, e.getMessage(), e);
tokenList = null;
Expand All @@ -272,7 +277,7 @@ private static File createErrorDirectory(String... subdirectoryNames) {
*/
void normalize() {
List<Integer> originalOrder = getOrder(tokenList);
TokenStringNormalizer.normalize(tokenList);
tokenList = TokenStringNormalizer.normalize(tokenList);
List<Integer> normalizedOrder = getOrder(tokenList);

logger.debug("original line order: {}", originalOrder);
Expand Down
1 change: 1 addition & 0 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ private void parseSubmissions(List<Submission> submissions) {

int tooShort = 0;
for (Submission submission : submissions) {
logger.info("Parsing submission {}", submission.getName());
boolean ok;

logger.trace("------ Parsing submission: " + submission.getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.jplag.normalization;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
Expand All @@ -20,13 +21,15 @@ private TokenStringNormalizer() {
}

/**
* Normalizes the token string it receives in place. Tokens representing dead code have been eliminated and tokens
* representing subsequent independent statements have been put in a fixed order. Works by first constructing a
* Normalization Graph and then turning it back into a token string.
* Performs token string normalization. Tokens representing dead code have been eliminated and tokens representing
* subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph
* and then turning it back into a token string.
* @param tokens The original token string, remains unaltered.
* @return The normalized token string.
*/
public static void normalize(List<Token> tokens) {
public static List<Token> normalize(List<Token> tokens) {
SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph = new NormalizationGraphConstructor(tokens).get();
tokens.clear();
List<Token> normalizedTokens = new ArrayList<>(tokens.size());
spreadKeep(normalizationGraph);
PriorityQueue<Statement> roots = normalizationGraph.vertexSet().stream() //
.filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) //
Expand All @@ -36,7 +39,7 @@ public static void normalize(List<Token> tokens) {
do {
Statement statement = roots.poll();
if (statement.semantics().keep()) {
tokens.addAll(statement.tokens());
normalizedTokens.addAll(statement.tokens());
}
for (Statement successor : Graphs.successorListOf(normalizationGraph, statement)) {
normalizationGraph.removeEdge(statement, successor);
Expand All @@ -47,6 +50,7 @@ public static void normalize(List<Token> tokens) {
} while (!roots.isEmpty());
roots = newRoots;
}
return normalizedTokens;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,14 @@ protected static <T extends ParserRuleContext> T getDescendant(ParserRuleContext
queue.add(context);
while (!queue.isEmpty()) {
ParserRuleContext next = queue.removeFirst();
for (ParseTree tree : next.children) {
if (tree.getClass() == descendant) {
return descendant.cast(tree);
}
if (tree instanceof ParserRuleContext parserRuleContext) {
queue.addLast(parserRuleContext);
if (next.children != null) {
for (ParseTree tree : next.children) {
if (tree.getClass() == descendant) {
return descendant.cast(tree);
}
if (tree instanceof ParserRuleContext parserRuleContext) {
queue.addLast(parserRuleContext);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.function.*;

import org.antlr.v4.runtime.Token;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.TokenType;
import de.jplag.semantics.CodeSemantics;
Expand All @@ -15,6 +17,8 @@
* @param <T> The type of the visited entity.
*/
public abstract class AbstractVisitor<T> {
private static final Logger logger = LoggerFactory.getLogger(AbstractVisitor.class);

private final Predicate<T> condition;
private final List<Consumer<HandlerData<T>>> entryHandlers;
private TokenType entryTokenType;
Expand Down Expand Up @@ -85,7 +89,7 @@ public AbstractVisitor<T> withSemantics(Function<T, CodeSemantics> semanticsSupp
* @return Self
*/
public AbstractVisitor<T> withSemantics(Supplier<CodeSemantics> semanticsSupplier) {
    // Adapt the supplier to the entity-based overload and delegate to it rather than also
    // assigning entrySemantics here: the extra assignment was redundant, and going through the
    // overload means a subclass that overrides it (e.g. ContextVisitor) is honoured as well.
    withSemantics(ignore -> semanticsSupplier.get());
    return this;
}

Expand All @@ -110,7 +114,10 @@ boolean matches(T entity) {
* Enter a given entity, injecting the needed dependencies.
*/
void enter(HandlerData<T> data) {
    // Semantics without a token type cannot be attached to anything; warn so the misconfigured
    // visitor is noticed instead of silently dropping the semantics.
    if (entryTokenType == null && entrySemantics != null) {
        logger.warn("Received semantics, but no token type, so no token was generated and the semantics discarded");
    }
    // Emit the entry token exactly once; a second call would duplicate the token.
    addToken(data, entryTokenType, entrySemantics, this::extractEnterToken); // addToken takes null token types
    // Run the user-registered entry handlers after the token has been added.
    entryHandlers.forEach(handler -> handler.accept(data));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/
public class ContextVisitor<T extends ParserRuleContext> extends AbstractVisitor<T> {
private final List<Consumer<HandlerData<T>>> exitHandlers;
private TokenType exitToken;
private TokenType exitTokenType;
private Function<T, CodeSemantics> exitSemantics;

ContextVisitor(Predicate<T> condition) {
Expand Down Expand Up @@ -51,7 +51,7 @@ public AbstractVisitor<T> onExit(Consumer<T> handler) {
* @return Self
*/
public ContextVisitor<T> mapExit(TokenType tokenType) {
    // Only the renamed field is written; the former exitToken field is obsolete.
    exitTokenType = tokenType;
    return this;
}

Expand Down Expand Up @@ -89,8 +89,7 @@ public ContextVisitor<T> withSemantics(Function<T, CodeSemantics> semantics) {

@Override
public ContextVisitor<T> withSemantics(Supplier<CodeSemantics> semantics) {
    // Adapt the supplier to the entity-based overload of this class and delegate to it once,
    // instead of additionally calling super and assigning exitSemantics by hand.
    // NOTE(review): relies on withSemantics(Function) configuring both entry and exit semantics — confirm.
    withSemantics(ignore -> semantics.get());
    return this;
}

Expand Down Expand Up @@ -129,7 +128,7 @@ public ContextVisitor<T> addClassScope() {
* Exit a given entity, injecting the needed dependencies.
*/
void exit(HandlerData<T> data) {
    // Emit the exit token exactly once, positioned at the last token of the context.
    addToken(data, exitTokenType, exitSemantics, ParserRuleContext::getStop);
    // Run the user-registered exit handlers after the token has been added.
    exitHandlers.forEach(handler -> handler.accept(data));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.logging.Logger;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.jplag.Token;
import de.jplag.TokenType;
Expand All @@ -16,7 +18,8 @@
* Collects the tokens during parsing.
*/
public class TokenCollector {
private static final Logger logger = Logger.getLogger(TokenCollector.class.getName());
private static final Logger logger = LoggerFactory.getLogger(TokenCollector.class);

private final List<Token> collected;
private final boolean extractsSemantics;
private File file;
Expand All @@ -39,9 +42,6 @@ List<Token> getTokens() {
<T> void addToken(TokenType jplagType, Function<T, CodeSemantics> semanticsSupplier, T entity,
Function<T, org.antlr.v4.runtime.Token> extractToken, VariableRegistry variableRegistry) {
if (jplagType == null) {
if (semanticsSupplier != null) {
logger.warning("Received semantics, but no token type, so no token was generated and the semantics discarded");
}
return;
}
org.antlr.v4.runtime.Token antlrToken = extractToken.apply(entity);
Expand All @@ -58,7 +58,7 @@ <T> void addToken(TokenType jplagType, Function<T, CodeSemantics> semanticsSuppl
variableRegistry.updateSemantics(semantics);
} else {
if (semanticsSupplier != null) {
logger.warning(() -> String.format("Received semantics for token %s despite not expecting any", jplagType.getDescription()));
logger.warn("Received semantics for token {} despite not expecting any", jplagType.getDescription());
}
token = new Token(jplagType, this.file, line, column, length);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Registry of variables to assist in generating token semantics.
*/
public class VariableRegistry {
private static final Logger logger = LoggerFactory.getLogger(VariableRegistry.class);

private CodeSemantics semantics;
private Map<String, Variable> fileVariables;
private Deque<Map<String, Variable>> classVariables; // map class name to map of variable names to variables
Expand Down Expand Up @@ -113,6 +118,7 @@ public void updateSemantics(CodeSemantics semantics) {
* @param mutable Whether the variable is mutable.
*/
public void registerVariable(String variableName, VariableScope scope, boolean mutable) {
logger.debug("Register variable {}", variableName);
Variable variable = new Variable(variableName, scope, mutable);
switch (scope) {
case FILE -> fileVariables.put(variableName, variable);
Expand All @@ -133,6 +139,7 @@ public void registerVariable(String variableName, VariableScope scope, boolean m
* "this" keyword in Java, for example.
*/
public void registerVariableAccess(String variableName, boolean isClassVariable) {
logger.debug("{} {}", variableName, nextVariableAccessType);
if (ignoreNextVariableAccess) {
ignoreNextVariableAccess = false;
return;
Expand All @@ -152,9 +159,12 @@ public void registerVariableAccess(String variableName, boolean isClassVariable)
*/
public void addAllNonLocalVariablesAsReads() {
    // File-scope variables are non-local everywhere, so they are always collected.
    Set<Variable> nonLocalVariables = new HashSet<>(fileVariables.values());
    // The class-scope stack is empty when no class has been entered; Deque.getFirst() would
    // throw NoSuchElementException in that case, so only the class lookup is guarded.
    if (!classVariables.isEmpty()) {
        nonLocalVariables.addAll(classVariables.getFirst().values());
    }
    // Register the reads unconditionally so file-scope variables are not skipped when there is
    // no enclosing class.
    for (Variable variable : nonLocalVariables) {
        semantics.addRead(variable);
    }
}

private Variable getVariable(String variableName) {
Expand Down
5 changes: 5 additions & 0 deletions languages/cpp2/src/main/java/de/jplag/cpp2/CPPLanguage.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,9 @@ public String getIdentifier() {
public int minimumTokenMatch() {
// Minimum-token-match value reported for this language; hard-coded to 12.
return 12;
}

/**
 * Reports that the tokens of this language carry semantic information.
 * NOTE(review): presumably this is what makes the language eligible for token string normalization — confirm
 * against the overridden declaration.
 * @return always {@code true}.
 */
@Override
public boolean tokensHaveSemantics() {
return true;
}
}
Loading

0 comments on commit e2d8bb7

Please sign in to comment.