diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 92c8fd5c8d..5610a19873 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -24,7 +24,7 @@ import org.slf4j.LoggerFactory; import de.jplag.exceptions.LanguageException; -import de.jplag.normalization.TokenStringNormalizer; +import de.jplag.normalization.TokenSequenceNormalizer; import de.jplag.options.JPlagOptions; /** @@ -259,7 +259,7 @@ private static File createErrorDirectory(String... subdirectoryNames) { */ void normalize() { List originalOrder = getOrder(tokenList); - tokenList = TokenStringNormalizer.normalize(tokenList); + tokenList = TokenSequenceNormalizer.normalize(tokenList); List normalizedOrder = getOrder(tokenList); logger.debug("original line order: {}", originalOrder); diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index f7c1438bbb..884d800450 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -99,6 +99,11 @@ public List getInvalidSubmissions() { return invalidSubmissions; } + /** + * Normalizes the token sequences of all submissions (including basecode). This makes the token sequence invariant to + * dead code insertion and independent statement reordering by removing dead tokens and optionally reordering tokens to + * a deterministic order. + */ public void normalizeSubmissions() { if (baseCodeSubmission != null) { baseCodeSubmission.normalize(); diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java index b10fda2ea5..732d3d3cf2 100644 --- a/core/src/main/java/de/jplag/normalization/MultipleEdge.java +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java @@ -6,7 +6,7 @@ import de.jplag.semantics.Variable; /** - * Models a multiple edge in the normalization graph. 
Contains multiple edges. + * Models multiple edges between two nodes in the normalization graph. */ class MultipleEdge { private final Set edges; diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java similarity index 78% rename from core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java rename to core/src/main/java/de/jplag/normalization/NormalizationGraph.java index fc995e69d7..e07c873b50 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -14,21 +14,28 @@ import de.jplag.semantics.Variable; /** - * Constructs the normalization graph. + * Token normalization graph, which is a directed graph based on nodes of type {@link Statement} and edges of type + * {@link MultipleEdge}. This class inherits from {@link SimpleDirectedGraph} to provide a data structure for the + * token sequence normalization. 
*/ -class NormalizationGraphConstructor { - private final SimpleDirectedGraph graph; +public class NormalizationGraph extends SimpleDirectedGraph { + + private static final long serialVersionUID = -8407465274643809647L; // generated + private int bidirectionalBlockDepth; - private final Collection fullPositionSignificanceIncoming; - private Statement lastFullPositionSignificance; - private Statement lastPartialPositionSignificance; - private final Map> variableReads; - private final Map> variableWrites; - private final Set inCurrentBidirectionalBlock; - private Statement current; - - NormalizationGraphConstructor(List tokens) { - graph = new SimpleDirectedGraph<>(MultipleEdge.class); + private final transient Collection fullPositionSignificanceIncoming; + private transient Statement lastFullPositionSignificance; + private transient Statement lastPartialPositionSignificance; + private final transient Map> variableReads; + private final transient Map> variableWrites; + private final transient Set inCurrentBidirectionalBlock; + private transient Statement current; + + /** + * Creates a new normalization graph. 
+ */ + public NormalizationGraph(List tokens) { + super(MultipleEdge.class); bidirectionalBlockDepth = 0; fullPositionSignificanceIncoming = new ArrayList<>(); variableReads = new HashMap<>(); @@ -45,12 +52,8 @@ class NormalizationGraphConstructor { addStatement(builderForCurrent.build()); } - SimpleDirectedGraph get() { - return graph; - } - private void addStatement(Statement statement) { - graph.addVertex(statement); + addVertex(statement); this.current = statement; processBidirectionalBlock(); processFullPositionSignificance(); @@ -123,10 +126,10 @@ private void processWrites() { * @param cause the variable that caused the edge, may be null */ private void addIncomingEdgeToCurrent(Statement start, EdgeType type, Variable cause) { - MultipleEdge multipleEdge = graph.getEdge(start, current); + MultipleEdge multipleEdge = getEdge(start, current); if (multipleEdge == null) { multipleEdge = new MultipleEdge(); - graph.addEdge(start, current, multipleEdge); + addEdge(start, current, multipleEdge); } multipleEdge.addEdge(type, cause); } @@ -135,4 +138,5 @@ private void addVariableToMap(Map> variableMap, variableMap.putIfAbsent(variable, new ArrayList<>()); variableMap.get(variable).add(current); } + } diff --git a/core/src/main/java/de/jplag/normalization/Statement.java b/core/src/main/java/de/jplag/normalization/Statement.java index a749a57740..81f9b33640 100644 --- a/core/src/main/java/de/jplag/normalization/Statement.java +++ b/core/src/main/java/de/jplag/normalization/Statement.java @@ -8,7 +8,7 @@ import de.jplag.semantics.CodeSemantics; /** - * Models statements, which are the nodes of the normalization graph. + * Models statements, which are the nodes of the normalization graph. A statement refers to one or more tokens. */ class Statement implements Comparable { @@ -16,6 +16,11 @@ class Statement implements Comparable { private final int lineNumber; private final CodeSemantics semantics; + /** + * Constructs a new Statement. 
+ * @param tokens the list of tokens that represent this statement. + * @param lineNumber the line number where this statement occurs in the source code. + */ Statement(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; @@ -30,8 +35,8 @@ CodeSemantics semantics() { return semantics; } - void markKeep() { - semantics.markKeep(); + void markAsCritical() { + semantics.markAsCritical(); } private int tokenOrdinal(Token token) { diff --git a/core/src/main/java/de/jplag/normalization/StatementBuilder.java b/core/src/main/java/de/jplag/normalization/StatementBuilder.java index eef5d0c821..f9f3bd5008 100644 --- a/core/src/main/java/de/jplag/normalization/StatementBuilder.java +++ b/core/src/main/java/de/jplag/normalization/StatementBuilder.java @@ -13,6 +13,10 @@ class StatementBuilder { private final List tokens; private final int lineNumber; + /** + * Constructs a new StatementBuilder. + * @param lineNumber the line number where the statement starts in the source code. 
+ */ StatementBuilder(int lineNumber) { this.lineNumber = lineNumber; this.tokens = new ArrayList<>(); diff --git a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java similarity index 55% rename from core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java rename to core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java index 8ffafffbf7..9a1256300e 100644 --- a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenSequenceNormalizer.java @@ -1,7 +1,6 @@ package de.jplag.normalization; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; @@ -9,29 +8,35 @@ import java.util.stream.Collectors; import org.jgrapht.Graphs; -import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; /** * Performs token sequence normalization. */ -public class TokenStringNormalizer { +public final class TokenSequenceNormalizer { - private TokenStringNormalizer() { + private TokenSequenceNormalizer() { + // private constructor for non-instantiability. } /** * Performs token sequence normalization. Tokens representing dead code have been eliminated and tokens representing - * subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph - * and then turning it back into a token sequence. + * subsequent independent statements have been put in a fixed order if sorting is true. Works by first constructing a + * Normalization Graph and then turning it back into a token sequence. For more information refer to the + * corresponding paper * @param tokens The original token sequence, remains unaltered. - * @return The normalized token sequence as unmodifiable list. + * @return The normalized token sequence. 
+ */ public static List normalize(List tokens) { - SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); + NormalizationGraph graph = new NormalizationGraph(tokens); + propagateCriticalityStatus(graph); + return normalizeWithSorting(tokens, graph); + } + + // Add tokens in normalized original order, removing dead tokens + private static List normalizeWithSorting(List tokens, NormalizationGraph normalizationGraph) { List normalizedTokens = new ArrayList<>(tokens.size()); - spreadKeep(normalizationGraph); PriorityQueue roots = normalizationGraph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); @@ -39,7 +44,7 @@ public static List normalize(List tokens) { PriorityQueue newRoots = new PriorityQueue<>(); do { Statement statement = roots.poll(); - if (statement.semantics().keep()) { + if (statement.semantics().isCritical()) { normalizedTokens.addAll(statement.tokens()); } for (Statement successor : Graphs.successorListOf(normalizationGraph, statement)) { @@ -51,26 +56,29 @@ public static List normalize(List tokens) { } while (!roots.isEmpty()); roots = newRoots; } - return Collections.unmodifiableList(normalizedTokens); + return normalizedTokens; } /** - * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. + * Spread criticality status to every node that does not represent dead code. Nodes without criticality status are later + * eliminated (dead nodes). Before calling this method, only the statements that directly affect the behavior are marked + * as critical. After calling this method, this also holds true for statements that (transitively) depend (read/write) on + * the critical ones. 
*/ - private static void spreadKeep(SimpleDirectedGraph normalizationGraph) { + private static void propagateCriticalityStatus(NormalizationGraph normalizationGraph) { Queue visit = new LinkedList<>(normalizationGraph.vertexSet().stream() // - .filter(tl -> tl.semantics().keep()).toList()); + .filter(tl -> tl.semantics().isCritical()).toList()); while (!visit.isEmpty()) { Statement current = visit.remove(); for (Statement predecessor : Graphs.predecessorListOf(normalizationGraph, current)) { // performance of iteration? - if (!predecessor.semantics().keep() && normalizationGraph.getEdge(predecessor, current).isVariableFlow()) { - predecessor.markKeep(); + if (!predecessor.semantics().isCritical() && normalizationGraph.getEdge(predecessor, current).isVariableFlow()) { + predecessor.markAsCritical(); visit.add(predecessor); } } for (Statement successor : Graphs.successorListOf(normalizationGraph, current)) { - if (!successor.semantics().keep() && normalizationGraph.getEdge(current, successor).isVariableReverseFlow()) { - successor.markKeep(); + if (!successor.semantics().isCritical() && normalizationGraph.getEdge(current, successor).isVariableReverseFlow()) { + successor.markAsCritical(); visit.add(successor); } } diff --git a/core/src/test/java/de/jplag/NormalizationTest.java b/core/src/test/java/de/jplag/NormalizationTest.java index c6a9db9ed1..f1ba200194 100644 --- a/core/src/test/java/de/jplag/NormalizationTest.java +++ b/core/src/test/java/de/jplag/NormalizationTest.java @@ -39,4 +39,4 @@ void testReorderingNormalization() { void testInsertionReorderingNormalization() { Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInsertedReordered.java")); } -} +} \ No newline at end of file diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 2eb99262d5..7da3304a92 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ 
b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -7,11 +7,11 @@ import java.util.Set; /** - * Contains semantic information about a code snippet, in our case either a token or a statement. + * Contains semantic information about a code fragment, in our case either a token or a statement. */ public class CodeSemantics { - private boolean keep; + private boolean critical; private PositionSignificance positionSignificance; private final int bidirectionalBlockDepthChange; private final Set reads; @@ -19,47 +19,47 @@ public class CodeSemantics { /** * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written - * to in this code snippet, are created empty. - * @param keep Whether the code snippet must be kept or if it may be removed. - * @param positionSignificance In which way the position of the code snippet relative to other code snippets of the same - * type is significant. For the possible options see {@link PositionSignificance}. - * @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks + * to in this code fragment, are created empty. + * @param critical Whether the code fragment must be kept as it affects the program behavior or if it may be removed. + * @param positionSignificance In which way the position of the code fragment relative to other tokens of the same type + * is significant. For the possible options see {@link PositionSignificance}. + * @param bidirectionalBlockDepthChange How the code fragment affects the depth of bidirectional blocks, meaning blocks * where any statement within it may be executed after any other. This will typically be a loop. - * @param reads A set of the variables which were (potentially) read from in the code snippet. - * @param writes A set of the variables which were (potentially) written to in the code snippet. 
+ * @param reads A set of the variables which were (potentially) read from in the code fragment. + * @param writes A set of the variables which were (potentially) written to in the code fragment. */ - private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, + private CodeSemantics(boolean critical, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, Set writes) { - this.keep = keep; + this.critical = critical; this.positionSignificance = positionSignificance; this.bidirectionalBlockDepthChange = bidirectionalBlockDepthChange; this.reads = reads; this.writes = writes; } - private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange) { - this(keep, positionSignificance, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); + private CodeSemantics(boolean critical, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange) { + this(critical, positionSignificance, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); } /** - * Creates new semantics with the following meaning: The code snippet may be removed, and its position relative to other - * code snippets may change. Example: An assignment to a local variable. + * Creates new semantics with the following meaning: The code fragment may be removed, and its position relative to + * other code fragments may change. Example: An assignment to a local variable. */ public CodeSemantics() { this(false, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position relative to - * other code snippets may change. Example: An attribute declaration. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position relative to + * other code fragments may change. Example: An attribute declaration. 
*/ public static CodeSemantics createKeep() { return new CodeSemantics(true, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to other code snippets of the same type. Example: A method call which is guaranteed to not result in an + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to other code fragments of the same type. Example: A method call which is guaranteed to not result in an * exception. */ public static CodeSemantics createCritical() { @@ -67,16 +67,16 @@ public static CodeSemantics createCritical() { } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets. Example: A return statement. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments. Example: A return statement. */ public static CodeSemantics createControl() { return new CodeSemantics(true, PositionSignificance.FULL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets, which also begins a bidirectional block. Example: The beginning of a while + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments, which also begins a bidirectional block. Example: The beginning of a while * loop. 
*/ public static CodeSemantics createLoopBegin() { @@ -84,71 +84,71 @@ public static CodeSemantics createLoopBegin() { } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay - * invariant to all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. + * @return new semantics with the following meaning: The code fragment may not be removed, and its position must stay + * invariant to all other code fragments, which also ends a bidirectional block. Example: The end of a while loop. */ public static CodeSemantics createLoopEnd() { return new CodeSemantics(true, PositionSignificance.FULL, -1); } /** - * @return whether this code snippet must be kept. + * @return whether this token is critical to the program behavior. */ - public boolean keep() { - return keep; + public boolean isCritical() { + return critical; } /** - * Mark this code snippet as having to be kept. + * Mark this token as critical to the program behavior. */ - public void markKeep() { - keep = true; + public void markAsCritical() { + critical = true; } /** - * @return the change this code snippet causes in the depth of bidirectional loops. + * @return the change this code fragment causes in the depth of bidirectional loops. */ public int bidirectionalBlockDepthChange() { return bidirectionalBlockDepthChange; } /** - * @return whether this code snippet has partial position significance. + * @return whether this code fragment has partial position significance. */ public boolean hasPartialPositionSignificance() { return positionSignificance == PositionSignificance.PARTIAL; } /** - * @return whether this code snippet has full position significance. + * @return whether this code fragment has full position significance. */ public boolean hasFullPositionSignificance() { return positionSignificance == PositionSignificance.FULL; } /** - * Mark this code snippet as having full position significance. 
+ * Mark this code fragment as having full position significance. */ public void markFullPositionSignificance() { positionSignificance = PositionSignificance.FULL; } /** - * @return an unmodifiable set of the variables which were (potentially) read from in this code snippet. + * @return an unmodifiable set of the variables which were (potentially) read from in this code fragment. */ public Set reads() { return Collections.unmodifiableSet(reads); } /** - * @return an unmodifiable set of the variables which were (potentially) written to in this code snippet. + * @return an unmodifiable set of the variables which were (potentially) written to in this code fragment. */ public Set writes() { return Collections.unmodifiableSet(writes); } /** - * Add a variable to the set of variables which were (potentially) read from in this code snippet. + * Add a variable to the set of variables which were (potentially) read from in this code fragment. * @param variable The variable which is added. */ public void addRead(Variable variable) { @@ -156,7 +156,7 @@ public void addRead(Variable variable) { } /** - * Add a variable to the set of variables which were (potentially) written to in this code snippet. + * Add a variable to the set of variables which were (potentially) written to in this code fragment. * @param variable The variable which is added. 
*/ public void addWrite(Variable variable) { @@ -182,7 +182,7 @@ public static CodeSemantics join(List semanticsList) { Set reads = new HashSet<>(); Set writes = new HashSet<>(); for (CodeSemantics semantics : semanticsList) { - keep = keep || semantics.keep; + keep = keep || semantics.critical; if (semantics.positionSignificance.compareTo(positionSignificance) > 0) { positionSignificance = semantics.positionSignificance; } @@ -196,7 +196,7 @@ public static CodeSemantics join(List semanticsList) { @Override public String toString() { List properties = new LinkedList<>(); - if (keep) { + if (critical) { properties.add("keep"); } if (positionSignificance != PositionSignificance.NONE) {