Skip to content

Commit

Permalink
Expanded error message for GreedyStringTiling out of bounds
Browse files Browse the repository at this point in the history
  • Loading branch information
TwoOfTwelve committed Nov 28, 2024
1 parent 87030d1 commit 217f8b8
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions core/src/main/java/de/jplag/GreedyStringTiling.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;

import de.jplag.options.JPlagOptions;

Expand All @@ -30,7 +31,12 @@ public class GreedyStringTiling {
private final Map<Submission, int[]> cachedTokenValueLists = new IdentityHashMap<>();
private final Map<Submission, SubsequenceHashLookupTable> cachedHashLookupTables = new IdentityHashMap<>();

private static final String ERROR_INDEX_OUT_OF_BOUNDS = "GST index out of bounds. This is probably a random issue caused by multithreading issues. Length: %s, Index: %s";
/**
 * Message template used when a mark index lies outside the marks array. It takes five {@code %s} arguments, in this
 * order: length of the marks array, offending index, size of the submission's token list, comma-separated token type
 * descriptions, and length of the cached token value list for the submission.
 */
// A text block already has its incidental indentation removed by the compiler, so only the trailing line break that
// precedes the closing delimiter needs to be stripped.
private static final String ERROR_INDEX_OUT_OF_BOUNDS = """
GST index out of bounds. This is probably a random issue caused by multithreading issues.
Length: %s, Index: %s
TokenCount: %s, TokenList: %s
CachedTokenCount: %s
""".strip();

public GreedyStringTiling(JPlagOptions options) {
this.options = options;
Expand Down Expand Up @@ -117,14 +123,14 @@ private JPlagComparison compareInternal(Submission leftSubmission, Submission ri
List<Match> iterationMatches = new ArrayList<>();
for (int leftStartIndex = 0; leftStartIndex < leftValues.length - maximumMatchLength; leftStartIndex++) {
int leftSubsequenceHash = leftLookupTable.subsequenceHashForStartIndex(leftStartIndex);
if (checkMark(leftMarked, leftStartIndex) || leftSubsequenceHash == SubsequenceHashLookupTable.NO_HASH) {
if (checkMark(leftMarked, leftStartIndex, leftSubmission) || leftSubsequenceHash == SubsequenceHashLookupTable.NO_HASH) {
continue;
}
List<Integer> possiblyMatchingRightStartIndexes = rightLookupTable
.startIndexesOfPossiblyMatchingSubsequencesForSubsequenceHash(leftSubsequenceHash);
for (Integer rightStartIndex : possiblyMatchingRightStartIndexes) {
// comparison uses >= because it is assumed that the last token is a pivot (FILE_END)
if (checkMark(rightMarked, rightStartIndex) || maximumMatchLength >= rightValues.length - rightStartIndex) {
if (checkMark(rightMarked, rightStartIndex, rightSubmission) || maximumMatchLength >= rightValues.length - rightStartIndex) {
continue;
}

Expand Down Expand Up @@ -231,9 +237,11 @@ private int[] tokenValueListFromSubmission(Submission submission) {
}));
}

private boolean checkMark(boolean[] marks, int index) {
private boolean checkMark(boolean[] marks, int index, Submission submission) {
if (index >= marks.length) {
throw new IllegalStateException(String.format(ERROR_INDEX_OUT_OF_BOUNDS, marks.length, index));
throw new IllegalStateException(String.format(ERROR_INDEX_OUT_OF_BOUNDS, marks.length, index, submission.getTokenList().size(),
submission.getTokenList().stream().map(it -> it.getType().getDescription()).collect(Collectors.joining(", ")),
cachedTokenValueLists.get(submission).length));
}

return marks[index];
Expand Down

0 comments on commit 217f8b8

Please sign in to comment.