diff --git a/core/src/main/java/de/jplag/GreedyStringTiling.java b/core/src/main/java/de/jplag/GreedyStringTiling.java
index e08450584..9bbe28833 100644
--- a/core/src/main/java/de/jplag/GreedyStringTiling.java
+++ b/core/src/main/java/de/jplag/GreedyStringTiling.java
@@ -9,6 +9,7 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
+import java.util.stream.Collectors;
 
 import de.jplag.options.JPlagOptions;
 
@@ -30,7 +31,12 @@ public class GreedyStringTiling {
     private final Map cachedTokenValueLists = new IdentityHashMap<>();
     private final Map cachedHashLookupTables = new IdentityHashMap<>();
 
-    private static final String ERROR_INDEX_OUT_OF_BOUNDS = "GST index out of bounds. This is probably a random issue caused by multithreading issues. Length: %s, Index: %s";
+    private static final String ERROR_INDEX_OUT_OF_BOUNDS = """
+            GST index out of bounds. This is probably a random issue caused by multithreading issues.
+            Length: %s, Index: %s
+            TokenCount: %s, TokenList: %s
+            CachedTokenCount: %s
+            """.strip();
 
     public GreedyStringTiling(JPlagOptions options) {
         this.options = options;
@@ -117,14 +123,14 @@ private JPlagComparison compareInternal(Submission leftSubmission, Submission ri
             List iterationMatches = new ArrayList<>();
             for (int leftStartIndex = 0; leftStartIndex < leftValues.length - maximumMatchLength; leftStartIndex++) {
                 int leftSubsequenceHash = leftLookupTable.subsequenceHashForStartIndex(leftStartIndex);
-                if (checkMark(leftMarked, leftStartIndex) || leftSubsequenceHash == SubsequenceHashLookupTable.NO_HASH) {
+                if (checkMark(leftMarked, leftStartIndex, leftSubmission) || leftSubsequenceHash == SubsequenceHashLookupTable.NO_HASH) {
                     continue;
                 }
                 List possiblyMatchingRightStartIndexes = rightLookupTable
                         .startIndexesOfPossiblyMatchingSubsequencesForSubsequenceHash(leftSubsequenceHash);
                 for (Integer rightStartIndex : possiblyMatchingRightStartIndexes) {
                     // comparison uses >= because it is assumed that the last token is a pivot (FILE_END)
-                    if (checkMark(rightMarked, rightStartIndex) || maximumMatchLength >= rightValues.length - rightStartIndex) {
+                    if (checkMark(rightMarked, rightStartIndex, rightSubmission) || maximumMatchLength >= rightValues.length - rightStartIndex) {
                         continue;
                     }
 
@@ -231,9 +237,23 @@ private int[] tokenValueListFromSubmission(Submission submission) {
         }));
     }
 
-    private boolean checkMark(boolean[] marks, int index) {
+    /**
+     * Reads the mark stored for a token index and fails with a detailed diagnostic message if the index is not smaller
+     * than the length of the mark array. Negative indexes are not checked here.
+     * @param marks is the mark array to read from.
+     * @param index is the token index to look up.
+     * @param submission is the submission the marks belong to; it is only used to build the diagnostic message.
+     * @return the mark stored at the given index.
+     * @throws IllegalStateException if the index is greater than or equal to the length of the mark array.
+     */
+    private boolean checkMark(boolean[] marks, int index, Submission submission) {
         if (index >= marks.length) {
-            throw new IllegalStateException(String.format(ERROR_INDEX_OUT_OF_BOUNDS, marks.length, index));
+            // The diagnostics must not fail themselves: the cache lookup returns null if the submission is not in the map.
+            int[] cachedTokenValues = cachedTokenValueLists.get(submission);
+            String cachedTokenCount = cachedTokenValues == null ? "not cached" : String.valueOf(cachedTokenValues.length);
+            var tokens = submission.getTokenList();
+            String tokenDescriptions = tokens.stream().map(it -> it.getType().getDescription()).collect(Collectors.joining(", "));
+            throw new IllegalStateException(
+                    String.format(ERROR_INDEX_OUT_OF_BOUNDS, marks.length, index, tokens.size(), tokenDescriptions, cachedTokenCount));
         }
 
         return marks[index];