diff --git a/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java b/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java
index b65376594..8f79e4302 100644
--- a/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java
+++ b/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java
@@ -36,7 +36,6 @@
*
* @see
* github.com/Microsoft/vscode-textmate/blob/master/src/tests/tests.ts
- *
*/
public class GrammarSuiteTest {
@@ -45,7 +44,7 @@ public class GrammarSuiteTest {
// TODO: fix thoses tests:
// It seems that problem comes from with encoding. OnigString should support UTF-16 like
// https://github.com/atom/node-oniguruma/blob/master/src/onig-string.cc
- private static final List IGNORE_TESTS = List.of("TEST #24", "TEST #66", "Issue #82", "Issue #119");
+ private static final List IGNORE_TESTS = List.of("TEST #24", "TEST #66");
@TestFactory
@DisplayName("Tokenization /first-mate/")
diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java
index d92c4ddad..af526844f 100644
--- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java
+++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java
@@ -1,5 +1,5 @@
/**
- * Copyright (c) 2015-2017 Angelo ZERR.
+ * Copyright (c) 2015-2017 Angelo ZERR.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
@@ -11,8 +11,8 @@
* Initial license: MIT
*
* Contributors:
- * - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license
- * - Angelo Zerr - translation and adaptation to Java
+ * - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license
+ * - Angelo Zerr - translation and adaptation to Java
*/
package org.eclipse.tm4e.core.grammar;
@@ -20,6 +20,7 @@
import java.util.List;
import java.util.Objects;
+import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.internal.grammar.ScopeListElement;
import org.eclipse.tm4e.core.internal.rule.IRuleRegistry;
import org.eclipse.tm4e.core.internal.rule.Rule;
@@ -33,7 +34,7 @@
*/
public class StackElement {
- public static final StackElement NULL = new StackElement(null, 0, 0, null, null, null);
+ public static final StackElement NULL = new StackElement(null, 0, 0, 0, false, null, null, null);
/**
* The position on the current line where this state was pushed.
@@ -42,10 +43,19 @@ public class StackElement {
*/
private int enterPosition;
+ /**
+ * The captured anchor position when this stack element was pushed.
+ * This is relevant only while tokenizing a line, to restore the anchor position when popping.
+ * Its value is meaningless across lines.
+ */
+ private int anchorPos;
+
/**
* The previous state on the stack (or null for the root state).
*/
+ @Nullable
public final StackElement parent;
+
/**
* The depth of the stack.
*/
@@ -55,13 +65,21 @@ public class StackElement {
* The state (rule) that this element represents.
*/
public final int ruleId;
+
+ /**
+ * The state has entered and captured \n. This means that the next line should have an anchorPosition of 0.
+ */
+ public final boolean beginRuleCapturedEOL;
+
/**
* The "pop" (end) condition for this state in case that it was dynamically generated through captured text.
*/
public final String endRule;
+
/**
* The list of scopes containing the "name" for this state.
*/
+
public final ScopeListElement nameScopesList;
/**
* The list of scopes containing the "contentName" (besides "name") for this state.
@@ -69,11 +87,21 @@ public class StackElement {
*/
public final ScopeListElement contentNameScopesList;
- public StackElement(StackElement parent, int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) {
+ public StackElement(
+ @Nullable StackElement parent,
+ int ruleId,
+ int enterPos,
+ int anchorPos,
+ boolean beginRuleCapturedEOL,
+ @Nullable String endRule,
+ ScopeListElement nameScopesList,
+ ScopeListElement contentNameScopesList) {
this.parent = parent;
this.depth = (this.parent != null ? this.parent.depth + 1 : 1);
this.ruleId = ruleId;
this.enterPosition = enterPos;
+ this.anchorPos = anchorPos;
+ this.beginRuleCapturedEOL = beginRuleCapturedEOL;
this.endRule = endRule;
this.nameScopesList = nameScopesList;
this.contentNameScopesList = contentNameScopesList;
@@ -82,44 +110,71 @@ public StackElement(StackElement parent, int ruleId, int enterPos, String endRul
/**
* A structural equals check. Does not take into account `scopes`.
*/
- private static boolean structuralEquals(StackElement a, StackElement b) {
+ private static boolean structuralEquals(@Nullable StackElement a, @Nullable StackElement b) {
+ do {
+ if (a == b) {
+ return true;
+ }
+
+ if (a == null && b == null) {
+ // End of list reached for both
+ return true;
+ }
+
+ if (a == null || b == null) {
+ // End of list reached only for one
+ return false;
+ }
+
+ if (a.depth != b.depth || a.ruleId != b.ruleId || !Objects.equals(a.endRule, b.endRule)) {
+ return false;
+ }
+
+ // Go to previous pair
+ a = a.parent;
+ b = b.parent;
+ } while (true);
+ }
+
+ @SuppressWarnings("null")
+ private static boolean equals(@Nullable StackElement a, @Nullable StackElement b) {
if (a == b) {
return true;
}
- if (a == null || b == null) {
+ if (!structuralEquals(a, b)) {
return false;
}
- return a.depth == b.depth && a.ruleId == b.ruleId && Objects.equals(a.endRule, b.endRule) && structuralEquals(a.parent, b.parent);
+ return a.contentNameScopesList.equals(b.contentNameScopesList);
}
@Override
public boolean equals(Object other) {
- if (other == this) {
- return true;
- }
- if (other == null) {
+ if (other == null || other.getClass() != StackElement.class) {
return false;
}
- if (!(other instanceof StackElement)) {
- return false;
- }
- StackElement stackElement = (StackElement)other;
- return structuralEquals(this, stackElement) && this.contentNameScopesList.equals(stackElement.contentNameScopesList);
+ return equals(this, (StackElement) other);
}
@Override
public int hashCode() {
- return Objects.hash(depth, ruleId, endRule, parent, contentNameScopesList);
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + Objects.hash(endRule, parent, contentNameScopesList);
+ result = prime * result + depth;
+ result = prime * result + ruleId;
+ return result;
}
public void reset() {
StackElement el = this;
while (el != null) {
el.enterPosition = -1;
+ el.anchorPos = -1;
el = el.parent;
}
}
+ @Nullable
public StackElement pop() {
return this.parent;
}
@@ -131,8 +186,19 @@ public StackElement safePop() {
return this;
}
- public StackElement push(int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) {
- return new StackElement(this, ruleId, enterPos, endRule, nameScopesList, contentNameScopesList);
+ public StackElement push(int ruleId,
+ int enterPos,
+ int anchorPos,
+ boolean beginRuleCapturedEOL,
+ @Nullable String endRule,
+ ScopeListElement nameScopesList,
+ ScopeListElement contentNameScopesList) {
+ return new StackElement(this, ruleId, enterPos, anchorPos, beginRuleCapturedEOL, endRule, nameScopesList,
+ contentNameScopesList);
+ }
+
+ public int getAnchorPos() {
+ return anchorPos;
}
public int getEnterPos() {
@@ -147,7 +213,8 @@ private void appendString(List res) {
if (this.parent != null) {
this.parent.appendString(res);
}
- res.add('(' + Integer.toString(this.ruleId) + ')'); //, TODO-${this.nameScopesList}, TODO-${this.contentNameScopesList})`;
+ res.add('(' + Integer.toString(this.ruleId) + ')'); // , TODO-${this.nameScopesList},
+ // TODO-${this.contentNameScopesList})`;
}
@Override
@@ -161,14 +228,18 @@ public StackElement setContentNameScopesList(ScopeListElement contentNameScopesL
if (this.contentNameScopesList.equals(contentNameScopesList)) {
return this;
}
- return this.parent.push(this.ruleId, this.enterPosition, this.endRule, this.nameScopesList, contentNameScopesList);
+ final var parent = this.parent;
+ assert parent != null;
+ return parent.push(this.ruleId, this.enterPosition, this.anchorPos, this.beginRuleCapturedEOL, this.endRule,
+ this.nameScopesList, contentNameScopesList);
}
public StackElement setEndRule(String endRule) {
if (this.endRule != null && this.endRule.equals(endRule)) {
return this;
}
- return new StackElement(this.parent, this.ruleId, this.enterPosition, endRule, this.nameScopesList, this.contentNameScopesList);
+ return new StackElement(this.parent, this.ruleId, this.enterPosition, this.anchorPos, this.beginRuleCapturedEOL,
+ endRule, this.nameScopesList, this.contentNameScopesList); // the requested endRule, not this.endRule
}
public boolean hasSameRuleAs(final StackElement other) {
diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java
index e67221fd4..692d26c09 100644
--- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java
+++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java
@@ -50,9 +50,9 @@
/**
* TextMate grammar implementation.
*
- * @see
+ * @see
* github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts
- *
*/
public final class Grammar implements IGrammar, IRuleFactoryHelper {
@@ -198,7 +198,7 @@ public ITokenizeLineResult2 tokenizeLine2(String lineText, StackElement prevStat
}
@SuppressWarnings("unchecked")
- private T tokenize(String lineText, StackElement prevState, boolean emitBinaryTokens) {
+ private T tokenize(String lineText, @Nullable StackElement prevState, boolean emitBinaryTokens) {
if (this.rootId == -1) {
this.rootId = RuleFactory.getCompiledRuleId(this.rawGrammar.getRepository().getSelf(), this,
this.rawGrammar.getRepository());
@@ -219,7 +219,7 @@ private T tokenize(String lineText, StackElement prevState, boolean emitBina
ScopeListElement scopeList = new ScopeListElement(null, rootScopeName, rootMetadata);
- prevState = new StackElement(null, this.rootId, -1, null, scopeList, scopeList);
+ prevState = new StackElement(null, this.rootId, -1, -1, false, null, scopeList, scopeList);
} else {
isFirstLine = false;
prevState.reset();
diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java
index 9e2800f54..26ea4a5b7 100644
--- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java
+++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java
@@ -35,6 +35,11 @@
import org.eclipse.tm4e.core.internal.rule.MatchRule;
import org.eclipse.tm4e.core.internal.rule.Rule;
+/**
+ * @see
+ * github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts
+ */
final class LineTokenizer {
private static final Logger LOGGER = System.getLogger(LineTokenizer.class.getName());
@@ -145,8 +150,9 @@ private void scanNext() {
lineTokens.produce(stack, captureIndices[0].getEnd());
// pop
- StackElement popped = stack;
+ final var popped = stack;
stack = stack.pop();
+ anchorPosition = popped.getAnchorPos();
if (!hasAdvanced && popped.getEnterPos() == linePos) {
// Grammar pushed & popped a rule without advancing
@@ -171,7 +177,8 @@ private void scanNext() {
// push it on the stack rule
String scopeName = rule.getName(lineText.string, captureIndices);
ScopeListElement nameScopesList = stack.contentNameScopesList.push(grammar, scopeName);
- stack = stack.push(matchedRuleId, linePos, null, nameScopesList, nameScopesList);
+ stack = stack.push(matchedRuleId, linePos, anchorPosition,
+ captureIndices[0].getEnd() == lineText.bytesCount, null, nameScopesList, nameScopesList);
if (rule instanceof BeginEndRule) {
BeginEndRule pushedRule = (BeginEndRule) rule;
@@ -450,11 +457,11 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs
ScopeListElement contentNameScopesList = nameScopesList.push(grammar, contentName);
// the capture requires additional matching
- StackElement stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), null,
- nameScopesList, contentNameScopesList);
- tokenizeString(grammar,
- OnigString.of(lineText.string.substring(0, captureIndex.getEnd())),
- (isFirstLine && captureIndex.getStart() == 0), captureIndex.getStart(), stackClone, lineTokens);
+ StackElement stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), -1, false,
+ null, nameScopesList, contentNameScopesList);
+ final var onigSubStr = OnigString.of(lineText.string.substring(0, captureIndex.getEnd()));
+ tokenizeString(grammar, onigSubStr, (isFirstLine && captureIndex.getStart() == 0),
+ captureIndex.getStart(), stackClone, lineTokens);
continue;
}
@@ -485,7 +492,7 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs
*/
private WhileCheckResult checkWhileConditions(Grammar grammar, OnigString lineText, boolean isFirstLine,
int linePos, StackElement stack, LineTokens lineTokens) {
- int currentanchorPosition = -1;
+ int currentanchorPosition = stack.beginRuleCapturedEOL ? 0 : -1;
List whileRules = new ArrayList<>();
for (StackElement node = stack; node != null; node = node.pop()) {
Rule nodeRule = node.getRule(grammar);