Skip to content

Commit

Permalink
Fix parsing issues
Browse files Browse the repository at this point in the history
  • Loading branch information
sebthom committed Apr 28, 2022
1 parent ef63495 commit f5addea
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
*
* @see <a href="https://github.com/Microsoft/vscode-textmate/blob/master/src/tests/tests.ts">
* github.com/Microsoft/vscode-textmate/blob/master/src/tests/tests.ts</a>
*
*/
public class GrammarSuiteTest {

Expand All @@ -45,7 +44,7 @@ public class GrammarSuiteTest {
// TODO: fix these tests:
// It seems that the problem comes from the encoding. OnigString should support UTF-16 like
// https://github.com/atom/node-oniguruma/blob/master/src/onig-string.cc
private static final List<String> IGNORE_TESTS = List.of("TEST #24", "TEST #66", "Issue #82", "Issue #119");
private static final List<String> IGNORE_TESTS = List.of("TEST #24", "TEST #66");

@TestFactory
@DisplayName("Tokenization /first-mate/")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2015-2017 Angelo ZERR.
* Copyright (c) 2015-2017 Angelo ZERR.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
Expand All @@ -11,15 +11,16 @@
* Initial license: MIT
*
* Contributors:
* - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license
* - Angelo Zerr <[email protected]> - translation and adaptation to Java
* - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license
* - Angelo Zerr <[email protected]> - translation and adaptation to Java
*/
package org.eclipse.tm4e.core.grammar;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.internal.grammar.ScopeListElement;
import org.eclipse.tm4e.core.internal.rule.IRuleRegistry;
import org.eclipse.tm4e.core.internal.rule.Rule;
Expand All @@ -33,7 +34,7 @@
*/
public class StackElement {

public static final StackElement NULL = new StackElement(null, 0, 0, null, null, null);
public static final StackElement NULL = new StackElement(null, 0, 0, 0, false, null, null, null);

/**
* The position on the current line where this state was pushed.
Expand All @@ -42,10 +43,19 @@ public class StackElement {
*/
private int enterPosition;

/**
* The captured anchor position when this stack element was pushed.
* This is relevant only while tokenizing a line, to restore the anchor position when popping.
* Its value is meaningless across lines.
*/
private int anchorPos;

/**
* The previous state on the stack (or null for the root state).
*/
@Nullable
public final StackElement parent;

/**
* The depth of the stack.
*/
Expand All @@ -55,25 +65,43 @@ public class StackElement {
* The state (rule) that this element represents.
*/
public final int ruleId;

/**
* The state has entered and captured \n. This means that the next line should have an anchorPosition of 0.
*/
public final boolean beginRuleCapturedEOL;

/**
* The "pop" (end) condition for this state in case that it was dynamically generated through captured text.
*/
public final String endRule;

/**
* The list of scopes containing the "name" for this state.
*/

public final ScopeListElement nameScopesList;
/**
* The list of scopes containing the "contentName" (besides "name") for this state.
* This list **must** contain as an element `scopeName`.
*/
public final ScopeListElement contentNameScopesList;

public StackElement(StackElement parent, int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) {
public StackElement(
@Nullable StackElement parent,
int ruleId,
int enterPos,
int anchorPos,
boolean beginRuleCapturedEOL,
@Nullable String endRule,
ScopeListElement nameScopesList,
ScopeListElement contentNameScopesList) {
this.parent = parent;
this.depth = (this.parent != null ? this.parent.depth + 1 : 1);
this.ruleId = ruleId;
this.enterPosition = enterPos;
this.anchorPos = anchorPos;
this.beginRuleCapturedEOL = beginRuleCapturedEOL;
this.endRule = endRule;
this.nameScopesList = nameScopesList;
this.contentNameScopesList = contentNameScopesList;
Expand All @@ -82,44 +110,71 @@ public StackElement(StackElement parent, int ruleId, int enterPos, String endRul
/**
* A structural equals check. Does not take into account `scopes`.
*/
private static boolean structuralEquals(StackElement a, StackElement b) {
private static boolean structuralEquals(@Nullable StackElement a, @Nullable StackElement b) {
do {
if (a == b) {
return true;
}

if (a == null && b == null) {
// End of list reached for both
return true;
}

if (a == null || b == null) {
// End of list reached only for one
return false;
}

if (a.depth != b.depth || a.ruleId != b.ruleId || !Objects.equals(a.endRule, b.endRule)) {
return false;
}

// Go to previous pair
a = a.parent;
b = b.parent;
} while (true);
}

@SuppressWarnings("null")
private static boolean equals(@Nullable StackElement a, @Nullable StackElement b) {
if (a == b) {
return true;
}
if (a == null || b == null) {
if (!structuralEquals(a, b)) {
return false;
}
return a.depth == b.depth && a.ruleId == b.ruleId && Objects.equals(a.endRule, b.endRule) && structuralEquals(a.parent, b.parent);
return a.contentNameScopesList.equals(b.contentNameScopesList);
}

@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other == null) {
if (other == null || other.getClass() != StackElement.class) {
return false;
}
if (!(other instanceof StackElement)) {
return false;
}
StackElement stackElement = (StackElement)other;
return structuralEquals(this, stackElement) && this.contentNameScopesList.equals(stackElement.contentNameScopesList);
return equals(this, (StackElement) other);
}

@Override
public int hashCode() {
return Objects.hash(depth, ruleId, endRule, parent, contentNameScopesList);
final int prime = 31;
int result = 1;
result = prime * result + Objects.hash(endRule, parent, contentNameScopesList);
result = prime * result + depth;
result = prime * result + ruleId;
return result;
}

public void reset() {
StackElement el = this;
while (el != null) {
el.enterPosition = -1;
el.anchorPos = -1;
el = el.parent;
}
}

@Nullable
public StackElement pop() {
return this.parent;
}
Expand All @@ -131,8 +186,19 @@ public StackElement safePop() {
return this;
}

public StackElement push(int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) {
return new StackElement(this, ruleId, enterPos, endRule, nameScopesList, contentNameScopesList);
public StackElement push(int ruleId,
int enterPos,
int anchorPos,
boolean beginRuleCapturedEOL,
@Nullable String endRule,
ScopeListElement nameScopesList,
ScopeListElement contentNameScopesList) {
return new StackElement(this, ruleId, enterPos, anchorPos, beginRuleCapturedEOL, endRule, nameScopesList,
contentNameScopesList);
}

public int getAnchorPos() {
return anchorPos;
}

public int getEnterPos() {
Expand All @@ -147,7 +213,8 @@ private void appendString(List<String> res) {
if (this.parent != null) {
this.parent.appendString(res);
}
res.add('(' + Integer.toString(this.ruleId) + ')'); //, TODO-${this.nameScopesList}, TODO-${this.contentNameScopesList})`;
res.add('(' + Integer.toString(this.ruleId) + ')'); // , TODO-${this.nameScopesList},
// TODO-${this.contentNameScopesList})`;
}

@Override
Expand All @@ -161,14 +228,18 @@ public StackElement setContentNameScopesList(ScopeListElement contentNameScopesL
if (this.contentNameScopesList.equals(contentNameScopesList)) {
return this;
}
return this.parent.push(this.ruleId, this.enterPosition, this.endRule, this.nameScopesList, contentNameScopesList);
final var parent = this.parent;
assert parent != null;
return parent.push(this.ruleId, this.enterPosition, this.anchorPos, this.beginRuleCapturedEOL, this.endRule,
this.nameScopesList, contentNameScopesList);
}

/**
 * Returns a stack element that carries the given end rule.
 *
 * If this element already has a non-null end rule equal to {@code endRule}, this element is
 * returned unchanged. Otherwise a new element is created with the same parent, rule id, enter
 * position, anchor position, EOL-capture flag and scope lists, differing only in its end rule.
 *
 * @param endRule the "pop" (end) condition to store on the returned element
 * @return this element if the end rule is unchanged, otherwise a new element with {@code endRule}
 */
public StackElement setEndRule(String endRule) {
    if (this.endRule != null && this.endRule.equals(endRule)) {
        return this;
    }
    // Pass the requested endRule, not this.endRule: forwarding the current field value would
    // make this method a no-op copy and the new end rule would never take effect.
    return new StackElement(this.parent, this.ruleId, this.enterPosition, this.anchorPos, this.beginRuleCapturedEOL,
            endRule, this.nameScopesList, this.contentNameScopesList);
}

public boolean hasSameRuleAs(final StackElement other) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
/**
* TextMate grammar implementation.
*
* @see <a href="https://github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts">
* @see <a href=
* "https://github.com/microsoft/vscode-textmate/blob/9157c7f869219dbaf9a5a5607f099c00fe694a29/src/grammar.ts#L459">
* github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts</a>
*
*/
public final class Grammar implements IGrammar, IRuleFactoryHelper {

Expand Down Expand Up @@ -198,7 +198,7 @@ public ITokenizeLineResult2 tokenizeLine2(String lineText, StackElement prevStat
}

@SuppressWarnings("unchecked")
private <T> T tokenize(String lineText, StackElement prevState, boolean emitBinaryTokens) {
private <T> T tokenize(String lineText, @Nullable StackElement prevState, boolean emitBinaryTokens) {
if (this.rootId == -1) {
this.rootId = RuleFactory.getCompiledRuleId(this.rawGrammar.getRepository().getSelf(), this,
this.rawGrammar.getRepository());
Expand All @@ -219,7 +219,7 @@ private <T> T tokenize(String lineText, StackElement prevState, boolean emitBina

ScopeListElement scopeList = new ScopeListElement(null, rootScopeName, rootMetadata);

prevState = new StackElement(null, this.rootId, -1, null, scopeList, scopeList);
prevState = new StackElement(null, this.rootId, -1, -1, false, null, scopeList, scopeList);
} else {
isFirstLine = false;
prevState.reset();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@
import org.eclipse.tm4e.core.internal.rule.MatchRule;
import org.eclipse.tm4e.core.internal.rule.Rule;

/**
* @see <a href=
* "https://github.com/microsoft/vscode-textmate/blob/9157c7f869219dbaf9a5a5607f099c00fe694a29/src/grammar.ts#L1028">
* github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts</a>
*/
final class LineTokenizer {

private static final Logger LOGGER = System.getLogger(LineTokenizer.class.getName());
Expand Down Expand Up @@ -145,8 +150,9 @@ private void scanNext() {
lineTokens.produce(stack, captureIndices[0].getEnd());

// pop
StackElement popped = stack;
final var popped = stack;
stack = stack.pop();
anchorPosition = popped.getAnchorPos();

if (!hasAdvanced && popped.getEnterPos() == linePos) {
// Grammar pushed & popped a rule without advancing
Expand All @@ -171,7 +177,8 @@ private void scanNext() {
// push it on the stack rule
String scopeName = rule.getName(lineText.string, captureIndices);
ScopeListElement nameScopesList = stack.contentNameScopesList.push(grammar, scopeName);
stack = stack.push(matchedRuleId, linePos, null, nameScopesList, nameScopesList);
stack = stack.push(matchedRuleId, linePos, anchorPosition,
captureIndices[0].getEnd() == lineText.bytesCount, null, nameScopesList, nameScopesList);

if (rule instanceof BeginEndRule) {
BeginEndRule pushedRule = (BeginEndRule) rule;
Expand Down Expand Up @@ -450,11 +457,11 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs
ScopeListElement contentNameScopesList = nameScopesList.push(grammar, contentName);

// the capture requires additional matching
StackElement stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), null,
nameScopesList, contentNameScopesList);
tokenizeString(grammar,
OnigString.of(lineText.string.substring(0, captureIndex.getEnd())),
(isFirstLine && captureIndex.getStart() == 0), captureIndex.getStart(), stackClone, lineTokens);
StackElement stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), -1, false,
null, nameScopesList, contentNameScopesList);
final var onigSubStr = OnigString.of(lineText.string.substring(0, captureIndex.getEnd()));
tokenizeString(grammar, onigSubStr, (isFirstLine && captureIndex.getStart() == 0),
captureIndex.getStart(), stackClone, lineTokens);
continue;
}

Expand Down Expand Up @@ -485,7 +492,7 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs
*/
private WhileCheckResult checkWhileConditions(Grammar grammar, OnigString lineText, boolean isFirstLine,
int linePos, StackElement stack, LineTokens lineTokens) {
int currentanchorPosition = -1;
int currentanchorPosition = stack.beginRuleCapturedEOL ? 0 : -1;
List<WhileStack> whileRules = new ArrayList<>();
for (StackElement node = stack; node != null; node = node.pop()) {
Rule nodeRule = node.getRule(grammar);
Expand Down

0 comments on commit f5addea

Please sign in to comment.