From 33bef274ad99cdda65afb79dd900666bef0388f8 Mon Sep 17 00:00:00 2001
From: Henning Dieterichs
Date: Mon, 9 Aug 2021 12:37:19 +0200
Subject: [PATCH] Adds tokenizer test.

---
 .../bracketPairColorizer/tokenizer.test.ts | 167 ++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 src/vs/editor/test/common/model/bracketPairColorizer/tokenizer.test.ts

diff --git a/src/vs/editor/test/common/model/bracketPairColorizer/tokenizer.test.ts b/src/vs/editor/test/common/model/bracketPairColorizer/tokenizer.test.ts
new file mode 100644
index 0000000000000..2b2a00b769e8b
--- /dev/null
+++ b/src/vs/editor/test/common/model/bracketPairColorizer/tokenizer.test.ts
@@ -0,0 +1,167 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import assert = require('assert');
+import { DisposableStore } from 'vs/base/common/lifecycle';
+import { TokenizationResult2 } from 'vs/editor/common/core/token';
+import { LanguageAgnosticBracketTokens } from 'vs/editor/common/model/bracketPairColorizer/brackets';
+import { Length, lengthAdd, lengthsToRange, lengthZero } from 'vs/editor/common/model/bracketPairColorizer/length';
+import { TextBufferTokenizer, Token, Tokenizer, TokenKind } from 'vs/editor/common/model/bracketPairColorizer/tokenizer';
+import { TextModel } from 'vs/editor/common/model/textModel';
+import { IState, ITokenizationSupport, LanguageId, LanguageIdentifier, MetadataConsts, StandardTokenType, TokenizationRegistry } from 'vs/editor/common/modes';
+import { LanguageConfigurationRegistry } from 'vs/editor/common/modes/languageConfigurationRegistry';
+import { createTextModel } from 'vs/editor/test/common/editorTestUtils';
+
+suite('Bracket Pair Colorizer - Tokenizer', () => {
+	test('Basic', () => {
+		const mode1 = new LanguageIdentifier('testMode1', 2);
+
+		const tStandard = (text: string) => new TokenInfo(text, mode1.id, StandardTokenType.Other);
+		const tComment = (text: string) => new TokenInfo(text, mode1.id, StandardTokenType.Comment);
+		const document = new TokenizedDocument([
+			tStandard(' { } '), tStandard('be'), tStandard('gin end'), tStandard('\n'),
+			tStandard('hello'), tComment('{'), tStandard('}'),
+		]);
+
+		const disposableStore = new DisposableStore();
+		disposableStore.add(TokenizationRegistry.register(mode1.language, document.getTokenizationSupport()));
+		disposableStore.add(LanguageConfigurationRegistry.register(mode1, {
+			brackets: [['{', '}'], ['[', ']'], ['(', ')']],
+		}));
+
+		const brackets = new LanguageAgnosticBracketTokens([['begin', 'end']]);
+
+		const model = createTextModel(document.getText(), {}, mode1);
+		model.forceTokenization(model.getLineCount());
+
+		const tokens = readAllTokens(new TextBufferTokenizer(model, brackets));
+
+		assert.deepStrictEqual(toArr(tokens, model), [
+			{ category: -1, kind: 'Text', languageId: -1, text: ' ', },
+			{ category: 2000, kind: 'OpeningBracket', languageId: 2, text: '{', },
+			{ category: -1, kind: 'Text', languageId: -1, text: ' ', },
+			{ category: 2000, kind: 'ClosingBracket', languageId: 2, text: '}', },
+			{ category: -1, kind: 'Text', languageId: -1, text: ' \nhello{', },
+			{ category: 2000, kind: 'ClosingBracket', languageId: 2, text: '}', }
+		]);
+
+		disposableStore.dispose();
+	});
+});
+
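+// Test helpers: readAllTokens drains the tokenizer, toArr/tokenToObj resolve each
+// token's text from the model via the cumulative token lengths and turn it into a
+// plain object for deepStrictEqual, and TokenizedDocument/TokenInfo provide a canned
+// ITokenizationSupport for the test language registered above.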
+function readAllTokens(tokenizer: Tokenizer): Token[] {
+	const tokens = new Array<Token>();
+	while (true) {
+		const token = tokenizer.read();
+		if (!token) {
+			break;
+		}
+		tokens.push(token);
+	}
+	return tokens;
+}
+
+function toArr(tokens: Token[], model: TextModel): any[] {
+	const result = new Array<any>();
+	let offset = lengthZero;
+	for (const token of tokens) {
+		result.push(tokenToObj(token, offset, model));
+		offset = lengthAdd(offset, token.length);
+	}
+	return result;
+}
+
+function tokenToObj(token: Token, offset: Length, model: TextModel): any {
+	return {
+		text: model.getValueInRange(lengthsToRange(offset, lengthAdd(offset, token.length))),
+		category: token.category,
+		kind: {
+			[TokenKind.ClosingBracket]: 'ClosingBracket',
+			[TokenKind.OpeningBracket]: 'OpeningBracket',
+			[TokenKind.Text]: 'Text',
+		}[token.kind],
+		languageId: token.languageId,
+	};
+}
+
+class TokenizedDocument {
+	private readonly tokensByLine: readonly TokenInfo[][];
+
+	constructor(tokens: TokenInfo[]) {
+		const tokensByLine = new Array<TokenInfo[]>();
+		let curLine = new Array<TokenInfo>();
+
+		for (const token of tokens) {
+			const lines = token.text.split('\n');
+			let first = true;
+			while (lines.length > 0) {
+				if (!first) {
+					tokensByLine.push(curLine);
+					curLine = new Array<TokenInfo>();
+				} else {
+					first = false;
+				}
+
+				if (lines[0].length > 0) {
+					curLine.push(token.withText(lines[0]));
+				}
+				lines.pop();
+			}
+		}
+
+		tokensByLine.push(curLine);
+
+		this.tokensByLine = tokensByLine;
+	}
+
+	getText() {
+		return this.tokensByLine.map(t => t.map(t => t.text).join('')).join('\n');
+	}
+
+	getTokenizationSupport(): ITokenizationSupport {
+		class State implements IState {
+			constructor(public readonly lineNumber: number) { }
+
+			clone(): IState {
+				return new State(this.lineNumber);
+			}
+
+			equals(other: IState): boolean {
+				return this.lineNumber === (other as State).lineNumber;
+			}
+		}
+
+		return {
+			getInitialState: () => new State(0),
+			tokenize: () => { throw new Error('Method not implemented.'); },
+			tokenize2: (line: string, hasEOL: boolean, state: IState, offsetDelta: number): TokenizationResult2 => {
+				const state2 = state as State;
+				const tokens = this.tokensByLine[state2.lineNumber];
+				const arr = new Array<number>();
+				let offset = 0;
+				for (const t of tokens) {
+					arr.push(offset, t.getMetadata());
+					offset += t.text.length;
+				}
+
+				return new TokenizationResult2(new Uint32Array(arr), new State(state2.lineNumber + 1));
+			}
+		};
+	}
+}
+
+class TokenInfo {
+	constructor(public readonly text: string, public readonly languageId: LanguageId, public readonly tokenType: StandardTokenType) { }
+
+	getMetadata(): number {
+		return (
+			(this.languageId << MetadataConsts.LANGUAGEID_OFFSET)
+			| (this.tokenType << MetadataConsts.TOKEN_TYPE_OFFSET)
+		) >>> 0;
+	}
+
+	withText(text: string): TokenInfo {
+		return new TokenInfo(text, this.languageId, this.tokenType);
+	}
+}