Skip to content

Commit

Permalink
monaco: update tokenization
Browse files Browse the repository at this point in the history
The commit updates the monaco tokenization with the following
improvements for performance:

- skip tokenizations for lines exceeding the limit.
- return partial tokenizations for lines that take too long to tokenize.

The fix ultimately resolves an issue which prevented large files from
being opened and caused the application to hang.

Signed-off-by: vince-fugnitto <[email protected]>
  • Loading branch information
vince-fugnitto committed Jul 15, 2022
1 parent a70c66a commit 22f698b
Showing 1 changed file with 6 additions and 19 deletions.
25 changes: 6 additions & 19 deletions packages/monaco/src/browser/textmate/textmate-tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,42 +49,29 @@ export interface TokenizerOption {

}

export namespace TokenizerOption {
    /**
     * Default TextMate tokenizer configuration: lines longer than
     * 400 characters are not tokenized.
     *
     * @deprecated Use the current value of the `editor.maxTokenizationLineLength` preference instead.
     */
    export const DEFAULT: TokenizerOption = {
        lineLimit: 400
    };
}

export function createTextmateTokenizer(grammar: IGrammar, options: TokenizerOption): monaco.languages.EncodedTokensProvider & monaco.languages.TokensProvider {
if (options.lineLimit !== undefined && (options.lineLimit <= 0 || !Number.isInteger(options.lineLimit))) {
throw new Error(`The 'lineLimit' must be a positive integer. It was ${options.lineLimit}.`);
}
return {
getInitialState: () => new TokenizerState(INITIAL),
tokenizeEncoded(line: string, state: TokenizerState): monaco.languages.IEncodedLineTokens {
let processedLine = line;
if (options.lineLimit !== undefined && line.length > options.lineLimit) {
// Line is too long to be tokenized
processedLine = line.substring(0, options.lineLimit);
// Skip tokenizing the line if it exceeds the line limit.
return { endState: state.ruleStack, tokens: new Uint32Array() };
}
const result = grammar.tokenizeLine2(processedLine, state.ruleStack);
const result = grammar.tokenizeLine2(line, state.ruleStack, 500);
return {
endState: new TokenizerState(result.ruleStack),
tokens: result.tokens
};
},
tokenize(line: string, state: TokenizerState): monaco.languages.ILineTokens {
let processedLine = line;
if (options.lineLimit !== undefined && line.length > options.lineLimit) {
// Line is too long to be tokenized
processedLine = line.substring(0, options.lineLimit);
// Skip tokenizing the line if it exceeds the line limit.
return { endState: state.ruleStack, tokens: [] };
}
const result = grammar.tokenizeLine(processedLine, state.ruleStack);
const result = grammar.tokenizeLine(line, state.ruleStack, 500);
return {
endState: new TokenizerState(result.ruleStack),
tokens: result.tokens.map(t => ({
Expand Down

0 comments on commit 22f698b

Please sign in to comment.