Skip to content

Commit

Permalink
monaco: update tokenization
Browse files Browse the repository at this point in the history
The commit updates the monaco tokenization with the following
improvements for performance:

- skip tokenizations for lines exceeding the limit.
- return partial tokenizations for lines that take too long to tokenize.

The fix ultimately resolves an issue which prevented large files from
being opened and caused the application to hang.

Signed-off-by: vince-fugnitto <[email protected]>
  • Loading branch information
vince-fugnitto committed Jul 15, 2022
1 parent a70c66a commit 22f698b
Showing 1 changed file with 6 additions and 19 deletions.
25 changes: 6 additions & 19 deletions packages/monaco/src/browser/textmate/textmate-tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,42 +49,29 @@ export interface TokenizerOption {

}

export namespace TokenizerOption {
    /**
     * Default TextMate tokenizer configuration: lines longer than
     * 400 characters are not tokenized.
     *
     * @deprecated Use the current value of the `editor.maxTokenizationLineLength` preference instead.
     */
    export const DEFAULT: TokenizerOption = {
        lineLimit: 400
    };
}

export function createTextmateTokenizer(grammar: IGrammar, options: TokenizerOption): monaco.languages.EncodedTokensProvider & monaco.languages.TokensProvider {
if (options.lineLimit !== undefined && (options.lineLimit <= 0 || !Number.isInteger(options.lineLimit))) {
throw new Error(`The 'lineLimit' must be a positive integer. It was ${options.lineLimit}.`);
}
return {
getInitialState: () => new TokenizerState(INITIAL),
tokenizeEncoded(line: string, state: TokenizerState): monaco.languages.IEncodedLineTokens {
let processedLine = line;
if (options.lineLimit !== undefined && line.length > options.lineLimit) {
// Line is too long to be tokenized
processedLine = line.substring(0, options.lineLimit);
// Skip tokenizing the line if it exceeds the line limit.
return { endState: state.ruleStack, tokens: new Uint32Array() };
}
const result = grammar.tokenizeLine2(processedLine, state.ruleStack);
const result = grammar.tokenizeLine2(line, state.ruleStack, 500);
return {
endState: new TokenizerState(result.ruleStack),
tokens: result.tokens
};
},
tokenize(line: string, state: TokenizerState): monaco.languages.ILineTokens {
let processedLine = line;
if (options.lineLimit !== undefined && line.length > options.lineLimit) {
// Line is too long to be tokenized
processedLine = line.substring(0, options.lineLimit);
// Skip tokenizing the line if it exceeds the line limit.
return { endState: state.ruleStack, tokens: [] };
}
const result = grammar.tokenizeLine(processedLine, state.ruleStack);
const result = grammar.tokenizeLine(line, state.ruleStack, 500);
return {
endState: new TokenizerState(result.ruleStack),
tokens: result.tokens.map(t => ({
Expand Down

0 comments on commit 22f698b

Please sign in to comment.