Skip to content

Commit

Permalink
fix: do not peek beyond comments if indent, dedent, or newline aren't valid
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Sep 21, 2023
1 parent c01fb4e commit 9b23918
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 137 deletions.
256 changes: 128 additions & 128 deletions src/parser.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <tree_sitter/parser.h>
#include "tree_sitter/parser.h"

#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
Expand All @@ -16,7 +16,7 @@
#define MAX_ALIAS_SEQUENCE_LENGTH 10
#define PRODUCTION_ID_COUNT 140

enum {
enum ts_symbol_identifiers {
sym_identifier = 1,
anon_sym_SEMI = 2,
anon_sym_import = 3,
Expand Down Expand Up @@ -288,8 +288,8 @@ enum {
aux_sym_format_specifier_repeat1 = 269,
alias_sym_as_pattern_target = 270,
alias_sym_format_expression = 271,
anon_alias_sym_is_SPACEnot = 272,
anon_alias_sym_not_SPACEin = 273,
anon_alias_sym_isnot = 272,
anon_alias_sym_notin = 273,
};

static const char * const ts_symbol_names[] = {
Expand Down Expand Up @@ -565,8 +565,8 @@ static const char * const ts_symbol_names[] = {
[aux_sym_format_specifier_repeat1] = "format_specifier_repeat1",
[alias_sym_as_pattern_target] = "as_pattern_target",
[alias_sym_format_expression] = "format_expression",
[anon_alias_sym_is_SPACEnot] = "is not",
[anon_alias_sym_not_SPACEin] = "not in",
[anon_alias_sym_isnot] = "is not",
[anon_alias_sym_notin] = "not in",
};

static const TSSymbol ts_symbol_map[] = {
Expand Down Expand Up @@ -842,8 +842,8 @@ static const TSSymbol ts_symbol_map[] = {
[aux_sym_format_specifier_repeat1] = aux_sym_format_specifier_repeat1,
[alias_sym_as_pattern_target] = alias_sym_as_pattern_target,
[alias_sym_format_expression] = alias_sym_format_expression,
[anon_alias_sym_is_SPACEnot] = anon_alias_sym_is_SPACEnot,
[anon_alias_sym_not_SPACEin] = anon_alias_sym_not_SPACEin,
[anon_alias_sym_isnot] = anon_alias_sym_isnot,
[anon_alias_sym_notin] = anon_alias_sym_notin,
};

static const TSSymbolMetadata ts_symbol_metadata[] = {
Expand Down Expand Up @@ -1939,17 +1939,17 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true,
.named = true,
},
[anon_alias_sym_is_SPACEnot] = {
[anon_alias_sym_isnot] = {
.visible = true,
.named = false,
},
[anon_alias_sym_not_SPACEin] = {
[anon_alias_sym_notin] = {
.visible = true,
.named = false,
},
};

enum {
enum ts_field_identifiers {
field_alias = 1,
field_alternative = 2,
field_argument = 3,
Expand Down Expand Up @@ -2650,12 +2650,12 @@ static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE
[3] = sym_block,
},
[49] = {
[0] = anon_alias_sym_not_SPACEin,
[1] = anon_alias_sym_not_SPACEin,
[0] = anon_alias_sym_notin,
[1] = anon_alias_sym_notin,
},
[50] = {
[0] = anon_alias_sym_is_SPACEnot,
[1] = anon_alias_sym_is_SPACEnot,
[0] = anon_alias_sym_isnot,
[1] = anon_alias_sym_isnot,
},
[51] = {
[0] = alias_sym_format_expression,
Expand Down Expand Up @@ -12751,119 +12751,6 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[2818] = {.lex_state = 54, .external_lex_state = 11},
};

// Indices of the external tokens, numbered 0 through 10. They are used as the
// designated indices of ts_external_scanner_symbol_map and as the column
// indices of ts_external_scanner_states.
// NOTE(review): presumably this order has to match the token enum used by
// scanner.c when it indexes valid_symbols — confirm against the scanner.
enum {
ts_external_token__newline = 0,
ts_external_token__indent = 1,
ts_external_token__dedent = 2,
ts_external_token_string_start = 3,
ts_external_token__string_content = 4,
ts_external_token_escape_interpolation = 5,
ts_external_token_string_end = 6,
ts_external_token_comment = 7,
ts_external_token_RBRACK = 8,
ts_external_token_RPAREN = 9,
ts_external_token_RBRACE = 10,
};

// Maps each external token index to the grammar symbol it stands for.
// The first eight entries map to sym_* symbols; the three closing-bracket
// tokens map to the anon_sym_* symbols for the same punctuation.
static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
[ts_external_token__newline] = sym__newline,
[ts_external_token__indent] = sym__indent,
[ts_external_token__dedent] = sym__dedent,
[ts_external_token_string_start] = sym_string_start,
[ts_external_token__string_content] = sym__string_content,
[ts_external_token_escape_interpolation] = sym_escape_interpolation,
[ts_external_token_string_end] = sym_string_end,
[ts_external_token_comment] = sym_comment,
[ts_external_token_RBRACK] = anon_sym_RBRACK,
[ts_external_token_RPAREN] = anon_sym_RPAREN,
[ts_external_token_RBRACE] = anon_sym_RBRACE,
};

// Table of 17 rows, each holding one boolean flag per external token.
// Only the tokens listed in a row are set to true; everything not listed is
// zero-initialized, i.e. false. Row 0 is never listed, so it has no token set.
// NOTE(review): presumably a row is selected by the .external_lex_state value
// in ts_lex_modes and handed to the external scanner as valid_symbols —
// confirm against the tree-sitter runtime.
static const bool ts_external_scanner_states[17][EXTERNAL_TOKEN_COUNT] = {
// Row 1 sets the flag for every external token.
[1] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token__dedent] = true,
[ts_external_token_string_start] = true,
[ts_external_token__string_content] = true,
[ts_external_token_escape_interpolation] = true,
[ts_external_token_string_end] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
[ts_external_token_RPAREN] = true,
[ts_external_token_RBRACE] = true,
},
[2] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[3] = {
[ts_external_token__dedent] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[4] = {
[ts_external_token__newline] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[5] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[6] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACE] = true,
},
[7] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RPAREN] = true,
},
[8] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
},
[9] = {
[ts_external_token__newline] = true,
[ts_external_token_comment] = true,
},
[10] = {
[ts_external_token_comment] = true,
[ts_external_token_RBRACE] = true,
},
[11] = {
[ts_external_token_comment] = true,
[ts_external_token_RPAREN] = true,
},
// Row 12 sets only the comment token.
[12] = {
[ts_external_token_comment] = true,
},
[13] = {
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
},
[14] = {
[ts_external_token__string_content] = true,
[ts_external_token_escape_interpolation] = true,
[ts_external_token_string_end] = true,
[ts_external_token_comment] = true,
},
[15] = {
[ts_external_token__dedent] = true,
[ts_external_token_comment] = true,
},
[16] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token_comment] = true,
},
};

static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[0] = {
[ts_builtin_sym_end] = ACTIONS(1),
Expand Down Expand Up @@ -132805,6 +132692,119 @@ static const TSParseActionEntry ts_parse_actions[] = {
[5071] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2540),
};

// Indices of the external tokens, numbered 0 through 10. They are used as the
// designated indices of ts_external_scanner_symbol_map and as the column
// indices of ts_external_scanner_states.
// NOTE(review): presumably this order has to match the token enum used by
// scanner.c when it indexes valid_symbols — confirm against the scanner.
enum ts_external_scanner_symbol_identifiers {
ts_external_token__newline = 0,
ts_external_token__indent = 1,
ts_external_token__dedent = 2,
ts_external_token_string_start = 3,
ts_external_token__string_content = 4,
ts_external_token_escape_interpolation = 5,
ts_external_token_string_end = 6,
ts_external_token_comment = 7,
ts_external_token_RBRACK = 8,
ts_external_token_RPAREN = 9,
ts_external_token_RBRACE = 10,
};

// Maps each external token index to the grammar symbol it stands for.
// The first eight entries map to sym_* symbols; the three closing-bracket
// tokens map to the anon_sym_* symbols for the same punctuation.
static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
[ts_external_token__newline] = sym__newline,
[ts_external_token__indent] = sym__indent,
[ts_external_token__dedent] = sym__dedent,
[ts_external_token_string_start] = sym_string_start,
[ts_external_token__string_content] = sym__string_content,
[ts_external_token_escape_interpolation] = sym_escape_interpolation,
[ts_external_token_string_end] = sym_string_end,
[ts_external_token_comment] = sym_comment,
[ts_external_token_RBRACK] = anon_sym_RBRACK,
[ts_external_token_RPAREN] = anon_sym_RPAREN,
[ts_external_token_RBRACE] = anon_sym_RBRACE,
};

// Table of 17 rows, each holding one boolean flag per external token.
// Only the tokens listed in a row are set to true; everything not listed is
// zero-initialized, i.e. false. Row 0 is never listed, so it has no token set.
// NOTE(review): presumably a row is selected by the .external_lex_state value
// in ts_lex_modes and handed to the external scanner as valid_symbols —
// confirm against the tree-sitter runtime.
static const bool ts_external_scanner_states[17][EXTERNAL_TOKEN_COUNT] = {
// Row 1 sets the flag for every external token.
[1] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token__dedent] = true,
[ts_external_token_string_start] = true,
[ts_external_token__string_content] = true,
[ts_external_token_escape_interpolation] = true,
[ts_external_token_string_end] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
[ts_external_token_RPAREN] = true,
[ts_external_token_RBRACE] = true,
},
[2] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[3] = {
[ts_external_token__dedent] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[4] = {
[ts_external_token__newline] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[5] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
},
[6] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACE] = true,
},
[7] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RPAREN] = true,
},
[8] = {
[ts_external_token_string_start] = true,
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
},
[9] = {
[ts_external_token__newline] = true,
[ts_external_token_comment] = true,
},
[10] = {
[ts_external_token_comment] = true,
[ts_external_token_RBRACE] = true,
},
[11] = {
[ts_external_token_comment] = true,
[ts_external_token_RPAREN] = true,
},
// Row 12 sets only the comment token.
[12] = {
[ts_external_token_comment] = true,
},
[13] = {
[ts_external_token_comment] = true,
[ts_external_token_RBRACK] = true,
},
[14] = {
[ts_external_token__string_content] = true,
[ts_external_token_escape_interpolation] = true,
[ts_external_token_string_end] = true,
[ts_external_token_comment] = true,
},
[15] = {
[ts_external_token__dedent] = true,
[ts_external_token_comment] = true,
},
[16] = {
[ts_external_token__newline] = true,
[ts_external_token__indent] = true,
[ts_external_token_comment] = true,
},
};

#ifdef __cplusplus
extern "C" {
#endif
Expand Down
22 changes: 13 additions & 9 deletions src/scanner.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include "tree_sitter/parser.h"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <tree_sitter/parser.h>

// Yields the larger of a and b (b when they compare equal).
// This is a function-like macro, so the selected argument is evaluated twice:
// do not pass expressions with side effects, e.g. MAX(i++, j).
#define MAX(a, b) ((a) > (b) ? (a) : (b))

Expand Down Expand Up @@ -173,9 +174,9 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
valid_symbols[CLOSE_PAREN] ||
valid_symbols[CLOSE_BRACKET];

bool advanced_once = false;
bool advanced_once = false;
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.len > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') &&
(lexer->lookahead == '{' || lexer->lookahead == '}') &&
!error_recovery_mode) {
Delimiter delimiter = VEC_BACK(scanner->delimiters);
if (is_format(&delimiter)) {
Expand All @@ -200,7 +201,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
int32_t end_char = end_character(&delimiter);
bool has_content = advanced_once;
while (lexer->lookahead) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') &&
if ((advanced_once || lexer->lookahead == '{' ||
lexer->lookahead == '}') &&
is_format(&delimiter)) {
lexer->mark_end(lexer);
lexer->result_symbol = STRING_CONTENT;
Expand All @@ -216,12 +218,12 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
advance(lexer);
}
// Step over newlines
if (lexer -> lookahead == '\r') {
advance(lexer);
if (lexer -> lookahead == '\n') {
if (lexer->lookahead == '\r') {
advance(lexer);
if (lexer->lookahead == '\n') {
advance(lexer);
}
} else if (lexer -> lookahead == '\n') {
} else if (lexer->lookahead == '\n') {
advance(lexer);
}
continue;
Expand Down Expand Up @@ -309,7 +311,9 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
} else if (lexer->lookahead == '\t') {
indent_length += 8;
skip(lexer);
} else if (lexer->lookahead == '#') {
} else if (lexer->lookahead == '#' &&
(valid_symbols[INDENT] || valid_symbols[DEDENT] ||
valid_symbols[NEWLINE])) {
// If we haven't found an EOL yet,
// then this is a comment after an expression:
// foo = bar # comment
Expand Down

0 comments on commit 9b23918

Please sign in to comment.