Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bit-pack the lexer's token info #4270

Merged
merged 21 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 126 additions & 86 deletions toolchain/lex/lex.cpp

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions toolchain/lex/testdata/basic_syntax.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@
// TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/basic_syntax.carbon
// CHECK:STDOUT: - filename: basic_syntax.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },

fn run(String program) {
// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_trailing_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'run', identifier: 0 },
// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_leading_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'run', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 3, kind: 'OpenParen', line: {{ *}}[[@LINE-3]], column: 7, indent: 1, spelling: '(', closing_token: 6 },
// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-4]], column: 8, indent: 1, spelling: 'String', has_trailing_space: true },
// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'program', identifier: 1 },
// CHECK:STDOUT: { index: 6, kind: 'CloseParen', line: {{ *}}[[@LINE-6]], column: 22, indent: 1, spelling: ')', opening_token: 3, has_trailing_space: true },
// CHECK:STDOUT: { index: 7, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: '{', closing_token: 11, has_trailing_space: true },
// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-4]], column: 8, indent: 1, spelling: 'String' },
// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'program', identifier: 1, has_leading_space: true },
// CHECK:STDOUT: { index: 6, kind: 'CloseParen', line: {{ *}}[[@LINE-6]], column: 22, indent: 1, spelling: ')', opening_token: 3 },
// CHECK:STDOUT: { index: 7, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: '{', closing_token: 11, has_leading_space: true },
return True;
// CHECK:STDOUT: { index: 8, kind: 'Return', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'return', has_trailing_space: true },
// CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 10, indent: 3, spelling: 'True', identifier: 2 },
// CHECK:STDOUT: { index: 10, kind: 'Semi', line: {{ *}}[[@LINE-3]], column: 14, indent: 3, spelling: ';', has_trailing_space: true },
// CHECK:STDOUT: { index: 8, kind: 'Return', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'return', has_leading_space: true },
// CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 10, indent: 3, spelling: 'True', identifier: 2, has_leading_space: true },
// CHECK:STDOUT: { index: 10, kind: 'Semi', line: {{ *}}[[@LINE-3]], column: 14, indent: 3, spelling: ';' },
}
// CHECK:STDOUT: { index: 11, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 7, has_trailing_space: true },
// CHECK:STDOUT: { index: 11, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 7, has_leading_space: true },

// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
4 changes: 2 additions & 2 deletions toolchain/lex/testdata/fail_bad_comment_introducers.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/fail_bad_comment_introducers.carbon
// CHECK:STDOUT: - filename: fail_bad_comment_introducers.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },
//

// Comments have to have whitespace after `//` currently.
Expand Down Expand Up @@ -58,5 +58,5 @@

// An extra un-indented comment line to anchor the end of the file checks.

// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
// CHECK:STDERR: ^
// CHECK:STDOUT: - filename: fail_bad_comment_introducers_mid_block_indent_change.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },

// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
46 changes: 23 additions & 23 deletions toolchain/lex/testdata/fail_bad_raw_identifier.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// --- fail_bad_raw_identifier.carbon
// CHECK:STDOUT: - filename: fail_bad_raw_identifier.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },


// Missing the character after `#`.
Expand All @@ -20,87 +20,87 @@
// CHECK:STDERR: ^
// CHECK:STDERR:
r#
// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 },
// CHECK:STDOUT: { index: 2, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#', has_trailing_space: true },
// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' },

// Not a valid identifier.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: r#3
// CHECK:STDERR: ^
// CHECK:STDERR:
r#3
// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 },
// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 4, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 5, kind: 'IntLiteral', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: '3', value: `3`, has_trailing_space: true },
// CHECK:STDOUT: { index: 5, kind: 'IntLiteral', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: '3', value: `3` },

// Non ascii start to identifier.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: r#á
// CHECK:STDERR: ^
// CHECK:STDERR:
r#á
// CHECK:STDOUT: { index: 6, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 },
// CHECK:STDOUT: { index: 7, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#á', has_trailing_space: true },
// CHECK:STDOUT: { index: 6, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 7, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#á' },

// Raw `r` identifier doesn't start a second raw identifier.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:4: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: r#r#foo
// CHECK:STDERR: ^
// CHECK:STDERR:
r#r#foo
// CHECK:STDOUT: { index: 8, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 },
// CHECK:STDOUT: { index: 8, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 9, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true },
// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1 },

// Other identifier characters don't start a raw identifier.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: s#foo
// CHECK:STDERR: ^
// CHECK:STDERR:
s#foo
// CHECK:STDOUT: { index: 11, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 's', identifier: 2 },
// CHECK:STDOUT: { index: 11, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 's', identifier: 2, has_leading_space: true },
// CHECK:STDOUT: { index: 12, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 13, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true },
// CHECK:STDOUT: { index: 13, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: 'foo', identifier: 1 },

// Identifier ending in `r` doesn't start a raw identifier.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:4: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: arr#foo
// CHECK:STDERR: ^
// CHECK:STDERR:
arr#foo
// CHECK:STDOUT: { index: 14, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'arr', identifier: 3 },
// CHECK:STDOUT: { index: 14, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'arr', identifier: 3, has_leading_space: true },
// CHECK:STDOUT: { index: 15, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true },
// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1 },

// Whitespace between `r` and `#` isn't allowed.
// CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:3: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: r #foo
// CHECK:STDERR: ^
// CHECK:STDERR:
r #foo
// CHECK:STDOUT: { index: 17, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_trailing_space: true },
// CHECK:STDOUT: { index: 18, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 19, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 4, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true },
// CHECK:STDOUT: { index: 17, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 18, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '#', has_leading_space: true },
// CHECK:STDOUT: { index: 19, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 4, indent: 1, spelling: 'foo', identifier: 1 },

// This is an `r` identifier followed by a string literal.
r#"hello"#
// CHECK:STDOUT: { index: 20, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 },
// CHECK:STDOUT: { index: 21, kind: 'StringLiteral', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#"hello"#', value: `hello`, has_trailing_space: true },
// CHECK:STDOUT: { index: 20, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 21, kind: 'StringLiteral', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#"hello"#', value: `hello` },

// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
// --- fail_hash_at_start_of_file.carbon
// CHECK:STDOUT: - filename: fail_hash_at_start_of_file.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },

// Ensure that we correctly handle a `#` as the first token in the file.
// CHECK:STDERR: fail_hash_at_start_of_file.carbon:[[@LINE+3]]:1: ERROR: Encountered unrecognized characters while parsing.
// CHECK:STDERR: #foo
// CHECK:STDERR: ^
#foo
// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '#' },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: 'foo', identifier: 0, has_trailing_space: true },
// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '#', has_leading_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: 'foo', identifier: 0 },

// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
16 changes: 8 additions & 8 deletions toolchain/lex/testdata/fail_block_string_second_line.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ var s: String = '''
// CHECK:STDERR: ^
// CHECK:STDOUT: - filename: fail_block_string_second_line.carbon
// CHECK:STDOUT: tokens: [
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-17]], column: 1, indent: 1, spelling: 'var', has_trailing_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-18]], column: 5, indent: 1, spelling: 's', identifier: 0 },
// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-19]], column: 6, indent: 1, spelling: ':', has_trailing_space: true },
// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-20]], column: 8, indent: 1, spelling: 'String', has_trailing_space: true },
// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-21]], column: 15, indent: 1, spelling: '=', has_trailing_space: true },
// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-17]], column: 1, indent: 1, spelling: 'var', has_leading_space: true },
// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-18]], column: 5, indent: 1, spelling: 's', identifier: 0, has_leading_space: true },
// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-19]], column: 6, indent: 1, spelling: ':' },
// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-20]], column: 8, indent: 1, spelling: 'String', has_leading_space: true },
// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-21]], column: 15, indent: 1, spelling: '=', has_leading_space: true },
// CHECK:STDOUT: { index: 6, kind: 'StringLiteral', line: {{ *}}[[@LINE-22]], column: 17, indent: 1, spelling: ''''
// CHECK:STDOUT: error here: '''', value: `error here: `, has_trailing_space: true },
// CHECK:STDOUT: error here: '''', value: `error here: `, has_leading_space: true },

// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' },
// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true },
// CHECK:STDOUT: ]
Loading
Loading