Skip to content

Commit

Permalink
lexer: allow TABS for indentation
Browse files Browse the repository at this point in the history
Allow TAB character to be used for indentation.

It is useful to be able to indent with the TAB character, especially when
parts of build.ninja are hand-written as HEREDOCs in an otherwise
TAB-indented file (where TABs are either mandated by the style of the rest
of the project, or required by the language itself).

Changing the lexer is easy thanks to the use of re2c. The syntax is perhaps
a bit too permissive now, but it is the job of the parser to reject the use
of mixed indentation.

Let's stop complaining that:
ninja: error: build.ninja:3: expected 'command =' line
when it is exactly:
	command = cc $cflags -c $in -o $out

Closes #1598
Signed-off-by: Przemek Kitszel <[email protected]>
  • Loading branch information
pkitszel committed Mar 13, 2024
1 parent ab510c7 commit b6060be
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 44 deletions.
65 changes: 33 additions & 32 deletions src/lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
Expand All @@ -130,7 +127,7 @@ Lexer::Token Lexer::ReadToken() {
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 128,
128, 160, 0, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
160, 128, 128, 128, 128, 128, 128, 128,
Expand Down Expand Up @@ -164,16 +161,17 @@ Lexer::Token Lexer::ReadToken() {
};
yych = *p;
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '^') {
if (yych <= ',') {
if (yych <= '\f') {
if (yych <= 0x00) goto yy2;
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy4;
if (yych <= '\n') goto yy9;
goto yy4;
} else {
if (yych <= '\r') goto yy8;
if (yych <= '\r') goto yy11;
if (yych == '#') goto yy12;
goto yy4;
}
Expand Down Expand Up @@ -228,31 +226,32 @@ Lexer::Token Lexer::ReadToken() {
yy5:
{ token = ERROR; break; }
yy6:
++p;
{ token = NEWLINE; break; }
yy8:
yych = *++p;
if (yych == '\n') goto yy28;
goto yy5;
yy9:
yyaccept = 0;
yych = *(q = ++p);
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '\f') {
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy8;
if (yych <= '\n') goto yy9;
} else {
if (yych <= '\r') goto yy30;
if (yych == '#') goto yy32;
if (yych <= '\r') goto yy28;
if (yych == '#') goto yy30;
}
yy11:
yy8:
{ token = INDENT; break; }
yy9:
++p;
{ token = NEWLINE; break; }
yy11:
yych = *++p;
if (yych == '\n') goto yy32;
goto yy5;
yy12:
yyaccept = 1;
yych = *(q = ++p);
if (yych <= 0x00) goto yy5;
goto yy33;
goto yy31;
yy13:
yych = *++p;
yy14:
Expand Down Expand Up @@ -296,25 +295,27 @@ Lexer::Token Lexer::ReadToken() {
if (yych == '|') goto yy44;
{ token = PIPE; break; }
yy28:
++p;
{ token = NEWLINE; break; }
yy30:
yych = *++p;
if (yych == '\n') goto yy28;
yy31:
if (yych == '\n') goto yy32;
yy29:
p = q;
if (yyaccept == 0) {
goto yy11;
goto yy8;
} else {
goto yy5;
}
yy32:
yy30:
yych = *++p;
yy33:
yy31:
if (yybm[0+yych] & 128) {
goto yy32;
goto yy30;
}
if (yych <= 0x00) goto yy31;
if (yych <= 0x00) goto yy29;
goto yy34;
yy32:
++p;
{ token = NEWLINE; break; }
yy34:
++p;
{ continue; }
yy36:
Expand Down
15 changes: 6 additions & 9 deletions src/lexer.in.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
Expand All @@ -133,10 +130,10 @@ Lexer::Token Lexer::ReadToken() {
simple_varname = [a-zA-Z0-9_-]+;
varname = [a-zA-Z0-9_.-]+;
[ ]*"#"[^\000\n]*"\n" { continue; }
[ ]*"\r\n" { token = NEWLINE; break; }
[ ]*"\n" { token = NEWLINE; break; }
[ ]+ { token = INDENT; break; }
[ \t]*"#"[^\000\n]*"\n" { continue; }
[ \t]*"\r\n" { token = NEWLINE; break; }
[ \t]*"\n" { token = NEWLINE; break; }
[ \t]+ { token = INDENT; break; }
"build" { token = BUILD; break; }
"pool" { token = POOL; break; }
"rule" { token = RULE; break; }
Expand Down
6 changes: 3 additions & 3 deletions src/lexer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ TEST(Lexer, CommentEOF) {

TEST(Lexer, Tabs) {
// Verify we print a useful error on a disallowed character.
Lexer lexer(" \tfoobar");
Lexer lexer("\tcommand = foo $in");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::INDENT, token);

token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError());
EXPECT_EQ(Lexer::IDENT, token);
}

0 comments on commit b6060be

Please sign in to comment.