From 2aff63223caeac835ac28fcd301f621cb045b74b Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Sun, 29 Apr 2018 17:34:34 +0200 Subject: [PATCH] build: update the grammar/parser generation rules CockroachDB uses a two-step process to generate a SQL syntax parser: the main definition file `sql.y` is first pre-processed (to add error handling and contextual help) into another `.y` file, and then that file is given as input to `goyacc`. Prior to this patch, the result of preprocessing would also be named `sql.y` (albeit in a separate directory). This creates a hurdle when troubleshooting changes to the grammar: when goyacc complains, it refers to its input file by name, without directory context. For the contributor unaware of the two-step process described above, it is confusing to see an error refer to a file named "`sql.y`", open that file in its expected location (`sql/parser`) and see no relevant context there (because the error really refers to `sql/parser/gen/sql.y`). The first change in this patch aims to alleviate this confusion, by clarifying that `goyacc` operates on a different file: the intermediate file is now called `sql-gen.y`. The seond change works around a bug in `goyacc`: when generating an error message, `goyacc` computes a line number based on all non-comment lines in the `.y` file (specifically, multi-line comments with `/*...*/` are treated as one line). This means that if `goyacc` is given a file with multi-line comments, the line number information becomes unusable to navigate to the context of the error. This patch works around this bug in `goyacc` by stripping comments from `sql-gen.y`. Release note: None --- Makefile | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 2a4e17e92d5e..87f60352c255 100644 --- a/Makefile +++ b/Makefile @@ -1155,9 +1155,9 @@ ui-maintainer-clean: ui-clean rm -rf $(UI_ROOT)/node_modules $(YARN_INSTALLED_TARGET) .SECONDARY: $(SQLPARSER_ROOT)/gen/sql.go.tmp -$(SQLPARSER_ROOT)/gen/sql.go.tmp: $(SQLPARSER_ROOT)/gen/sql.y $(BOOTSTRAP_TARGET) +$(SQLPARSER_ROOT)/gen/sql.go.tmp: $(SQLPARSER_ROOT)/gen/sql-gen.y $(BOOTSTRAP_TARGET) set -euo pipefail; \ - ret=$$(cd $(SQLPARSER_ROOT)/gen && goyacc -p sql -o sql.go.tmp sql.y); \ + ret=$$(cd $(SQLPARSER_ROOT)/gen && goyacc -p sql -o sql.go.tmp sql-gen.y); \ if expr "$$ret" : ".*conflicts" >/dev/null; then \ echo "$$ret"; exit 1; \ fi @@ -1172,8 +1172,8 @@ $(PKG_ROOT)/sql/lex/tokens.go: $(SQLPARSER_ROOT)/gen/sql.go.tmp echo; \ echo "package lex"; \ echo; \ - grep '^const [A-Z][_A-Z0-9]* ' $^) > $@ - + grep '^const [A-Z][_A-Z0-9]* ' $^) > $@.tmp || rm $@.tmp + mv -f $@.tmp $@ # The lex package is now the primary source for the token constant # definitions. Modify the code generated by goyacc here to refer to @@ -1182,7 +1182,8 @@ $(SQLPARSER_ROOT)/sql.go: $(SQLPARSER_ROOT)/gen/sql.go.tmp (echo "// Code generated by goyacc. DO NOT EDIT."; \ echo "// GENERATED FILE DO NOT EDIT"; \ cat $^ | \ - sed -E 's/^const ([A-Z][_A-Z0-9]*) =.*$$/const \1 = lex.\1/g') > $@ + sed -E 's/^const ([A-Z][_A-Z0-9]*) =.*$$/const \1 = lex.\1/g') > $@.tmp || rm $@.tmp + mv -f $@.tmp $@ # This modifies the grammar to: # - improve the types used by the generated parser for non-terminals @@ -1198,8 +1199,8 @@ $(SQLPARSER_ROOT)/sql.go: $(SQLPARSER_ROOT)/gen/sql.go.tmp # is stripped from the string. # Then translate the original syntax file, with the types determined # above being replaced with the union type in their type declarations. -.SECONDARY: $(SQLPARSER_ROOT)/gen/sql.y -$(SQLPARSER_ROOT)/gen/sql.y: $(SQLPARSER_ROOT)/sql.y $(SQLPARSER_ROOT)/replace_help_rules.awk +.SECONDARY: $(SQLPARSER_ROOT)/gen/sql-gen.y +$(SQLPARSER_ROOT)/gen/sql-gen.y: $(SQLPARSER_ROOT)/sql.y $(SQLPARSER_ROOT)/replace_help_rules.awk mkdir -p $(SQLPARSER_ROOT)/gen set -euo pipefail; \ TYPES=$$(awk '/func.*sqlSymUnion/ {print $$(NF - 1)}' $(SQLPARSER_ROOT)/sql.y | \ @@ -1207,7 +1208,9 @@ $(SQLPARSER_ROOT)/gen/sql.y: $(SQLPARSER_ROOT)/sql.y $(SQLPARSER_ROOT)/replace_h tr '\n' '|' | \ sed -E '$$s/.$$//'); \ sed -E "s_(type|token) <($$TYPES)>_\1 /* <\2> */_" < $(SQLPARSER_ROOT)/sql.y | \ - awk -f $(SQLPARSER_ROOT)/replace_help_rules.awk > $@ + awk -f $(SQLPARSER_ROOT)/replace_help_rules.awk | \ + sed -Ee 's,//.*$$,,g;s,/[*]([^*]|[*][^/])*[*]/, ,g;s/ +$$//g' > $@.tmp || rm $@.tmp + mv -f $@.tmp $@ $(PKG_ROOT)/sql/lex/reserved_keywords.go: $(SQLPARSER_ROOT)/sql.y $(SQLPARSER_ROOT)/reserved_keywords.awk awk -f $(SQLPARSER_ROOT)/reserved_keywords.awk < $< > $@.tmp || rm $@.tmp @@ -1229,7 +1232,7 @@ unused_unreserved_keywords: $(SQLPARSER_ROOT)/sql.y $(SQLPARSER_ROOT)/unreserved fi \ done -$(SQLPARSER_ROOT)/helpmap_test.go: $(SQLPARSER_ROOT)/gen/sql.y $(SQLPARSER_ROOT)/help_gen_test.sh +$(SQLPARSER_ROOT)/helpmap_test.go: $(SQLPARSER_ROOT)/gen/sql-gen.y $(SQLPARSER_ROOT)/help_gen_test.sh @$(SQLPARSER_ROOT)/help_gen_test.sh < $< >$@.tmp || rm $@.tmp mv -f $@.tmp $@ gofmt -s -w $@