From 4800cf2bf673a250f27cd0a375b9374dcf22eb89 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Tue, 24 Nov 2020 13:16:35 -0500 Subject: [PATCH] colexec,bazel: re-work code-gen through bazel Now that we've added a way to source a specific template file in `execgen` (instead of relying on hard-coded paths, see #56982), we can simplify how we generate eg.go files. This lets us parallelize the generation of these files, as the more fine-grained dependency tracking lets bazel generate each eg.go file concurrently (previously we had to specify the superset of all template files as dependencies for the generation of each individual eg.go file). There's one exception for the generation of like_ops.eg.go, the generation of which appears to want read from a second template file. We've special-cased the generation of this file into it's own thing. Release note: None --- pkg/sql/colexec/BUILD.bazel | 67 ++++------------ pkg/sql/colexec/COLEXEC.bzl | 155 ++++++++++++++++++++++++++---------- 2 files changed, 129 insertions(+), 93 deletions(-) diff --git a/pkg/sql/colexec/BUILD.bazel b/pkg/sql/colexec/BUILD.bazel index ff48f4de4d99..35c9fc9d69a8 100644 --- a/pkg/sql/colexec/BUILD.bazel +++ b/pkg/sql/colexec/BUILD.bazel @@ -1,5 +1,5 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") -load(":COLEXEC.bzl", "generate") +load(":COLEXEC.bzl", "eg_go_filegroup", "gen_eg_go_rules", "gen_like_ops_rule") go_library( name = "colexec", @@ -52,6 +52,7 @@ go_library( "unordered_distinct.go", "utils.go", ":gen-exec", # keep + ":gen-like-ops", # keep ], importpath = "github.com/cockroachdb/cockroach/pkg/sql/colexec", visibility = ["//visibility:public"], @@ -200,57 +201,17 @@ go_test( ) # Define a file group for all the .eg.go targets. -targets = [ - "and_or_projection.eg.go", - "cast.eg.go", - "const.eg.go", - "default_cmp_expr.eg.go", - "default_cmp_proj_ops.eg.go", - "default_cmp_sel_ops.eg.go", - "distinct.eg.go", - "hashjoiner.eg.go", - "hashtable_distinct.eg.go", - "hashtable_full_default.eg.go", - "hashtable_full_deleting.eg.go", - "hash_aggregator.eg.go", - "hash_utils.eg.go", - "is_null_ops.eg.go", - "like_ops.eg.go", - "mergejoinbase.eg.go", - "mergejoiner_exceptall.eg.go", - "mergejoiner_fullouter.eg.go", - "mergejoiner_inner.eg.go", - "mergejoiner_intersectall.eg.go", - "mergejoiner_leftanti.eg.go", - "mergejoiner_leftouter.eg.go", - "mergejoiner_leftsemi.eg.go", - "mergejoiner_rightanti.eg.go", - "mergejoiner_rightouter.eg.go", - "mergejoiner_rightsemi.eg.go", - "ordered_synchronizer.eg.go", - "proj_const_left_ops.eg.go", - "proj_const_right_ops.eg.go", - "proj_non_const_ops.eg.go", - "quicksort.eg.go", - "rank.eg.go", - "relative_rank.eg.go", - "row_number.eg.go", - "rowstovec.eg.go", - "selection_ops.eg.go", - "select_in.eg.go", - "sort.eg.go", - "substring.eg.go", - "values_differ.eg.go", - "vec_comparators.eg.go", - "window_peer_grouper.eg.go", -] +eg_go_filegroup(name = "gen-exec") -# Define a generator for all execgen code. Look towards COLEXEC.bzl for the -# implementation. -generate( - name = "gen-exec", - execgen = "//pkg/sql/colexec/execgen/cmd/execgen", - goimports = "//vendor/github.com/cockroachdb/gostdlib/x/tools/cmd/goimports", - targets = targets, - templates = glob(["*_tmpl.go"]), +# Define gen rules for individual eg.go files. +gen_eg_go_rules() + +# Special-case the gen rule for like_ops.eg.go. +gen_like_ops_rule( + name = "gen-like-ops", + target = "like_ops.eg.go", + templates = [ + "selection_ops_tmpl.go", + "proj_const_ops_tmpl.go", + ], ) diff --git a/pkg/sql/colexec/COLEXEC.bzl b/pkg/sql/colexec/COLEXEC.bzl index e25257f59c41..9e5a8f216f66 100644 --- a/pkg/sql/colexec/COLEXEC.bzl +++ b/pkg/sql/colexec/COLEXEC.bzl @@ -1,44 +1,119 @@ -def _generate_impl(ctx): - output_files, input_files = [], [] - # Gather the full list of all input template files (deconstructing it from - # the filegroup input). See [1] for a reference of all types/members within - # skylark. - # - # [1]: https://docs.bazel.build/versions/master/skylark/lib/skylark-overview.html - for tmpl in ctx.attr.templates: - input_files.extend(tmpl.files.to_list()) +# Map between target name and relevant template. +targets = [ + ('and_or_projection.eg.go', 'and_or_projection_tmpl.go'), + ('cast.eg.go', 'cast_tmpl.go'), + ('const.eg.go', 'const_tmpl.go'), + ('default_cmp_expr.eg.go', 'default_cmp_expr_tmpl.go'), + ('default_cmp_proj_ops.eg.go', 'default_cmp_proj_ops_tmpl.go'), + ('default_cmp_sel_ops.eg.go', 'default_cmp_sel_ops_tmpl.go'), + ('distinct.eg.go', 'distinct_tmpl.go'), + ('hash_aggregator.eg.go', 'hash_aggregator_tmpl.go'), + ('hash_utils.eg.go', 'hash_utils_tmpl.go'), + ('hashjoiner.eg.go', 'hashjoiner_tmpl.go'), + ('hashtable_distinct.eg.go', 'hashtable_tmpl.go'), + ('hashtable_full_default.eg.go', 'hashtable_tmpl.go'), + ('hashtable_full_deleting.eg.go', 'hashtable_tmpl.go'), + ('is_null_ops.eg.go', 'is_null_ops_tmpl.go'), + # ('like_ops.eg.go', ...); See `gen_like_ops_rule` below. + ('mergejoinbase.eg.go', 'mergejoinbase_tmpl.go'), + ('mergejoiner_exceptall.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_fullouter.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_inner.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_intersectall.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_leftanti.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_leftouter.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_leftsemi.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_rightanti.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_rightouter.eg.go', 'mergejoiner_tmpl.go'), + ('mergejoiner_rightsemi.eg.go', 'mergejoiner_tmpl.go'), + ('ordered_synchronizer.eg.go', 'ordered_synchronizer_tmpl.go'), + ('proj_const_left_ops.eg.go', 'proj_const_ops_tmpl.go'), + ('proj_const_right_ops.eg.go', 'proj_const_ops_tmpl.go'), + ('proj_non_const_ops.eg.go', 'proj_non_const_ops_tmpl.go'), + ('quicksort.eg.go', 'quicksort_tmpl.go'), + ('rank.eg.go', 'rank_tmpl.go'), + ('relative_rank.eg.go', 'relative_rank_tmpl.go'), + ('row_number.eg.go', 'row_number_tmpl.go'), + ('rowstovec.eg.go', 'rowstovec_tmpl.go'), + ('select_in.eg.go', 'select_in_tmpl.go'), + ('selection_ops.eg.go', 'selection_ops_tmpl.go'), + ('sort.eg.go', 'sort_tmpl.go'), + ('substring.eg.go', 'substring_tmpl.go'), + ('values_differ.eg.go', 'values_differ_tmpl.go'), + ('vec_comparators.eg.go', 'vec_comparators_tmpl.go'), + ('window_peer_grouper.eg.go', 'window_peer_grouper_tmpl.go'), +] - execgen_binary = ctx.executable.execgen - goimports_binary = ctx.executable.goimports - input_binaries = [execgen_binary, goimports_binary] +def rule_name_for(target): + # e.g. 'vec_comparators.eg.go' -> 'gen-vec-comparators' + return 'gen-{}'.format(target.replace('.eg.go', '').replace('_', '-')) - # For each target, run execgen and goimports. - for generated_file_name in ctx.attr.targets: - generated_file = ctx.actions.declare_file(generated_file_name) - ctx.actions.run_shell( - tools = input_binaries, - inputs = input_files, - outputs = [generated_file], - progress_message = "Generating pkg/cmd/colexec/%s" % generated_file_name, - command = """ - ln -s external/cockroach/pkg pkg - %s -fmt=false pkg/sql/colexec/%s > %s - %s -w %s - """ % (execgen_binary.path, generated_file_name, generated_file.path, goimports_binary.path, generated_file.path), - ) - output_files.append(generated_file) +# Define a file group for all the .eg.go targets. +def eg_go_filegroup(name): + native.filegroup( + name = name, + srcs = [':{}'.format(rule_name_for(target)) for target, _ in targets], + ) + +# Define gen rules for individual eg.go files. +def gen_eg_go_rules(): + # Define some aliases for ease of use. + native.alias( + name = "execgen", + actual = "//pkg/sql/colexec/execgen/cmd/execgen", + ) + native.alias( + name = "goimports", + actual = "//vendor/github.com/cockroachdb/gostdlib/x/tools/cmd/goimports", + ) - # Construct the full set of output files for bazel to be able to analyse. - # See https://docs.bazel.build/versions/master/skylark/lib/DefaultInfo.html. - return [DefaultInfo(files = depset(output_files))] + for target, template in targets: + name = rule_name_for(target) + + native.genrule( + name = name, + srcs = [template], + outs = [target], + # `$@` lets us substitute in the output path[1]. The symlink below + # is frowned upon for genrules[2]. That said, when testing + # pkg/sql/colexec through bazel it expects to find the template + # files in a path other than what SRCS would suggest. We haven't + # really investigated why. For now lets just symlink the relevant + # files into the "right" path within the bazel sandbox[3]. + # + # [1]: https://docs.bazel.build/versions/3.7.0/be/general.html#genrule_args + # [2]: https://docs.bazel.build/versions/3.7.0/be/general.html#general-advice + # [3]: https://github.com/cockroachdb/cockroach/pull/57027 + cmd = """ + ln -s external/cockroach/pkg pkg + $(location :execgen) -template $(SRCS) \ + -fmt=false pkg/sql/colexec/$@ > $@ + $(location :goimports) -w $@ + """, + tools = [":execgen", ":goimports"], + ) -generate = rule( - implementation = _generate_impl, - # Source all the necessary template files, output targets, the execgen and goimports binaries. - attrs = { - "templates": attr.label_list(mandatory = True, allow_files = ["tmpl.go"]), - "targets": attr.string_list(mandatory = True), - "execgen": attr.label(mandatory = True, executable = True, allow_files = True, cfg = "exec"), - "goimports": attr.label(mandatory = True, executable = True, allow_files = True, cfg = "exec"), - }, -) +# TODO(irfansharif): We should be able to use `gen_eg_go_rules` to generate +# like_ops.eg.go. It's special-cased here because execgen reads two separate +# template files[1] when generating like_ops.eg.go. +# +# [1]: https://github.com/cockroachdb/cockroach/blob/1f23ef2b4e/pkg/sql/colexec/execgen/cmd/execgen/like_ops_gen.go#L48 +def gen_like_ops_rule(name, templates, target): + native.genrule( + name = name, + srcs = templates, + outs = [target], + # See TODO above. We should ideally be using $(SRCS) to point to the + # template file, but like_ops.eg.go needs access to two template files. + # We point to the first, which is the template the generator is + # registered with, but we also need to include the other in our srcs so + # it's included in the sandbox when generating the file (-template only + # expects one argument). + cmd = """ + ln -s external/cockroach/pkg pkg + $(location :execgen) -template $(location %s) \ + -fmt=false pkg/sql/colexec/$@ > $@ + $(location :goimports) -w $@ + """ % (templates[0]), + tools = [":execgen", ":goimports"], # Make use of the same aliases defined above. + )