Skip to content

Commit

Permalink
Filter unmatching configuration-specific dependencies (#55)
Browse files Browse the repository at this point in the history
Bazel 6 got the ability to report configuration transitions:
bazelbuild/bazel#15038

This allows us to stop over-estimating affected targets when
dependencies exist in platforms other than the current one.
  • Loading branch information
illicitonion authored Mar 28, 2023
1 parent 976079d commit b52b74a
Show file tree
Hide file tree
Showing 11 changed files with 241 additions and 63 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/bazelbuild/bazel-gazelle v0.25.1-0.20220406134132-bd319f810c16
github.com/google/btree v1.1.2
github.com/google/uuid v1.3.0
github.com/hashicorp/go-version v1.6.0
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f
github.com/wI2L/jsondiff v0.2.0
google.golang.org/protobuf v1.27.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f h1:P7Ab27T4In6ExIHmjOe88b1BHpuHlr4Vr75hX2QKAXw=
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U=
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
Expand Down
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//third_party/protobuf/bazel/build",
"@bazel_gazelle//label:go_default_library",
"@com_github_aristanetworks_goarista//path",
"@com_github_hashicorp_go_version//:go-version",
"@com_github_wi2l_jsondiff//:jsondiff",
"@org_golang_google_protobuf//encoding/protojson",
"@org_golang_google_protobuf//proto",
Expand Down
178 changes: 128 additions & 50 deletions pkg/hash_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/binary"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sort"
Expand All @@ -16,21 +17,50 @@ import (
"github.com/bazel-contrib/target-determinator/third_party/protobuf/bazel/analysis"
"github.com/bazel-contrib/target-determinator/third_party/protobuf/bazel/build"
gazelle_label "github.com/bazelbuild/bazel-gazelle/label"
"github.com/hashicorp/go-version"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/proto"
)

// NewTargetHashCache creates a TargetHashCache which uses context for metadata lookups.
func NewTargetHashCache(context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget, bazelRelease string) *TargetHashCache {
bazelVersionSupportsConfiguredRuleInputs := isConfiguredRuleInputsSupported(bazelRelease)

// For I'm sure good reasons, source files end up with configuration checksums of "null" rather than the empty string.
// Dependencies on them in the configured_rule_inputs field, however, use the empty string.
// So we normalise "null"s to empty strings.
for _, configs := range context {
if v, ok := configs["null"]; ok {
configs[""] = v
delete(configs, "null")
}
}

return &TargetHashCache{
context: context,
fileHashCache: &fileHashCache{
cache: make(map[string]*cacheEntry),
},
bazelRelease: bazelRelease,
cache: make(map[gazelle_label.Label]map[Configuration]*cacheEntry),
frozen: false,
bazelRelease: bazelRelease,
bazelVersionSupportsConfiguredRuleInputs: bazelVersionSupportsConfiguredRuleInputs,
cache: make(map[gazelle_label.Label]map[Configuration]*cacheEntry),
frozen: false,
}
}

func isConfiguredRuleInputsSupported(releaseString string) bool {
releasePrefix := "release "
explanation := " - assuming cquery does not support configured rule inputs (which is supported since bazel 6), which may lead to over-estimates of affected targets"
if !strings.HasPrefix(releaseString, releasePrefix) {
log.Printf("Bazel wasn't a released version%s", explanation)
return false
}
v, err := version.NewVersion(releaseString[len(releasePrefix):])
if err != nil {
log.Printf("Failed to parse Bazel version %q: %s", releaseString, explanation)
return false
}
return v.GreaterThanOrEqual(version.Must(version.NewVersion("6.0.0")))
}

// TargetHashCache caches hash computations for targets and files, so that transitive hashes can be
Expand All @@ -42,9 +72,10 @@ func NewTargetHashCache(context map[gazelle_label.Label]map[Configuration]*analy
// In the future we may pre-cache file hashes to avoid this hazard (and to allow more efficient
// use of threadpools when hashing files).
type TargetHashCache struct {
context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget
fileHashCache *fileHashCache
bazelRelease string
context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget
fileHashCache *fileHashCache
bazelRelease string
bazelVersionSupportsConfiguredRuleInputs bool

frozen bool

Expand All @@ -57,17 +88,17 @@ var notComputedBeforeFrozen = fmt.Errorf("TargetHashCache has already been froze

// Hash hashes a given LabelAndConfiguration, returning a sha256 which will change if any of the
// following change:
// * Values of attributes of the label (if it's a rule)
// * Contents or mode of source files which are direct inputs to the rule (if it's a rule).
// * The name of the rule class (e.g. `java_binary`) of the rule (if it's a rule).
// * The rule definition, if it's a rule which was implemented in starlark.
// - Values of attributes of the label (if it's a rule)
// - Contents or mode of source files which are direct inputs to the rule (if it's a rule).
// - The name of the rule class (e.g. `java_binary`) of the rule (if it's a rule).
// - The rule definition, if it's a rule which was implemented in starlark.
// Note that this is known to over-estimate - it currently factors in the whole contents of any
// .bzl files loaded to define the rule, where some of this contents may not be relevant.
// * The configuration the label is configured in.
// - The configuration the label is configured in.
// Note that this is known to over-estimate - per-language fragments are not filtered from this
// configuration, which means C++-affecting options are considered to affect Java.
// * The above recursively for all rules and files which are depended on by the given
// LabelAndConfiguration.
// - The above recursively for all rules and files which are depended on by the given
// LabelAndConfiguration.
// Note that this is known to over-estimate - the configuration of dependencies isn't easily
// surfaced by Bazel, so if a dependency exists in multiple configurations, all of them will be
// mixed into the hash, even if only one of the configurations is actually relevant.
Expand Down Expand Up @@ -277,26 +308,28 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
}
}

ruleInputsBefore := ss.NewSortedSet(ruleBefore.GetRuleInput())
ruleInputsAfter := ss.NewSortedSet(ruleAfter.GetRuleInput())
ruleInputLabelsBefore, labelsToKnownConfigurationsBefore, err := indexRuleInputs(before, ruleBefore)
if err != nil {
return nil, err
}
ruleInputLabelsAfter, labelsToKnownConfigurationsAfter, err := indexRuleInputs(after, ruleAfter)
if err != nil {
return nil, err
}

for _, ruleInputLabelString := range ruleInputsAfter.SortedSlice() {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput %s: %w", ruleInputLabelString, err)
}
if !ruleInputsBefore.Contains(ruleInputLabelString) {
for _, ruleInputLabel := range ruleInputLabelsAfter.SortedSlice() {
if !ruleInputLabelsBefore.Contains(ruleInputLabel) {
differences = append(differences, Difference{
Category: "RuleInputAdded",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
})
} else {
// Ideally we would know the configuration of each of these ruleInputs from the
// query information, so we could filter away e.g. host changes when we only have a target dep.
// Unfortunately, Bazel doesn't currently expose this.
// See https://github.com/bazelbuild/bazel/issues/14610#issuecomment-1024460141
knownConfigurationsBefore := before.KnownConfigurations(ruleInputLabel)
knownConfigurationsAfter := after.KnownConfigurations(ruleInputLabel)
knownConfigurationsBefore := labelsToKnownConfigurationsBefore[ruleInputLabel]
knownConfigurationsAfter := labelsToKnownConfigurationsAfter[ruleInputLabel]

for _, knownConfigurationAfter := range knownConfigurationsAfter.SortedSlice() {
if knownConfigurationsBefore.Contains(knownConfigurationAfter) {
Expand All @@ -317,7 +350,7 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
} else {
differences = append(differences, Difference{
Category: "RuleInputChanged",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
After: fmt.Sprintf("Configuration: %v", knownConfigurationAfter),
})
}
Expand All @@ -326,25 +359,53 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
if !knownConfigurationsAfter.Contains(knownConfigurationBefore) {
differences = append(differences, Difference{
Category: "RuleInputChanged",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
Before: fmt.Sprintf("Configuration: %v", knownConfigurationBefore),
})
}
}
}
}
for _, ruleInputLabel := range ruleInputsBefore.SortedSlice() {
if !ruleInputsAfter.Contains(ruleInputLabel) {
for _, ruleInputLabel := range ruleInputLabelsBefore.SortedSlice() {
if !ruleInputLabelsAfter.Contains(ruleInputLabel) {
differences = append(differences, Difference{
Category: "RuleInputRemoved",
Key: ruleInputLabel,
Key: ruleInputLabel.String(),
})
}
}

return differences, nil
}

func indexRuleInputs(index *TargetHashCache, rule *build.Rule) (*ss.SortedSet[gazelle_label.Label], map[gazelle_label.Label]*ss.SortedSet[Configuration], error) {
ruleInputs := ss.NewSortedSetFn([]gazelle_label.Label{}, CompareLabels)
labelsToConfigurations := make(map[gazelle_label.Label]*ss.SortedSet[Configuration])
if index.bazelVersionSupportsConfiguredRuleInputs {
for _, configuredRuleInput := range rule.GetConfiguredRuleInput() {
ruleInputLabel, err := ParseCanonicalLabel(configuredRuleInput.GetLabel())
if err != nil {
return nil, nil, fmt.Errorf("failed to parse configuredRuleInput label %s: %w", configuredRuleInput.GetLabel(), err)
}
ruleInputs.Add(ruleInputLabel)
if _, ok := labelsToConfigurations[ruleInputLabel]; !ok {
labelsToConfigurations[ruleInputLabel] = ss.NewSortedSet([]Configuration{})
}
labelsToConfigurations[ruleInputLabel].Add(Configuration(configuredRuleInput.GetConfigurationChecksum()))
}
} else {
for _, ruleInputString := range rule.GetRuleInput() {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputString)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputString, err)
}
ruleInputs.Add(ruleInputLabel)
labelsToConfigurations[ruleInputLabel] = index.KnownConfigurations(ruleInputLabel)
}
}
return ruleInputs, labelsToConfigurations, nil
}

func formatLabelWithConfiguration(label gazelle_label.Label, configuration Configuration) string {
s := label.String()
if configuration != "null" {
Expand Down Expand Up @@ -453,34 +514,51 @@ func hashRule(thc *TargetHashCache, rule *build.Rule, configuration *analysis.Co
}

// Hash rule inputs
for _, ruleInputLabelString := range rule.RuleInput {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputLabelString, err)
}
for _, configuration := range thc.KnownConfigurations(ruleInputLabel).SortedSlice() {
if thc.bazelVersionSupportsConfiguredRuleInputs {
for _, configuredRuleInput := range rule.ConfiguredRuleInput {
ruleInputLabel, err := ParseCanonicalLabel(configuredRuleInput.GetLabel())
if err != nil {
return nil, fmt.Errorf("failed to parse configuredRuleInput label %s: %w", configuredRuleInput.GetLabel(), err)
}
configuration := Configuration(configuredRuleInput.GetConfigurationChecksum())
ruleInputHash, err := thc.Hash(LabelAndConfiguration{Label: ruleInputLabel, Configuration: configuration})
if err != nil {
if err == labelNotFound {
// Two issues (so far) have been found which lead to targets being listed in
// ruleInputs but not in the output of a deps query:
//
// cquery doesn't filter ruleInputs according to used configurations, which means
// targets may appear in a Target's ruleInputs even though they weren't returned by
// a transitive `deps` cquery.
// Assume that a missing target should have been pruned, and that we should ignore it.
// See https://github.com/bazelbuild/bazel/issues/14610
//
// Some targets are also just sometimes missing for reasons we don't yet know.
// See https://github.com/bazelbuild/bazel/issues/14617
continue
}
return nil, err
return nil, fmt.Errorf("failed to hash configuredRuleInput %s %s: %w", ruleInputLabel, configuration, err)
}
writeLabel(hasher, ruleInputLabel)
hasher.Write([]byte(configuration))
hasher.Write(ruleInputHash)
}
} else {
for _, ruleInputLabelString := range rule.RuleInput {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputLabelString, err)
}
for _, configuration := range thc.KnownConfigurations(ruleInputLabel).SortedSlice() {
ruleInputHash, err := thc.Hash(LabelAndConfiguration{Label: ruleInputLabel, Configuration: configuration})
if err != nil {
if err == labelNotFound {
// Two issues (so far) have been found which lead to targets being listed in
// ruleInputs but not in the output of a deps query:
//
// cquery doesn't filter ruleInputs according to used configurations, which means
// targets may appear in a Target's ruleInputs even though they weren't returned by
// a transitive `deps` cquery.
// Assume that a missing target should have been pruned, and that we should ignore it.
// See https://github.com/bazelbuild/bazel/issues/14610
//
// Some targets are also just sometimes missing for reasons we don't yet know.
// See https://github.com/bazelbuild/bazel/issues/14617
continue
}
return nil, err
}
writeLabel(hasher, ruleInputLabel)
hasher.Write([]byte(configuration))
hasher.Write(ruleInputHash)
}
}
}

return hasher.Sum(nil), nil
Expand Down
14 changes: 10 additions & 4 deletions pkg/target_determinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
}

depsPattern := fmt.Sprintf("deps(%s)", targets.String())
transitiveResult, err := runToCqueryResult(context, depsPattern)
transitiveResult, err := runToCqueryResult(context, depsPattern, true)
if err != nil {
retErr := fmt.Errorf("failed to cquery %v: %w", depsPattern, err)
return &QueryResults{
Expand All @@ -670,7 +670,7 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
return nil, fmt.Errorf("failed to parse cquery result: %w", err)
}

matchingTargetResults, err := runToCqueryResult(context, targets.String())
matchingTargetResults, err := runToCqueryResult(context, targets.String(), false)
if err != nil {
return nil, fmt.Errorf("failed to run top-level cquery: %w", err)
}
Expand Down Expand Up @@ -725,14 +725,20 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
return queryResults, nil
}

func runToCqueryResult(context *Context, pattern string) (*analysis.CqueryResult, error) {
func runToCqueryResult(context *Context, pattern string, includeTransitions bool) (*analysis.CqueryResult, error) {
log.Printf("Running cquery on %s", pattern)
var stdout bytes.Buffer
var stderr bytes.Buffer

args := []string{"--output=proto"}
if includeTransitions {
args = append(args, "--transitions=lite")
}
args = append(args, pattern)

returnVal, err := context.BazelCmd.Execute(
BazelCmdConfig{Dir: context.WorkspacePath, Stdout: &stdout, Stderr: &stderr},
[]string{"--output_base", context.BazelOutputBase}, "cquery", "--output=proto", pattern)
[]string{"--output_base", context.BazelOutputBase}, "cquery", args...)

if returnVal != 0 || err != nil {
return nil, fmt.Errorf("failed to run cquery on %s: %w. Stderr:\n%v", pattern, err, stderr.String())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.nio.file.Path;
import java.util.Set;
import org.junit.Ignore;
import org.junit.Test;

public class BazelDiffIntegrationTest extends Tests {
private static final String BAZEL_DIFF =
Expand Down Expand Up @@ -106,6 +107,20 @@ public void changingTargetConfigurationDoesNotAffectHostConfiguration() {}
@Ignore("bazel-diff doesn't inspect configurations.")
public void changingHostConfigurationDoesNotAffectTargetConfiguration() {}

@Override
@Test
public void ignoredPlatformSpecificSrcChanged() throws Exception {
allowOverBuilds("bazel-diff doesn't filter platform-specific changes");
super.ignoredPlatformSpecificSrcChanged();
}

@Override
@Test
public void ignoredPlatformSpecificDepChanged() throws Exception {
allowOverBuilds("bazel-diff doesn't filter platform-specific changes");
super.ignoredPlatformSpecificDepChanged();
}

// Submodules

@Override
Expand Down
Loading

0 comments on commit b52b74a

Please sign in to comment.