Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter unmatching configuration-specific dependencies #55

Merged
merged 4 commits into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/bazelbuild/bazel-gazelle v0.25.1-0.20220406134132-bd319f810c16
github.com/google/btree v1.1.2
github.com/google/uuid v1.3.0
github.com/hashicorp/go-version v1.6.0
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f
github.com/wI2L/jsondiff v0.2.0
google.golang.org/protobuf v1.27.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f h1:P7Ab27T4In6ExIHmjOe88b1BHpuHlr4Vr75hX2QKAXw=
github.com/otiai10/copy v1.7.1-0.20211223015809-9aae5f77261f/go.mod h1:rmRl6QPdJj6EiUqXQ/4Nn2lLXoNQjFCQbbNrxgc/t3U=
github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE=
Expand Down
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//third_party/protobuf/bazel/build",
"@bazel_gazelle//label:go_default_library",
"@com_github_aristanetworks_goarista//path",
"@com_github_hashicorp_go_version//:go-version",
"@com_github_wi2l_jsondiff//:jsondiff",
"@org_golang_google_protobuf//encoding/protojson",
"@org_golang_google_protobuf//proto",
Expand Down
178 changes: 128 additions & 50 deletions pkg/hash_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/binary"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sort"
Expand All @@ -16,21 +17,50 @@ import (
"github.com/bazel-contrib/target-determinator/third_party/protobuf/bazel/analysis"
"github.com/bazel-contrib/target-determinator/third_party/protobuf/bazel/build"
gazelle_label "github.com/bazelbuild/bazel-gazelle/label"
"github.com/hashicorp/go-version"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/proto"
)

// NewTargetHashCache creates a TargetHashCache which uses context for metadata lookups.
func NewTargetHashCache(context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget, bazelRelease string) *TargetHashCache {
bazelVersionSupportsConfiguredRuleInputs := isConfiguredRuleInputsSupported(bazelRelease)

// For I'm sure good reasons, source files end up with configuration checksums of "null" rather than the empty string.
// Dependencies on them in the configured_rule_inputs field, however, use the empty string.
// So we normalise "null"s to empty strings.
for _, configs := range context {
if v, ok := configs["null"]; ok {
configs[""] = v
delete(configs, "null")
}
}

return &TargetHashCache{
context: context,
fileHashCache: &fileHashCache{
cache: make(map[string]*cacheEntry),
},
bazelRelease: bazelRelease,
cache: make(map[gazelle_label.Label]map[Configuration]*cacheEntry),
frozen: false,
bazelRelease: bazelRelease,
bazelVersionSupportsConfiguredRuleInputs: bazelVersionSupportsConfiguredRuleInputs,
cache: make(map[gazelle_label.Label]map[Configuration]*cacheEntry),
frozen: false,
}
}

func isConfiguredRuleInputsSupported(releaseString string) bool {
releasePrefix := "release "
explanation := " - assuming cquery does not support configured rule inputs (which is supported since bazel 6), which may lead to over-estimates of affected targets"
if !strings.HasPrefix(releaseString, releasePrefix) {
log.Printf("Bazel wasn't a released version%s", explanation)
return false
}
v, err := version.NewVersion(releaseString[len(releasePrefix):])
if err != nil {
log.Printf("Failed to parse Bazel version %q: %s", releaseString, explanation)
illicitonion marked this conversation as resolved.
Show resolved Hide resolved
return false
}
return v.GreaterThanOrEqual(version.Must(version.NewVersion("6.0.0")))
}

// TargetHashCache caches hash computations for targets and files, so that transitive hashes can be
Expand All @@ -42,9 +72,10 @@ func NewTargetHashCache(context map[gazelle_label.Label]map[Configuration]*analy
// In the future we may pre-cache file hashes to avoid this hazard (and to allow more efficient
// use of threadpools when hashing files).
type TargetHashCache struct {
context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget
fileHashCache *fileHashCache
bazelRelease string
context map[gazelle_label.Label]map[Configuration]*analysis.ConfiguredTarget
fileHashCache *fileHashCache
bazelRelease string
bazelVersionSupportsConfiguredRuleInputs bool

frozen bool

Expand All @@ -57,17 +88,17 @@ var notComputedBeforeFrozen = fmt.Errorf("TargetHashCache has already been froze

// Hash hashes a given LabelAndConfiguration, returning a sha256 which will change if any of the
// following change:
// * Values of attributes of the label (if it's a rule)
// * Contents or mode of source files which are direct inputs to the rule (if it's a rule).
// * The name of the rule class (e.g. `java_binary`) of the rule (if it's a rule).
// * The rule definition, if it's a rule which was implemented in starlark.
// - Values of attributes of the label (if it's a rule)
// - Contents or mode of source files which are direct inputs to the rule (if it's a rule).
// - The name of the rule class (e.g. `java_binary`) of the rule (if it's a rule).
// - The rule definition, if it's a rule which was implemented in starlark.
// Note that this is known to over-estimate - it currently factors in the whole contents of any
// .bzl files loaded to define the rule, where some of this contents may not be relevant.
// * The configuration the label is configured in.
// - The configuration the label is configured in.
// Note that this is known to over-estimate - per-language fragments are not filtered from this
// configuration, which means C++-affecting options are considered to affect Java.
// * The above recursively for all rules and files which are depended on by the given
// LabelAndConfiguration.
// - The above recursively for all rules and files which are depended on by the given
// LabelAndConfiguration.
// Note that this is known to over-estimate - the configuration of dependencies isn't easily
// surfaced by Bazel, so if a dependency exists in multiple configurations, all of them will be
// mixed into the hash, even if only one of the configurations is actually relevant.
Expand Down Expand Up @@ -277,26 +308,28 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
}
}

ruleInputsBefore := ss.NewSortedSet(ruleBefore.GetRuleInput())
ruleInputsAfter := ss.NewSortedSet(ruleAfter.GetRuleInput())
ruleInputLabelsBefore, labelsToKnownConfigurationsBefore, err := indexRuleInputs(before, ruleBefore)
if err != nil {
return nil, err
}
ruleInputLabelsAfter, labelsToKnownConfigurationsAfter, err := indexRuleInputs(after, ruleAfter)
if err != nil {
return nil, err
}

for _, ruleInputLabelString := range ruleInputsAfter.SortedSlice() {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput %s: %w", ruleInputLabelString, err)
}
if !ruleInputsBefore.Contains(ruleInputLabelString) {
for _, ruleInputLabel := range ruleInputLabelsAfter.SortedSlice() {
if !ruleInputLabelsBefore.Contains(ruleInputLabel) {
differences = append(differences, Difference{
Category: "RuleInputAdded",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
})
} else {
// Ideally we would know the configuration of each of these ruleInputs from the
// query information, so we could filter away e.g. host changes when we only have a target dep.
// Unfortunately, Bazel doesn't currently expose this.
// See https://github.com/bazelbuild/bazel/issues/14610#issuecomment-1024460141
knownConfigurationsBefore := before.KnownConfigurations(ruleInputLabel)
knownConfigurationsAfter := after.KnownConfigurations(ruleInputLabel)
knownConfigurationsBefore := labelsToKnownConfigurationsBefore[ruleInputLabel]
knownConfigurationsAfter := labelsToKnownConfigurationsAfter[ruleInputLabel]

for _, knownConfigurationAfter := range knownConfigurationsAfter.SortedSlice() {
if knownConfigurationsBefore.Contains(knownConfigurationAfter) {
Expand All @@ -317,7 +350,7 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
} else {
differences = append(differences, Difference{
Category: "RuleInputChanged",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
After: fmt.Sprintf("Configuration: %v", knownConfigurationAfter),
})
}
Expand All @@ -326,25 +359,53 @@ func WalkDiffs(before *TargetHashCache, after *TargetHashCache, labelAndConfigur
if !knownConfigurationsAfter.Contains(knownConfigurationBefore) {
differences = append(differences, Difference{
Category: "RuleInputChanged",
Key: ruleInputLabelString,
Key: ruleInputLabel.String(),
Before: fmt.Sprintf("Configuration: %v", knownConfigurationBefore),
})
}
}
}
}
for _, ruleInputLabel := range ruleInputsBefore.SortedSlice() {
if !ruleInputsAfter.Contains(ruleInputLabel) {
for _, ruleInputLabel := range ruleInputLabelsBefore.SortedSlice() {
if !ruleInputLabelsAfter.Contains(ruleInputLabel) {
differences = append(differences, Difference{
Category: "RuleInputRemoved",
Key: ruleInputLabel,
Key: ruleInputLabel.String(),
})
}
}

return differences, nil
}

func indexRuleInputs(index *TargetHashCache, rule *build.Rule) (*ss.SortedSet[gazelle_label.Label], map[gazelle_label.Label]*ss.SortedSet[Configuration], error) {
ruleInputs := ss.NewSortedSetFn([]gazelle_label.Label{}, CompareLabels)
labelsToConfigurations := make(map[gazelle_label.Label]*ss.SortedSet[Configuration])
if index.bazelVersionSupportsConfiguredRuleInputs {
for _, configuredRuleInput := range rule.GetConfiguredRuleInput() {
ruleInputLabel, err := ParseCanonicalLabel(configuredRuleInput.GetLabel())
if err != nil {
return nil, nil, fmt.Errorf("failed to parse configuredRuleInput label %s: %w", configuredRuleInput.GetLabel(), err)
}
ruleInputs.Add(ruleInputLabel)
if _, ok := labelsToConfigurations[ruleInputLabel]; !ok {
labelsToConfigurations[ruleInputLabel] = ss.NewSortedSet([]Configuration{})
}
labelsToConfigurations[ruleInputLabel].Add(Configuration(configuredRuleInput.GetConfigurationChecksum()))
}
} else {
for _, ruleInputString := range rule.GetRuleInput() {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputString)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputString, err)
}
ruleInputs.Add(ruleInputLabel)
labelsToConfigurations[ruleInputLabel] = index.KnownConfigurations(ruleInputLabel)
}
}
return ruleInputs, labelsToConfigurations, nil
}

func formatLabelWithConfiguration(label gazelle_label.Label, configuration Configuration) string {
s := label.String()
if configuration != "null" {
Expand Down Expand Up @@ -453,34 +514,51 @@ func hashRule(thc *TargetHashCache, rule *build.Rule, configuration *analysis.Co
}

// Hash rule inputs
for _, ruleInputLabelString := range rule.RuleInput {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputLabelString, err)
}
for _, configuration := range thc.KnownConfigurations(ruleInputLabel).SortedSlice() {
if thc.bazelVersionSupportsConfiguredRuleInputs {
for _, configuredRuleInput := range rule.ConfiguredRuleInput {
ruleInputLabel, err := ParseCanonicalLabel(configuredRuleInput.GetLabel())
if err != nil {
return nil, fmt.Errorf("failed to parse configuredRuleInput label %s: %w", configuredRuleInput.GetLabel(), err)
}
configuration := Configuration(configuredRuleInput.GetConfigurationChecksum())
ruleInputHash, err := thc.Hash(LabelAndConfiguration{Label: ruleInputLabel, Configuration: configuration})
if err != nil {
if err == labelNotFound {
// Two issues (so far) have been found which lead to targets being listed in
// ruleInputs but not in the output of a deps query:
//
// cquery doesn't filter ruleInputs according to used configurations, which means
// targets may appear in a Target's ruleInputs even though they weren't returned by
// a transitive `deps` cquery.
// Assume that a missing target should have been pruned, and that we should ignore it.
// See https://github.com/bazelbuild/bazel/issues/14610
//
// Some targets are also just sometimes missing for reasons we don't yet know.
// See https://github.com/bazelbuild/bazel/issues/14617
continue
}
return nil, err
return nil, fmt.Errorf("failed to hash configuredRuleInput %s %s: %w", ruleInputLabel, configuration, err)
}
writeLabel(hasher, ruleInputLabel)
hasher.Write([]byte(configuration))
hasher.Write(ruleInputHash)
}
} else {
for _, ruleInputLabelString := range rule.RuleInput {
ruleInputLabel, err := ParseCanonicalLabel(ruleInputLabelString)
if err != nil {
return nil, fmt.Errorf("failed to parse ruleInput label %s: %w", ruleInputLabelString, err)
}
for _, configuration := range thc.KnownConfigurations(ruleInputLabel).SortedSlice() {
ruleInputHash, err := thc.Hash(LabelAndConfiguration{Label: ruleInputLabel, Configuration: configuration})
if err != nil {
if err == labelNotFound {
// Two issues (so far) have been found which lead to targets being listed in
// ruleInputs but not in the output of a deps query:
//
// cquery doesn't filter ruleInputs according to used configurations, which means
// targets may appear in a Target's ruleInputs even though they weren't returned by
// a transitive `deps` cquery.
// Assume that a missing target should have been pruned, and that we should ignore it.
// See https://github.com/bazelbuild/bazel/issues/14610
//
// Some targets are also just sometimes missing for reasons we don't yet know.
// See https://github.com/bazelbuild/bazel/issues/14617
continue
}
return nil, err
}
writeLabel(hasher, ruleInputLabel)
hasher.Write([]byte(configuration))
hasher.Write(ruleInputHash)
}
}
}

return hasher.Sum(nil), nil
Expand Down
14 changes: 10 additions & 4 deletions pkg/target_determinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
}

depsPattern := fmt.Sprintf("deps(%s)", targets.String())
transitiveResult, err := runToCqueryResult(context, depsPattern)
transitiveResult, err := runToCqueryResult(context, depsPattern, true)
if err != nil {
retErr := fmt.Errorf("failed to cquery %v: %w", depsPattern, err)
return &QueryResults{
Expand All @@ -670,7 +670,7 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
return nil, fmt.Errorf("failed to parse cquery result: %w", err)
}

matchingTargetResults, err := runToCqueryResult(context, targets.String())
matchingTargetResults, err := runToCqueryResult(context, targets.String(), false)
if err != nil {
return nil, fmt.Errorf("failed to run top-level cquery: %w", err)
}
Expand Down Expand Up @@ -725,14 +725,20 @@ func doQueryDeps(context *Context, targets TargetsList) (*QueryResults, error) {
return queryResults, nil
}

func runToCqueryResult(context *Context, pattern string) (*analysis.CqueryResult, error) {
func runToCqueryResult(context *Context, pattern string, includeTransitions bool) (*analysis.CqueryResult, error) {
log.Printf("Running cquery on %s", pattern)
var stdout bytes.Buffer
var stderr bytes.Buffer

args := []string{"--output=proto"}
if includeTransitions {
args = append(args, "--transitions=lite")
}
args = append(args, pattern)

returnVal, err := context.BazelCmd.Execute(
BazelCmdConfig{Dir: context.WorkspacePath, Stdout: &stdout, Stderr: &stderr},
[]string{"--output_base", context.BazelOutputBase}, "cquery", "--output=proto", pattern)
[]string{"--output_base", context.BazelOutputBase}, "cquery", args...)

if returnVal != 0 || err != nil {
return nil, fmt.Errorf("failed to run cquery on %s: %w. Stderr:\n%v", pattern, err, stderr.String())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.nio.file.Path;
import java.util.Set;
import org.junit.Ignore;
import org.junit.Test;

public class BazelDiffIntegrationTest extends Tests {
private static final String BAZEL_DIFF =
Expand Down Expand Up @@ -106,6 +107,20 @@ public void changingTargetConfigurationDoesNotAffectHostConfiguration() {}
@Ignore("bazel-diff doesn't inspect configurations.")
public void changingHostConfigurationDoesNotAffectTargetConfiguration() {}

@Override
@Test
public void ignoredPlatformSpecificSrcChanged() throws Exception {
allowOverBuilds("bazel-diff doesn't filter platform-specific changes");
super.ignoredPlatformSpecificSrcChanged();
}

@Override
@Test
public void ignoredPlatformSpecificDepChanged() throws Exception {
allowOverBuilds("bazel-diff doesn't filter platform-specific changes");
super.ignoredPlatformSpecificDepChanged();
}

// Submodules

@Override
Expand Down
Loading