diff --git a/README.md b/README.md index 064acf9422c4..a947398571cc 100644 --- a/README.md +++ b/README.md @@ -482,6 +482,10 @@ default-image-pull-source: "" # - "./out/**/*.json" exclude: [] +# allows users to exclude synthetic binary packages from the sbom +# these packages are removed if an overlap with a non-synthetic package is found +exclude-binary-overlap-by-ownership: true + # os and/or architecture to use when referencing container images (e.g. "windows/armv6" or "arm64") # same as --platform; SYFT_PLATFORM env var platform: "" diff --git a/internal/config/application.go b/internal/config/application.go index ea417aa17792..e7d6539fea41 100644 --- a/internal/config/application.go +++ b/internal/config/application.go @@ -42,31 +42,32 @@ type Application struct { ConfigPath string `yaml:"configPath,omitempty" json:"configPath" mapstructure:"config"` Verbosity uint `yaml:"verbosity,omitempty" json:"verbosity" mapstructure:"verbosity"` // -q, indicates to not show any status output to stderr (ETUI or logging UI) - Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` - Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output - OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output - File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to - CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not - Dev development `yaml:"dev" json:"dev" mapstructure:"dev"` - Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options - Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"` - Package pkg `yaml:"package" json:"package" mapstructure:"package"` - Golang golang `yaml:"golang" json:"golang" 
mapstructure:"golang"` - LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` - Python python `yaml:"python" json:"python" mapstructure:"python"` - Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"` - FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` - FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"` - FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` - Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"` - Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"` - Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` - Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` - Name string `yaml:"name" json:"name" mapstructure:"name"` - Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"` - Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel - DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source - BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths + Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` + Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output + OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output + File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to + CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" 
mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not + Dev development `yaml:"dev" json:"dev" mapstructure:"dev"` + Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options + Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"` + Package pkg `yaml:"package" json:"package" mapstructure:"package"` + Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"` + LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` + Python python `yaml:"python" json:"python" mapstructure:"python"` + Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"` + FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` + FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"` + FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` + Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"` + Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"` + Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` + Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` + Name string `yaml:"name" json:"name" mapstructure:"name"` + Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"` + Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel + DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source + BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths + ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" 
json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files } func (cfg Application) ToCatalogerConfig() cataloger.Config { @@ -76,8 +77,9 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config { IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives, Scope: cfg.Package.Cataloger.ScopeOpt, }, - Catalogers: cfg.Catalogers, - Parallelism: cfg.Parallelism, + Catalogers: cfg.Catalogers, + Parallelism: cfg.Parallelism, + ExcludeBinaryOverlapByOwnership: cfg.ExcludeBinaryOverlapByOwnership, Golang: golangCataloger.NewGoCatalogerOpts(). WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses). WithLocalModCacheDir(cfg.Golang.LocalModCacheDir). @@ -221,6 +223,7 @@ func loadDefaultValues(v *viper.Viper) { v.SetDefault("catalogers", nil) v.SetDefault("parallelism", 1) v.SetDefault("default-image-pull-source", "") + v.SetDefault("exclude-binary-overlap-by-ownership", true) // for each field in the configuration struct, see if the field implements the defaultValueLoader interface and invoke it if it does value := reflect.ValueOf(Application{}) diff --git a/syft/lib.go b/syft/lib.go index b45307012187..af24ce6e802a 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -74,11 +74,34 @@ func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection, catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...) - relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...) 
+ // apply exclusions to the package catalog + // default config value for this is true + // https://github.com/anchore/syft/issues/931 + if cfg.ExcludeBinaryOverlapByOwnership { + for _, r := range relationships { + if cataloger.Exclude(r, catalog) { + catalog.Delete(r.To.ID()) + relationships = removeRelationshipsByID(relationships, r.To.ID()) + } + } + } + // no need to consider source relationships for os -> binary exclusions + relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...) return catalog, relationships, release, err } +// removeRelationshipsByID returns a new slice holding every relationship that does not reference id as either its From or To side; the input slice is left unmodified. +func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship { + // build the result in a fresh slice instead of filtering in place (relationships[:0]): CatalogPackages calls this while ranging over the same slice, and an in-place filter would shift not-yet-visited relationships underneath that loop so some would never be checked + filtered := make([]artifact.Relationship, 0, len(relationships)) + for _, r := range relationships { + if r.To.ID() != id && r.From.ID() != id { + filtered = append(filtered, r) + } + } + return filtered +} + func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship { relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method? for p := range c.Enumerate() { diff --git a/syft/pkg/cataloger/alpm/cataloger.go b/syft/pkg/cataloger/alpm/cataloger.go index 39bc7d816c05..10512c4349d7 100644 --- a/syft/pkg/cataloger/alpm/cataloger.go +++ b/syft/pkg/cataloger/alpm/cataloger.go @@ -5,9 +5,9 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -const catalogerName = "alpmdb-cataloger" +const CatalogerName = "alpmdb-cataloger" func NewAlpmdbCataloger() *generic.Cataloger { - return generic.NewCataloger(catalogerName). + return generic.NewCataloger(CatalogerName). 
WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob) } diff --git a/syft/pkg/cataloger/apkdb/cataloger.go b/syft/pkg/cataloger/apkdb/cataloger.go index d3cea71263cd..646107f11e1b 100644 --- a/syft/pkg/cataloger/apkdb/cataloger.go +++ b/syft/pkg/cataloger/apkdb/cataloger.go @@ -8,10 +8,10 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -const catalogerName = "apkdb-cataloger" +const CatalogerName = "apkdb-cataloger" // NewApkdbCataloger returns a new Alpine DB cataloger object. func NewApkdbCataloger() *generic.Cataloger { - return generic.NewCataloger(catalogerName). + return generic.NewCataloger(CatalogerName). WithParserByGlobs(parseApkDB, pkg.ApkDBGlob) } diff --git a/syft/pkg/cataloger/binary/cataloger.go b/syft/pkg/cataloger/binary/cataloger.go index 0cf04b729ed7..8434741aea66 100644 --- a/syft/pkg/cataloger/binary/cataloger.go +++ b/syft/pkg/cataloger/binary/cataloger.go @@ -7,7 +7,7 @@ import ( "github.com/anchore/syft/syft/pkg" ) -const catalogerName = "binary-cataloger" +const CatalogerName = "binary-cataloger" func NewCataloger() *Cataloger { return &Cataloger{} @@ -22,7 +22,7 @@ type Cataloger struct{} // Name returns a string that uniquely describes the Cataloger func (c Cataloger) Name() string { - return catalogerName + return CatalogerName } // Catalog is given an object to resolve file references and content, this function returns any discovered Packages diff --git a/syft/pkg/cataloger/binary/package.go b/syft/pkg/cataloger/binary/package.go index a677b02a6235..3cbf31893f1f 100644 --- a/syft/pkg/cataloger/binary/package.go +++ b/syft/pkg/cataloger/binary/package.go @@ -31,7 +31,7 @@ func newPackage(classifier classifier, location file.Location, matchMetadata map ), Type: pkg.BinaryPkg, CPEs: cpes, - FoundBy: catalogerName, + FoundBy: CatalogerName, MetadataType: pkg.BinaryMetadataType, Metadata: pkg.BinaryMetadata{ Matches: []pkg.ClassifierMatch{ diff --git a/syft/pkg/cataloger/config.go b/syft/pkg/cataloger/config.go index 
29c1633c2466..d0a64d83de07 100644 --- a/syft/pkg/cataloger/config.go +++ b/syft/pkg/cataloger/config.go @@ -8,23 +8,14 @@ import ( ) // TODO: these field naming vs helper function naming schemes are inconsistent. - type Config struct { - Search SearchConfig - Golang golang.GoCatalogerOpts - LinuxKernel kernel.LinuxCatalogerConfig - Python python.CatalogerConfig - Catalogers []string - Parallelism int -} - -func DefaultConfig() Config { - return Config{ - Search: DefaultSearchConfig(), - Parallelism: 1, - LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), - Python: python.DefaultCatalogerConfig(), - } + Search SearchConfig + Golang golang.GoCatalogerOpts + LinuxKernel kernel.LinuxCatalogerConfig + Python python.CatalogerConfig + Catalogers []string + Parallelism int + ExcludeBinaryOverlapByOwnership bool } func (c Config) Java() java.Config { diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index 946abae9e664..4dd3476b6b6e 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -7,11 +7,11 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -const catalogerName = "dpkgdb-cataloger" +const CatalogerName = "dpkgdb-cataloger" // NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files. func NewDpkgdbCataloger() *generic.Cataloger { - return generic.NewCataloger(catalogerName). + return generic.NewCataloger(CatalogerName). 
// note: these globs have been intentionally split up in order to improve search performance, // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status") diff --git a/syft/pkg/cataloger/nix/cataloger.go b/syft/pkg/cataloger/nix/cataloger.go index 5d920f2300cd..9fb3112dd306 100644 --- a/syft/pkg/cataloger/nix/cataloger.go +++ b/syft/pkg/cataloger/nix/cataloger.go @@ -12,7 +12,7 @@ import ( ) const ( - catalogerName = "nix-store-cataloger" + CatalogerName = "nix-store-cataloger" nixStoreGlob = "**/nix/store/*" ) @@ -24,7 +24,7 @@ func NewStoreCataloger() *StoreCataloger { } func (c *StoreCataloger) Name() string { - return catalogerName + return CatalogerName } func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { diff --git a/syft/pkg/cataloger/nix/cataloger_test.go b/syft/pkg/cataloger/nix/cataloger_test.go index f43babde93b0..c101158c1316 100644 --- a/syft/pkg/cataloger/nix/cataloger_test.go +++ b/syft/pkg/cataloger/nix/cataloger_test.go @@ -24,7 +24,7 @@ func TestCataloger_Catalog(t *testing.T) { Version: "2.34-210", PURL: "pkg:nix/glibc@2.34-210?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga", Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")), - FoundBy: catalogerName, + FoundBy: CatalogerName, Type: pkg.NixPkg, MetadataType: pkg.NixStoreMetadataType, Metadata: pkg.NixStoreMetadata{ diff --git a/syft/pkg/cataloger/nix/package.go b/syft/pkg/cataloger/nix/package.go index 090dfe1379c5..879a9e7e30a2 100644 --- a/syft/pkg/cataloger/nix/package.go +++ b/syft/pkg/cataloger/nix/package.go @@ -10,7 +10,7 @@ func newNixStorePackage(storePath nixStorePath, locations ...file.Location) pkg. 
p := pkg.Package{ Name: storePath.name, Version: storePath.version, - FoundBy: catalogerName, + FoundBy: CatalogerName, Locations: file.NewLocationSet(locations...), Type: pkg.NixPkg, PURL: packageURL(storePath), diff --git a/syft/pkg/cataloger/package_exclusions.go b/syft/pkg/cataloger/package_exclusions.go new file mode 100644 index 000000000000..db3a157fd983 --- /dev/null +++ b/syft/pkg/cataloger/package_exclusions.go @@ -0,0 +1,55 @@ +package cataloger + +import ( + "golang.org/x/exp/slices" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/alpm" + "github.com/anchore/syft/syft/pkg/cataloger/apkdb" + "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/deb" + "github.com/anchore/syft/syft/pkg/cataloger/nix" + "github.com/anchore/syft/syft/pkg/cataloger/rpm" +) + +var ( + osCatalogerTypes = []string{ + apkdb.CatalogerName, + alpm.CatalogerName, + deb.CatalogerName, + nix.CatalogerName, + rpm.DBCatalogerName, + rpm.FileCatalogerName, + } + binaryCatalogerTypes = []string{binary.CatalogerName} +) + +// Exclude reports whether the child ("To") package of the given relationship should be removed from the collection; it returns true only when all of the following hold +// 1) the relationship between packages is OwnershipByFileOverlap +// 2) the parent is an "os" package +// 3) the child is a synthetic package generated by the binary cataloger +// Note: package names are not compared; ownership by file overlap alone decides the exclusion +// This exclude was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931 +func Exclude(r artifact.Relationship, c *pkg.Collection) bool { + if artifact.OwnershipByFileOverlapRelationship != r.Type { + return false + } + + parent := c.Package(r.From.ID()) + if parent == nil { + return false + } + + parentInExclusion := slices.Contains(osCatalogerTypes, parent.FoundBy) + if !parentInExclusion { + return false + } + + child := c.Package(r.To.ID()) + if child == nil { + return false + } + + return 
slices.Contains(binaryCatalogerTypes, child.FoundBy) +} diff --git a/syft/pkg/cataloger/package_exclusions_test.go b/syft/pkg/cataloger/package_exclusions_test.go new file mode 100644 index 000000000000..cab972890efe --- /dev/null +++ b/syft/pkg/cataloger/package_exclusions_test.go @@ -0,0 +1,78 @@ +package cataloger + +import ( + "testing" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/apkdb" + "github.com/anchore/syft/syft/pkg/cataloger/binary" +) + +// TestExclude checks the Exclude decision for each relationship/collection pair, failing on false positives as well as false negatives. +func TestExclude(t *testing.T) { + packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg, FoundBy: apkdb.CatalogerName} + packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg, FoundBy: "language-cataloger"} + packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName} + packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName} + for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD} { + p := p + p.SetID() + } + + tests := []struct { + name string + relationship artifact.Relationship + packages *pkg.Collection + shouldExclude bool + }{ + { + name: "no exclusions from os -> python", + relationship: artifact.Relationship{ + Type: artifact.OwnershipByFileOverlapRelationship, + From: packageA, + To: packageB, + }, + packages: pkg.NewCollection(packageA, packageB), + shouldExclude: false, + }, + { + name: "exclusions from os -> binary", + relationship: artifact.Relationship{ + Type: artifact.OwnershipByFileOverlapRelationship, + From: packageA, + To: packageC, + }, + packages: pkg.NewCollection(packageA, packageC), + shouldExclude: true, + }, + { + name: "no exclusions from python -> binary", + relationship: artifact.Relationship{ + Type: artifact.OwnershipByFileOverlapRelationship, + From: packageB, + To: packageC, + }, + packages: pkg.NewCollection(packageB, packageC), + shouldExclude: false, + }, + { + name: "exclusions for 
different package names", + relationship: artifact.Relationship{ + Type: artifact.OwnershipByFileOverlapRelationship, + From: packageA, + To: packageD, + }, + packages: pkg.NewCollection(packageA, packageD), + shouldExclude: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := Exclude(test.relationship, test.packages); got != test.shouldExclude { + t.Errorf("Exclude() = %v, want %v for relationship %+v", got, test.shouldExclude, test.relationship) + } + }) + + } +} diff --git a/syft/pkg/cataloger/rpm/cataloger.go b/syft/pkg/cataloger/rpm/cataloger.go index 909a6e973041..573bb4d1c783 100644 --- a/syft/pkg/cataloger/rpm/cataloger.go +++ b/syft/pkg/cataloger/rpm/cataloger.go @@ -11,6 +11,11 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) +const ( + DBCatalogerName = "rpm-db-cataloger" + FileCatalogerName = "rpm-file-cataloger" +) + // NewRpmDBCataloger returns a new RPM DB cataloger object. func NewRpmDBCataloger() *generic.Cataloger { // check if a sqlite driver is available @@ -18,14 +23,14 @@ func NewRpmDBCataloger() *generic.Cataloger { log.Warnf("sqlite driver is not available, newer RPM databases might not be cataloged") } - return generic.NewCataloger("rpm-db-cataloger"). + return generic.NewCataloger(DBCatalogerName). WithParserByGlobs(parseRpmDB, pkg.RpmDBGlob). WithParserByGlobs(parseRpmManifest, pkg.RpmManifestGlob) } // NewFileCataloger returns a new RPM file cataloger object. func NewFileCataloger() *generic.Cataloger { - return generic.NewCataloger("rpm-file-cataloger"). + return generic.NewCataloger(FileCatalogerName). 
WithParserByGlobs(parseRpm, "**/*.rpm") } diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index 4bd0643d3673..41bc2bcc074e 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -104,7 +104,7 @@ func TestPackagesCmdFlags(t *testing.T) { name: "squashed-scope-flag-hidden-packages", args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage}, assertions: []traitAssertion{ - assertPackageCount(163), + assertPackageCount(162), assertNotInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, }, @@ -113,7 +113,7 @@ func TestPackagesCmdFlags(t *testing.T) { name: "all-layers-scope-flag", args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage}, assertions: []traitAssertion{ - assertPackageCount(164), // packages are now deduplicated for this case + assertPackageCount(163), // packages are now deduplicated for this case assertInOutput("all-layers"), assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, @@ -126,7 +126,7 @@ func TestPackagesCmdFlags(t *testing.T) { "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", }, assertions: []traitAssertion{ - assertPackageCount(164), // packages are now deduplicated for this case + assertPackageCount(163), // packages are now deduplicated for this case assertInOutput("all-layers"), assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, diff --git a/test/compare/test-fixtures/acceptance-centos-8.2.2004.json b/test/compare/test-fixtures/acceptance-centos-8.2.2004.json index bc1801493f6f..218f9a825fe8 100644 --- a/test/compare/test-fixtures/acceptance-centos-8.2.2004.json +++ b/test/compare/test-fixtures/acceptance-centos-8.2.2004.json @@ -56880,40 +56880,6 @@ ] } }, - { - "id": "875f4d287d1bdcfd", - "name": "python", - "version": "3.6.8", - "type": "binary", - "foundBy": "binary-cataloger", - "locations": [ - { - "path": "/usr/lib64/libpython3.6m.so.1.0", - "layerID": 
"sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8" - } - ], - "licenses": [], - "language": "", - "cpes": [ - "cpe:2.3:a:python_software_foundation:python:3.6.8:*:*:*:*:*:*:*", - "cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*", - "cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*" - ], - "purl": "pkg:generic/python@3.6.8", - "metadataType": "BinaryMetadata", - "metadata": { - "matches": [ - { - "classifier": "python-binary-lib", - "location": { - "path": "/usr/lib64/libpython3.6m.so.1.0", - "layerID": "sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8", - "virtualPath": "/usr/lib64/libpython3.6m.so.1.0" - } - } - ] - } - }, { "id": "e57db3737a1d260f", "name": "python3-dnf", diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 0b6ca54f0926..4596a75aa3c6 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -23,7 +23,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName) var pc *pkg.Collection - for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) { + for _, c := range cataloger.ImageCatalogers(defaultConfig()) { // in case of future alteration where state is persisted, assume no dependency is safe to reuse userInput := "docker-archive:" + tarPath detection, err := source.Detect(userInput, source.DefaultDetectConfig()) @@ -260,7 +260,7 @@ func TestPkgCoverageCatalogerConfiguration(t *testing.T) { assert.Equal(t, definedLanguages, observedLanguages) // Verify that rust isn't actually an image cataloger - c := cataloger.DefaultConfig() + c := defaultConfig() c.Catalogers = []string{"rust"} assert.Len(t, cataloger.ImageCatalogers(c), 0) } diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 458d33cdda93..844959bfec57 100644 --- a/test/integration/encode_decode_cycle_test.go +++ 
b/test/integration/encode_decode_cycle_test.go @@ -26,7 +26,10 @@ import ( // encode-decode-encode loop which will detect lossy behavior in both directions. func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { // use second image for relationships - images := []string{"image-pkg-coverage", "image-owning-package"} + images := []string{ + "image-pkg-coverage", + "image-owning-package", + } tests := []struct { formatOption sbom.FormatID redactor func(in []byte) []byte diff --git a/test/integration/package_deduplication_test.go b/test/integration/package_deduplication_test.go index e0760cd37b3e..75c86234e449 100644 --- a/test/integration/package_deduplication_test.go +++ b/test/integration/package_deduplication_test.go @@ -22,13 +22,13 @@ func TestPackageDeduplication(t *testing.T) { }{ { scope: source.AllLayersScope, - packageCount: 174, // without deduplication this would be 618 + packageCount: 172, // without deduplication this would be 618 instanceCount: map[string]int{ "basesystem": 1, "wget": 1, "curl": 2, // upgraded in the image "vsftpd": 1, - "httpd": 2, // rpm, binary + "httpd": 1, // rpm, - we exclude binary }, locationCount: map[string]int{ "basesystem-10.0-7.el7.centos": 4, @@ -37,18 +37,18 @@ func TestPackageDeduplication(t *testing.T) { "wget-1.14-18.el7_6.1": 3, "vsftpd-3.0.2-29.el7_9": 2, "httpd-2.4.6-97.el7.centos.5": 1, - "httpd-2.4.6": 1, // binary + // "httpd-2.4.6": 1, // binary }, }, { scope: source.SquashedScope, - packageCount: 172, + packageCount: 170, instanceCount: map[string]int{ "basesystem": 1, "wget": 1, "curl": 1, // upgraded, but the most recent "vsftpd": 1, - "httpd": 2, // rpm, binary + "httpd": 1, // rpm, binary is now excluded by overlap }, locationCount: map[string]int{ "basesystem-10.0-7.el7.centos": 1, @@ -56,7 +56,7 @@ func TestPackageDeduplication(t *testing.T) { "wget-1.14-18.el7_6.1": 1, "vsftpd-3.0.2-29.el7_9": 1, "httpd-2.4.6-97.el7.centos.5": 1, - "httpd-2.4.6": 1, // binary + // "httpd-2.4.6": 1, // binary 
(excluded) }, }, } @@ -64,7 +64,6 @@ func TestPackageDeduplication(t *testing.T) { for _, tt := range tests { t.Run(string(tt.scope), func(t *testing.T) { sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil) - for _, p := range sbom.Artifacts.Packages.Sorted() { if p.Type == pkg.BinaryPkg { assert.NotEmpty(t, p.Name) diff --git a/test/integration/package_ownership_relationship_test.go b/test/integration/package_ownership_relationship_test.go index 4ca1873a1ef9..a984972897df 100644 --- a/test/integration/package_ownership_relationship_test.go +++ b/test/integration/package_ownership_relationship_test.go @@ -7,11 +7,11 @@ import ( "github.com/anchore/syft/syft/formats/syftjson" syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model" + "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" ) func TestPackageOwnershipRelationships(t *testing.T) { - // ensure that the json encoder is applying artifact ownership with an image that has expected ownership relationships tests := []struct { fixture string @@ -45,3 +45,43 @@ func TestPackageOwnershipRelationships(t *testing.T) { } } + +func TestPackageOwnershipExclusions(t *testing.T) { + // ensure that the json encoder is excluding packages by artifact ownership with an image that has expected ownership relationships + tests := []struct { + name string + fixture string + }{ + { + name: "busybox binary is filtered based on ownership relationship", + fixture: "image-os-binary-overlap", + }, + } + + for _, test := range tests { + t.Run(test.fixture, func(t *testing.T) { + sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope, nil) + binaryPackages := make([]pkg.Package, 0) + apkPackages := make([]pkg.Package, 0) + for p := range sbom.Artifacts.Packages.Enumerate() { + if p.Type == pkg.BinaryPkg && p.Name == "busybox" { + binaryPackages = append(binaryPackages, p) + } + if p.Type == pkg.ApkPkg && p.Name == "busybox" { + apkPackages = append(apkPackages, p) + 
} + } + + if len(binaryPackages) != 0 { + packageNames := make([]string, 0) + for _, p := range binaryPackages { + packageNames = append(packageNames, p.Name) + } + t.Errorf("expected to find no binary packages but found %d packages: %v", len(binaryPackages), packageNames) + } + if len(apkPackages) == 0 { + t.Errorf("expected to find apk packages but found none") + } + }) + } +} diff --git a/test/integration/test-fixtures/image-os-binary-overlap/Dockerfile b/test/integration/test-fixtures/image-os-binary-overlap/Dockerfile new file mode 100644 index 000000000000..0951649a3ccd --- /dev/null +++ b/test/integration/test-fixtures/image-os-binary-overlap/Dockerfile @@ -0,0 +1,7 @@ +FROM alpine:latest + +# syft should no longer show the binary package for this image: +# https://github.com/anchore/syft/issues/931 +# busybox 1.36.1 binary <-- not reported (synthetic package overlap) +# busybox 1.36.1-r0 apk +RUN apk update && apk add busybox diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index eeb583f65585..207fe675c354 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -8,6 +8,8 @@ import ( "github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/syft/syft" "github.com/anchore/syft/syft/pkg/cataloger" + "github.com/anchore/syft/syft/pkg/cataloger/kernel" + "github.com/anchore/syft/syft/pkg/cataloger/python" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) @@ -24,7 +26,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco theSource.Close() }) - c := cataloger.DefaultConfig() + c := defaultConfig() c.Catalogers = catalogerCfg c.Search.Scope = scope @@ -52,6 +54,16 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco }, theSource } +func defaultConfig() cataloger.Config { + return cataloger.Config{ + Search: cataloger.DefaultSearchConfig(), + Parallelism: 1, + LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), + 
Python: python.DefaultCatalogerConfig(), + ExcludeBinaryOverlapByOwnership: true, + } +} + func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) { userInput := "dir:" + dir detection, err := source.Detect(userInput, source.DefaultDetectConfig()) @@ -63,7 +75,7 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) { }) // TODO: this would be better with functional options (after/during API refactor) - c := cataloger.DefaultConfig() + c := defaultConfig() c.Search.Scope = source.AllLayersScope pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) if err != nil {