Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

931: binary cataloger exclusion defaults #1948

Merged
merged 18 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion syft/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,31 @@ func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection,

catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)

relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
// apply exclusions to the package catalog
// https://github.com/anchore/syft/issues/931
for _, r := range relationships {
if cataloger.Exclude(r, catalog) {
catalog.Delete(r.To.ID())
relationships = removeRelationshipsByID(relationships, r.To.ID())
}
}

// no need to consider source relationships for os -> binary exclusions
relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
return catalog, relationships, release, err
}

func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
filtered := relationships[:0]
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
for _, r := range relationships {
if r.To.ID() != id && r.From.ID() != id {
filtered = append(filtered, r)
}
}
return filtered
}

func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
for p := range c.Enumerate() {
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/alpm/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "alpmdb-cataloger"
const CatalogerName = "alpmdb-cataloger"

func NewAlpmdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/apkdb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "apkdb-cataloger"
const CatalogerName = "apkdb-cataloger"

// NewApkdbCataloger returns a new Alpine DB cataloger object.
func NewApkdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseApkDB, pkg.ApkDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/binary/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)

const catalogerName = "binary-cataloger"
const CatalogerName = "binary-cataloger"

func NewCataloger() *Cataloger {
return &Cataloger{}
Expand All @@ -22,7 +22,7 @@ type Cataloger struct{}

// Name returns a string that uniquely describes the Cataloger
func (c Cataloger) Name() string {
return catalogerName
return CatalogerName
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/binary/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func newPackage(classifier classifier, location file.Location, matchMetadata map
),
Type: pkg.BinaryPkg,
CPEs: cpes,
FoundBy: catalogerName,
FoundBy: CatalogerName,
MetadataType: pkg.BinaryMetadataType,
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
Expand Down
9 changes: 0 additions & 9 deletions syft/pkg/cataloger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,6 @@ type Config struct {
Parallelism int
}

func DefaultConfig() Config {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why delete the DefaultConfig method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function was only used as a part of *_test.go files. It was moved here:

func defaultConfig() cataloger.Config {
return cataloger.Config{
Search: cataloger.DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
ExcludeBinaryOverlapByOwnership: true,
}
}

Apologies for the boy scout change on an unrelated PR - my IDE was yelling about this being deadcode and I could not figure out why - the refactor over to test resolved that issue

return Config{
Search: DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
}
}

func (c Config) Java() java.Config {
return java.Config{
SearchUnindexedArchives: c.Search.IncludeUnindexedArchives,
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/deb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "dpkgdb-cataloger"
const CatalogerName = "dpkgdb-cataloger"

// NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files.
func NewDpkgdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
// note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status")
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/nix/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

const (
catalogerName = "nix-store-cataloger"
CatalogerName = "nix-store-cataloger"
nixStoreGlob = "**/nix/store/*"
)

Expand All @@ -24,7 +24,7 @@ func NewStoreCataloger() *StoreCataloger {
}

func (c *StoreCataloger) Name() string {
return catalogerName
return CatalogerName
}

func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestCataloger_Catalog(t *testing.T) {
Version: "2.34-210",
PURL: "pkg:nix/[email protected]?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga",
Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")),
FoundBy: catalogerName,
FoundBy: CatalogerName,
Type: pkg.NixPkg,
MetadataType: pkg.NixStoreMetadataType,
Metadata: pkg.NixStoreMetadata{
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func newNixStorePackage(storePath nixStorePath, locations ...file.Location) pkg.
p := pkg.Package{
Name: storePath.name,
Version: storePath.version,
FoundBy: catalogerName,
FoundBy: CatalogerName,
Locations: file.NewLocationSet(locations...),
Type: pkg.NixPkg,
PURL: packageURL(storePath),
Expand Down
68 changes: 68 additions & 0 deletions syft/pkg/cataloger/package_exclusions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package cataloger

import (
"golang.org/x/exp/slices"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
"github.com/anchore/syft/syft/pkg/cataloger/nix"
)

type CategoryType string

const (
OsCatalogerType CategoryType = "os"
BinaryCatalogerType CategoryType = "binary"
)

var CatalogerTypeIndex = map[CategoryType][]string{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could these just be simplified into 2 variables? something like:

var parentCatalogerTypes = []string { .... }
var childCatalogerTypes = []string { .... }

Copy link
Contributor Author

@spiffcs spiffcs Aug 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Yea that would be a good simplification here.

My only hesitancy to change it back to that is the original config object we had discussed on the issue:
#931 (comment)

I think keeping this as is has two advantages:

  1. Is clear to future users/contributors that Os/Binary categorization types were an explicit choice as and additional condition. The parent child designation loses this nuance a little.
  2. It keeps us open to category based configuration options we may want to consider in the future

WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the concern is that we want to be explicit about OS and binary cataloger types, these could be named

var osCatalogerTypes = []string { .... }
var binaryCatalogerTypes = []string { .... }

keeps us open to category based configuration options we may want to consider in the future

I'm all for forward-thinking such as being open to more configuration. The suggestion was more that since we're not doing that at the moment, we don't necessarily know what that would look like (although you had an option originally), so it might be better to just make whatever changes at such time as we do change the feature. Again, this is not a blocker and I'll leave it to your discernment.

kzantow marked this conversation as resolved.
Show resolved Hide resolved
"os": {
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
apkdb.CatalogerName,
alpm.CatalogerName,
deb.CatalogerName,
nix.CatalogerName,
"rpm-db-cataloger",
"rpm-file-cataloger",
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
},
"binary": {
binary.CatalogerName,
},
}

type PackageExclusionsConfig struct {
Exclusions []PackageExclusion
}

type PackageExclusion struct {
RelationshipType artifact.RelationshipType
ParentType CategoryType
ExclusionType CategoryType
}
spiffcs marked this conversation as resolved.
Show resolved Hide resolved

// Exclude will remove packages from a collection given the following properties are true
// 1) the relationship between packages is OwnershipByFileOverlap
// 2) the parent is an "os" package
// 3) the child is a synthetic package generated by the binary cataloger
// 4) the package names are identical
// This exclude was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func Exclude(r artifact.Relationship, c *pkg.Collection) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function seems very specific, but has a very generic name. I think the name should probably be tweaked to be a little more specific.

parent := c.Package(r.From.ID())
if parent == nil {
return false
}
child := c.Package(r.To.ID())
if child == nil {
return false
}

parentInExclusion := slices.Contains(CatalogerTypeIndex["os"], parent.FoundBy)
childInExclusion := slices.Contains(CatalogerTypeIndex["binary"], child.FoundBy)

return artifact.OwnershipByFileOverlapRelationship == r.Type &&
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
parentInExclusion &&
childInExclusion
}
78 changes: 78 additions & 0 deletions syft/pkg/cataloger/package_exclusions_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package cataloger

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
)

func TestExclude(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg, FoundBy: apkdb.CatalogerName}
packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg, FoundBy: "language-cataloger"}
packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD} {
p := p
p.SetID()
}

tests := []struct {
name string
relationship artifact.Relationship
packages *pkg.Collection
shouldExclude bool
}{
{
name: "no exclusions from os -> python",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageB,
},
packages: pkg.NewCollection(packageA, packageB),
shouldExclude: false,
},
{
name: "exclusions from os -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageC,
},
packages: pkg.NewCollection(packageA, packageC),
shouldExclude: true,
},
{
name: "no exclusions from python -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageB,
To: packageC,
},
packages: pkg.NewCollection(packageB, packageC),
shouldExclude: false,
},
{
name: "no exclusions for different package names",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageD,
},
packages: pkg.NewCollection(packageA, packageD),
shouldExclude: false,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if !Exclude(test.relationship, test.packages) && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
})

}
}
6 changes: 3 additions & 3 deletions test/cli/packages_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "squashed-scope-flag-hidden-packages",
args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage},
assertions: []traitAssertion{
assertPackageCount(163),
assertPackageCount(162),
assertNotInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
},
Expand All @@ -113,7 +113,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "all-layers-scope-flag",
args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage},
assertions: []traitAssertion{
assertPackageCount(164), // packages are now deduplicated for this case
assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
Expand All @@ -126,7 +126,7 @@ func TestPackagesCmdFlags(t *testing.T) {
"SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers",
},
assertions: []traitAssertion{
assertPackageCount(164), // packages are now deduplicated for this case
assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
Expand Down
34 changes: 0 additions & 34 deletions test/compare/test-fixtures/acceptance-centos-8.2.2004.json
Original file line number Diff line number Diff line change
Expand Up @@ -56880,40 +56880,6 @@
]
}
},
{
"id": "875f4d287d1bdcfd",
"name": "python",
"version": "3.6.8",
"type": "binary",
"foundBy": "binary-cataloger",
"locations": [
{
"path": "/usr/lib64/libpython3.6m.so.1.0",
"layerID": "sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8"
}
],
"licenses": [],
"language": "",
"cpes": [
"cpe:2.3:a:python_software_foundation:python:3.6.8:*:*:*:*:*:*:*",
"cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*",
"cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*"
],
"purl": "pkg:generic/[email protected]",
"metadataType": "BinaryMetadata",
"metadata": {
"matches": [
{
"classifier": "python-binary-lib",
"location": {
"path": "/usr/lib64/libpython3.6m.so.1.0",
"layerID": "sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8",
"virtualPath": "/usr/lib64/libpython3.6m.so.1.0"
}
}
]
}
},
{
"id": "e57db3737a1d260f",
"name": "python3-dnf",
Expand Down
4 changes: 2 additions & 2 deletions test/integration/catalog_packages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName)

var pc *pkg.Collection
for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) {
for _, c := range cataloger.ImageCatalogers(defaultConfig()) {
// in case of future alteration where state is persisted, assume no dependency is safe to reuse
userInput := "docker-archive:" + tarPath
detection, err := source.Detect(userInput, source.DefaultDetectConfig())
Expand Down Expand Up @@ -260,7 +260,7 @@ func TestPkgCoverageCatalogerConfiguration(t *testing.T) {
assert.Equal(t, definedLanguages, observedLanguages)

// Verify that rust isn't actually an image cataloger
c := cataloger.DefaultConfig()
c := defaultConfig()
c.Catalogers = []string{"rust"}
assert.Len(t, cataloger.ImageCatalogers(c), 0)
}
Expand Down
5 changes: 4 additions & 1 deletion test/integration/encode_decode_cycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ import (
// encode-decode-encode loop which will detect lossy behavior in both directions.
func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
// use second image for relationships
images := []string{"image-pkg-coverage", "image-owning-package"}
images := []string{
//"image-pkg-coverage",
"image-owning-package",
}
kzantow marked this conversation as resolved.
Show resolved Hide resolved
tests := []struct {
formatOption sbom.FormatID
redactor func(in []byte) []byte
Expand Down
Loading