From 3d09fb364b16fb61aff1d49c9b351fc2b7d63cf0 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 25 Oct 2022 11:47:32 -0400 Subject: [PATCH] port deb/dpkg cataloger to new generic cataloger pattern (#1288) Signed-off-by: Alex Goodman Signed-off-by: Alex Goodman --- internal/string_helpers.go | 9 + syft/lib.go | 2 +- syft/pkg/cataloger.go | 16 ++ syft/pkg/cataloger/apkdb/parse_apk_db_test.go | 2 +- syft/pkg/cataloger/catalog.go | 2 +- syft/pkg/cataloger/cataloger.go | 28 +- syft/pkg/cataloger/cataloger_test.go | 4 +- .../pkg/cataloger/cpp/parse_conanfile_test.go | 2 +- .../pkg/cataloger/cpp/parse_conanlock_test.go | 2 +- .../cataloger/dart/parse_pubspec_lock_test.go | 2 +- syft/pkg/cataloger/deb/cataloger.go | 228 +--------------- syft/pkg/cataloger/deb/cataloger_test.go | 175 +++++-------- syft/pkg/cataloger/deb/package.go | 247 ++++++++++++++++++ .../deb/package_test.go} | 97 +++---- .../pkg/cataloger/deb/parse_copyright_test.go | 31 +-- ...{parse_dpkg_status.go => parse_dpkg_db.go} | 68 +++-- ...g_status_test.go => parse_dpkg_db_test.go} | 230 +++++++--------- .../deb/parse_dpkg_info_files_test.go | 57 +--- .../dotnet/parse_dotnet_deps_test.go | 2 +- .../internal/pkgtest/assert_packages_equal.go | 37 --- .../internal/pkgtest/test_generic_parser.go | 156 ++++++++++- syft/pkg/dpkg_metadata.go | 42 +-- syft/pkg/url_test.go | 19 +- 23 files changed, 702 insertions(+), 756 deletions(-) create mode 100644 syft/pkg/cataloger.go create mode 100644 syft/pkg/cataloger/deb/package.go rename syft/pkg/{dpkg_metadata_test.go => cataloger/deb/package_test.go} (52%) rename syft/pkg/cataloger/deb/{parse_dpkg_status.go => parse_dpkg_db.go} (75%) rename syft/pkg/cataloger/deb/{parse_dpkg_status_test.go => parse_dpkg_db_test.go} (64%) delete mode 100644 syft/pkg/cataloger/internal/pkgtest/assert_packages_equal.go diff --git a/internal/string_helpers.go b/internal/string_helpers.go index 87bebea915a..09a8fda2d5a 100644 --- a/internal/string_helpers.go +++ 
b/internal/string_helpers.go @@ -19,3 +19,12 @@ func TruncateMiddleEllipsis(input string, maxLen int) string { } return input[:maxLen/2] + "..." + input[len(input)-(maxLen/2):] } + +func StringInSlice(a string, list []string) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} diff --git a/syft/lib.go b/syft/lib.go index f10e778a331..44261c38421 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -49,7 +49,7 @@ func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Catalog, [] } // if the catalogers have been configured, use them regardless of input type - var catalogers []cataloger.Cataloger + var catalogers []pkg.Cataloger if len(cfg.Catalogers) > 0 { catalogers = cataloger.AllCatalogers(cfg) } else { diff --git a/syft/pkg/cataloger.go b/syft/pkg/cataloger.go new file mode 100644 index 00000000000..28cc57c35dd --- /dev/null +++ b/syft/pkg/cataloger.go @@ -0,0 +1,16 @@ +package pkg + +import ( + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/source" +) + +// Cataloger describes behavior for an object to participate in parsing container image or file system +// contents for the purpose of discovering Packages. Each concrete implementation should focus on discovering Packages +// for a specific Package Type or ecosystem. +type Cataloger interface { + // Name returns a string that uniquely describes a cataloger + Name() string + // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. 
+ Catalog(resolver source.FileResolver) ([]Package, []artifact.Relationship, error) +} diff --git a/syft/pkg/cataloger/apkdb/parse_apk_db_test.go b/syft/pkg/cataloger/apkdb/parse_apk_db_test.go index af385dbbdde..ef8dda2c195 100644 --- a/syft/pkg/cataloger/apkdb/parse_apk_db_test.go +++ b/syft/pkg/cataloger/apkdb/parse_apk_db_test.go @@ -754,7 +754,7 @@ func TestMultiplePackages(t *testing.T) { VersionID: "3.12", }} - pkgtest.TestGenericParserWithEnv(t, fixture, parseApkDB, &env, expected, expectedRelationships) + pkgtest.TestFileParserWithEnv(t, fixture, parseApkDB, &env, expected, expectedRelationships) } diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index b844b1d9720..1e14ef86af2 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -42,7 +42,7 @@ func newMonitor() (*progress.Manual, *progress.Manual) { // In order to efficiently retrieve contents from a underlying container image the content fetch requests are // done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single // request. 
-func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ...Cataloger) (*pkg.Catalog, []artifact.Relationship, error) { +func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ...pkg.Cataloger) (*pkg.Catalog, []artifact.Relationship, error) { catalog := pkg.NewCatalog() var allRelationships []artifact.Relationship diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go index 821926d677e..dd704695f4a 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -9,7 +9,6 @@ import ( "strings" "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/alpm" "github.com/anchore/syft/syft/pkg/cataloger/apkdb" @@ -28,24 +27,13 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/ruby" "github.com/anchore/syft/syft/pkg/cataloger/rust" "github.com/anchore/syft/syft/pkg/cataloger/swift" - "github.com/anchore/syft/syft/source" ) const AllCatalogersPattern = "all" -// Cataloger describes behavior for an object to participate in parsing container image or file system -// contents for the purpose of discovering Packages. Each concrete implementation should focus on discovering Packages -// for a specific Package Type or ecosystem. -type Cataloger interface { - // Name returns a string that uniquely describes a cataloger - Name() string - // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. - Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) -} - // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. 
-func ImageCatalogers(cfg Config) []Cataloger { - return filterCatalogers([]Cataloger{ +func ImageCatalogers(cfg Config) []pkg.Cataloger { + return filterCatalogers([]pkg.Cataloger{ alpm.NewAlpmdbCataloger(), ruby.NewGemSpecCataloger(), python.NewPythonPackageCataloger(), @@ -62,8 +50,8 @@ func ImageCatalogers(cfg Config) []Cataloger { } // DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) -func DirectoryCatalogers(cfg Config) []Cataloger { - return filterCatalogers([]Cataloger{ +func DirectoryCatalogers(cfg Config) []pkg.Cataloger { + return filterCatalogers([]pkg.Cataloger{ alpm.NewAlpmdbCataloger(), ruby.NewGemFileLockCataloger(), python.NewPythonIndexCataloger(), @@ -89,8 +77,8 @@ func DirectoryCatalogers(cfg Config) []Cataloger { } // AllCatalogers returns all implemented catalogers -func AllCatalogers(cfg Config) []Cataloger { - return filterCatalogers([]Cataloger{ +func AllCatalogers(cfg Config) []pkg.Cataloger { + return filterCatalogers([]pkg.Cataloger{ alpm.NewAlpmdbCataloger(), ruby.NewGemFileLockCataloger(), ruby.NewGemSpecCataloger(), @@ -128,7 +116,7 @@ func RequestedAllCatalogers(cfg Config) bool { return false } -func filterCatalogers(catalogers []Cataloger, enabledCatalogerPatterns []string) []Cataloger { +func filterCatalogers(catalogers []pkg.Cataloger, enabledCatalogerPatterns []string) []pkg.Cataloger { // if cataloger is not set, all applicable catalogers are enabled by default if len(enabledCatalogerPatterns) == 0 { return catalogers @@ -138,7 +126,7 @@ func filterCatalogers(catalogers []Cataloger, enabledCatalogerPatterns []string) return catalogers } } - var keepCatalogers []Cataloger + var keepCatalogers []pkg.Cataloger for _, cataloger := range catalogers { if contains(enabledCatalogerPatterns, cataloger.Name()) { keepCatalogers = append(keepCatalogers, cataloger) diff --git a/syft/pkg/cataloger/cataloger_test.go 
b/syft/pkg/cataloger/cataloger_test.go index 071e7bd3065..5dfd5ccb26d 100644 --- a/syft/pkg/cataloger/cataloger_test.go +++ b/syft/pkg/cataloger/cataloger_test.go @@ -10,7 +10,7 @@ import ( "github.com/anchore/syft/syft/source" ) -var _ Cataloger = (*dummy)(nil) +var _ pkg.Cataloger = (*dummy)(nil) type dummy struct { name string @@ -147,7 +147,7 @@ func Test_filterCatalogers(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var catalogers []Cataloger + var catalogers []pkg.Cataloger for _, n := range tt.catalogers { catalogers = append(catalogers, dummy{name: n}) } diff --git a/syft/pkg/cataloger/cpp/parse_conanfile_test.go b/syft/pkg/cataloger/cpp/parse_conanfile_test.go index 061a76bf786..edb9ff30a41 100644 --- a/syft/pkg/cataloger/cpp/parse_conanfile_test.go +++ b/syft/pkg/cataloger/cpp/parse_conanfile_test.go @@ -90,5 +90,5 @@ func TestParseConanfile(t *testing.T) { // TODO: relationships are not under test var expectedRelationships []artifact.Relationship - pkgtest.TestGenericParser(t, fixture, parseConanfile, expected, expectedRelationships) + pkgtest.TestFileParser(t, fixture, parseConanfile, expected, expectedRelationships) } diff --git a/syft/pkg/cataloger/cpp/parse_conanlock_test.go b/syft/pkg/cataloger/cpp/parse_conanlock_test.go index c289dcfc8b0..c5a57fa795e 100644 --- a/syft/pkg/cataloger/cpp/parse_conanlock_test.go +++ b/syft/pkg/cataloger/cpp/parse_conanlock_test.go @@ -35,5 +35,5 @@ func TestParseConanlock(t *testing.T) { // TODO: relationships are not under test var expectedRelationships []artifact.Relationship - pkgtest.TestGenericParser(t, fixture, parseConanlock, expected, expectedRelationships) + pkgtest.TestFileParser(t, fixture, parseConanlock, expected, expectedRelationships) } diff --git a/syft/pkg/cataloger/dart/parse_pubspec_lock_test.go b/syft/pkg/cataloger/dart/parse_pubspec_lock_test.go index b0661738a18..bbef7e0492c 100644 --- a/syft/pkg/cataloger/dart/parse_pubspec_lock_test.go +++ 
b/syft/pkg/cataloger/dart/parse_pubspec_lock_test.go @@ -98,5 +98,5 @@ func TestParsePubspecLock(t *testing.T) { // TODO: relationships are not under test var expectedRelationships []artifact.Relationship - pkgtest.TestGenericParser(t, fixture, parsePubspecLock, expected, expectedRelationships) + pkgtest.TestFileParser(t, fixture, parsePubspecLock, expected, expectedRelationships) } diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index a9db587b7e4..4484757105d 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -4,230 +4,14 @@ Package dpkg provides a concrete Cataloger implementation for Debian package DB package deb import ( - "fmt" - "io" - "path" - "path/filepath" - "sort" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/source" -) - -const ( - md5sumsExt = ".md5sums" - conffilesExt = ".conffiles" - docsPath = "/usr/share/doc" + "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -type Cataloger struct{} - -// NewDpkgdbCataloger returns a new Deb package cataloger object. -func NewDpkgdbCataloger() *Cataloger { - return &Cataloger{} -} - -// Name returns a string that uniquely describes a cataloger -func (c *Cataloger) Name() string { - return "dpkgdb-cataloger" -} - -// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files. 
-func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) { - dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDBGlob) - if err != nil { - return nil, nil, fmt.Errorf("failed to find dpkg status files's by glob: %w", err) - } - - var allPackages []pkg.Package - for _, dbLocation := range dbFileMatches { - dbContents, err := resolver.FileContentsByLocation(dbLocation) - if err != nil { - return nil, nil, err - } - - pkgs, err := parseDpkgStatus(dbContents) - internal.CloseAndLogError(dbContents, dbLocation.VirtualPath) - if err != nil { - log.Warnf("dpkg cataloger: unable to catalog package=%+v: %w", dbLocation.RealPath, err) - continue - } - - for i := range pkgs { - p := &pkgs[i] - p.FoundBy = c.Name() - p.Locations.Add(dbLocation) - - // the current entry only has what may have been listed in the status file, however, there are additional - // files that are listed in multiple other locations. We should retrieve them all and merge the file lists - // together. - mergeFileListing(resolver, dbLocation, p) - - // fetch additional data from the copyright file to derive the license information - addLicenses(resolver, dbLocation, p) - - p.SetID() - } - - allPackages = append(allPackages, pkgs...) 
- } - return allPackages, nil, nil -} - -func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { - // get license information from the copyright file - copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, p) - - if copyrightReader != nil && copyrightLocation != nil { - defer internal.CloseAndLogError(copyrightReader, copyrightLocation.VirtualPath) - // attach the licenses - p.Licenses = parseLicensesFromCopyright(copyrightReader) - - // keep a record of the file where this was discovered - p.Locations.Add(*copyrightLocation) - } -} - -func mergeFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { - metadata := p.Metadata.(pkg.DpkgMetadata) - - // get file listing (package files + additional config files) - files, infoLocations := getAdditionalFileListing(resolver, dbLocation, p) -loopNewFiles: - for _, newFile := range files { - for _, existingFile := range metadata.Files { - if existingFile.Path == newFile.Path { - // skip adding this file since it already exists - continue loopNewFiles - } - } - metadata.Files = append(metadata.Files, newFile) - } - - // sort files by path - sort.SliceStable(metadata.Files, func(i, j int) bool { - return metadata.Files[i].Path < metadata.Files[j].Path - }) - - // persist alterations - p.Metadata = metadata - - // persist location information from each new source of information - p.Locations.Add(infoLocations...) 
-} - -func getAdditionalFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) ([]pkg.DpkgFileRecord, []source.Location) { - // ensure the default value for a collection is never nil since this may be shown as JSON - var files = make([]pkg.DpkgFileRecord, 0) - var locations []source.Location - - md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, p) - - if md5Reader != nil && md5Location != nil { - defer internal.CloseAndLogError(md5Reader, md5Location.VirtualPath) - // attach the file list - files = append(files, parseDpkgMD5Info(md5Reader)...) - - // keep a record of the file where this was discovered - locations = append(locations, *md5Location) - } - - conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, p) - - if conffilesReader != nil && conffilesLocation != nil { - defer internal.CloseAndLogError(conffilesReader, conffilesLocation.VirtualPath) - // attach the file list - files = append(files, parseDpkgConffileInfo(conffilesReader)...) 
- - // keep a record of the file where this was discovered - locations = append(locations, *conffilesLocation) - } - - return files, locations -} - -func fetchMd5Contents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.ReadCloser, *source.Location) { - var md5Reader io.ReadCloser - var err error - - parentPath := filepath.Dir(dbLocation.RealPath) - - // look for /var/lib/dpkg/info/NAME:ARCH.md5sums - name := md5Key(p) - location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+md5sumsExt)) - - if location == nil { - // the most specific key did not work, fallback to just the name - // look for /var/lib/dpkg/info/NAME.md5sums - location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", p.Name+md5sumsExt)) - } - - // this is unexpected, but not a show-stopper - if location != nil { - md5Reader, err = resolver.FileContentsByLocation(*location) - if err != nil { - log.Warnf("failed to fetch deb md5 contents (package=%s): %+v", p.Name, err) - } - } - - return md5Reader, location -} - -func fetchConffileContents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.ReadCloser, *source.Location) { - var reader io.ReadCloser - var err error - - parentPath := filepath.Dir(dbLocation.RealPath) - - // look for /var/lib/dpkg/info/NAME:ARCH.conffiles - name := md5Key(p) - location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt)) - - if location == nil { - // the most specific key did not work, fallback to just the name - // look for /var/lib/dpkg/info/NAME.conffiles - location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", p.Name+conffilesExt)) - } - - // this is unexpected, but not a show-stopper - if location != nil { - reader, err = resolver.FileContentsByLocation(*location) - if err != nil { - log.Warnf("failed to fetch deb conffiles contents (package=%s): %+v", p.Name, err) - } - } - - return reader, 
location -} - -func fetchCopyrightContents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.ReadCloser, *source.Location) { - // look for /usr/share/docs/NAME/copyright files - name := p.Name - copyrightPath := path.Join(docsPath, name, "copyright") - location := resolver.RelativeFileByPath(dbLocation, copyrightPath) - - // we may not have a copyright file for each package, ignore missing files - if location == nil { - return nil, nil - } - - reader, err := resolver.FileContentsByLocation(*location) - if err != nil { - log.Warnf("failed to fetch deb copyright contents (package=%s): %w", p.Name, err) - } - - return reader, location -} - -func md5Key(p *pkg.Package) string { - metadata := p.Metadata.(pkg.DpkgMetadata) +const catalogerName = "dpkgdb-cataloger" - contentKey := p.Name - if metadata.Architecture != "" && metadata.Architecture != "all" { - contentKey = contentKey + ":" + metadata.Architecture - } - return contentKey +// NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files. +func NewDpkgdbCataloger() *generic.Cataloger { + return generic.NewCataloger(catalogerName). 
+ WithParserByGlobs(parseDpkgDB, pkg.DpkgDBGlob) } diff --git a/syft/pkg/cataloger/deb/cataloger_test.go b/syft/pkg/cataloger/deb/cataloger_test.go index 8744ec561ca..7e627b1c9a7 100644 --- a/syft/pkg/cataloger/deb/cataloger_test.go +++ b/syft/pkg/cataloger/deb/cataloger_test.go @@ -3,138 +3,81 @@ package deb import ( "testing" - "github.com/go-test/deep" - "github.com/stretchr/testify/assert" - - "github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" ) func TestDpkgCataloger(t *testing.T) { - tests := []struct { - name string - sources map[string][]string - expected []pkg.Package - }{ + expected := []pkg.Package{ { - name: "go-case", - sources: map[string][]string{ - "libpam-runtime": { - "/var/lib/dpkg/status", - "/var/lib/dpkg/info/libpam-runtime.md5sums", - "/var/lib/dpkg/info/libpam-runtime.conffiles", - "/usr/share/doc/libpam-runtime/copyright", - }, - }, - expected: []pkg.Package{ - { - Name: "libpam-runtime", - Version: "1.1.8-3.6", - FoundBy: "dpkgdb-cataloger", - Licenses: []string{"GPL-1", "GPL-2", "LGPL-2.1"}, - Type: pkg.DebPkg, - MetadataType: pkg.DpkgMetadataType, - Metadata: pkg.DpkgMetadata{ - Package: "libpam-runtime", - Source: "pam", - Version: "1.1.8-3.6", - Architecture: "all", - Maintainer: "Steve Langasek ", - InstalledSize: 1016, - Description: `Runtime support for the PAM library + Name: "libpam-runtime", + Version: "1.1.8-3.6", + FoundBy: "dpkgdb-cataloger", + Licenses: []string{"GPL-1", "GPL-2", "LGPL-2.1"}, + Locations: source.NewLocationSet( + source.NewVirtualLocation("/var/lib/dpkg/status", "/var/lib/dpkg/status"), + source.NewVirtualLocation("/var/lib/dpkg/info/libpam-runtime.md5sums", "/var/lib/dpkg/info/libpam-runtime.md5sums"), + source.NewVirtualLocation("/var/lib/dpkg/info/libpam-runtime.conffiles", "/var/lib/dpkg/info/libpam-runtime.conffiles"), + 
source.NewVirtualLocation("/usr/share/doc/libpam-runtime/copyright", "/usr/share/doc/libpam-runtime/copyright"), + ), + Type: pkg.DebPkg, + MetadataType: pkg.DpkgMetadataType, + Metadata: pkg.DpkgMetadata{ + Package: "libpam-runtime", + Source: "pam", + Version: "1.1.8-3.6", + Architecture: "all", + Maintainer: "Steve Langasek ", + InstalledSize: 1016, + Description: `Runtime support for the PAM library Contains configuration files and directories required for authentication to work on Debian systems. This package is required on almost all installations.`, - Files: []pkg.DpkgFileRecord{ - { - Path: "/etc/pam.conf", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "87fc76f18e98ee7d3848f6b81b3391e5", - }, - IsConfigFile: true, - }, - { - Path: "/etc/pam.d/other", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "31aa7f2181889ffb00b87df4126d1701", - }, - IsConfigFile: true, - }, - {Path: "/lib/x86_64-linux-gnu/libz.so.1.2.11", Digest: &file.Digest{ - Algorithm: "md5", - Value: "55f905631797551d4d936a34c7e73474", - }}, - {Path: "/usr/share/doc/zlib1g/changelog.Debian.gz", Digest: &file.Digest{ - Algorithm: "md5", - Value: "cede84bda30d2380217f97753c8ccf3a", - }}, - {Path: "/usr/share/doc/zlib1g/changelog.gz", Digest: &file.Digest{ - Algorithm: "md5", - Value: "f3c9dafa6da7992c47328b4464f6d122", - }}, - {Path: "/usr/share/doc/zlib1g/copyright", Digest: &file.Digest{ - Algorithm: "md5", - Value: "a4fae96070439a5209a62ae5b8017ab2", - }}, + Files: []pkg.DpkgFileRecord{ + { + Path: "/etc/pam.conf", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "87fc76f18e98ee7d3848f6b81b3391e5", + }, + IsConfigFile: true, + }, + { + Path: "/etc/pam.d/other", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "31aa7f2181889ffb00b87df4126d1701", }, + IsConfigFile: true, }, + {Path: "/lib/x86_64-linux-gnu/libz.so.1.2.11", Digest: &file.Digest{ + Algorithm: "md5", + Value: "55f905631797551d4d936a34c7e73474", + }}, + {Path: "/usr/share/doc/zlib1g/changelog.Debian.gz", Digest: 
&file.Digest{ + Algorithm: "md5", + Value: "cede84bda30d2380217f97753c8ccf3a", + }}, + {Path: "/usr/share/doc/zlib1g/changelog.gz", Digest: &file.Digest{ + Algorithm: "md5", + Value: "f3c9dafa6da7992c47328b4464f6d122", + }}, + {Path: "/usr/share/doc/zlib1g/copyright", Digest: &file.Digest{ + Algorithm: "md5", + Value: "a4fae96070439a5209a62ae5b8017ab2", + }}, }, }, }, } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - - img := imagetest.GetFixtureImage(t, "docker-archive", "image-dpkg") - - s, err := source.NewFromImage(img, "") - if err != nil { - t.Fatal(err) - } - - c := NewDpkgdbCataloger() - - resolver, err := s.FileResolver(source.SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: %+v", err) - } - - actual, _, err := c.Catalog(resolver) - if err != nil { - t.Fatalf("failed to catalog: %+v", err) - } - - if len(actual) != len(test.expected) { - for _, a := range actual { - t.Logf(" %+v", a) - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(test.expected)) - } - - // test sources... - for idx := range actual { - a := &actual[idx] - // we will test the sources separately - var sourcesList = make([]string, len(a.Locations.ToSlice())) - for i, s := range a.Locations.ToSlice() { - sourcesList[i] = s.RealPath - } - a.Locations = source.NewLocationSet() - - assert.ElementsMatch(t, sourcesList, test.sources[a.Name]) - } - - // test remaining fields... - for _, d := range deep.Equal(actual, test.expected) { - t.Errorf("diff: %+v", d) - } - - }) - } + c := NewDpkgdbCataloger() + pkgtest.NewCatalogTester(). + WithImageResolver(t, "image-dpkg"). + IgnoreLocationLayer(). // this fixture can be rebuilt, thus the layer ID will change + Expects(expected, nil). 
+ TestCataloger(t, c) } diff --git a/syft/pkg/cataloger/deb/package.go b/syft/pkg/cataloger/deb/package.go new file mode 100644 index 00000000000..2190e2b2358 --- /dev/null +++ b/syft/pkg/cataloger/deb/package.go @@ -0,0 +1,247 @@ +package deb + +import ( + "fmt" + "io" + "path" + "path/filepath" + "sort" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +const ( + md5sumsExt = ".md5sums" + conffilesExt = ".conffiles" + docsPath = "/usr/share/doc" +) + +func newDpkgPackage(d pkg.DpkgMetadata, dbLocation source.Location, resolver source.FileResolver, release *linux.Release) pkg.Package { + p := pkg.Package{ + Name: d.Package, + Version: d.Version, + Locations: source.NewLocationSet(dbLocation), + PURL: packageURL(d, release), + Type: pkg.DebPkg, + MetadataType: pkg.DpkgMetadataType, + Metadata: d, + } + + // the current entry only has what may have been listed in the status file, however, there are additional + // files that are listed in multiple other locations. We should retrieve them all and merge the file lists + // together. 
+ mergeFileListing(resolver, dbLocation, &p) + + // fetch additional data from the copyright file to derive the license information + addLicenses(resolver, dbLocation, &p) + + p.SetID() + + return p +} + +// packageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec) +func packageURL(m pkg.DpkgMetadata, distro *linux.Release) string { + if distro == nil { + return "" + } + + if distro.ID != "debian" && !internal.StringInSlice("debian", distro.IDLike) { + return "" + } + + qualifiers := map[string]string{ + pkg.PURLQualifierArch: m.Architecture, + } + + if m.Source != "" { + if m.SourceVersion != "" { + qualifiers[pkg.PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion) + } else { + qualifiers[pkg.PURLQualifierUpstream] = m.Source + } + } + + return packageurl.NewPackageURL( + packageurl.TypeDebian, + distro.ID, + m.Package, + m.Version, + pkg.PURLQualifiers( + qualifiers, + distro, + ), + "", + ).ToString() +} + +func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { + metadata := p.Metadata.(pkg.DpkgMetadata) + + // get license information from the copyright file + copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata) + + if copyrightReader != nil && copyrightLocation != nil { + defer internal.CloseAndLogError(copyrightReader, copyrightLocation.VirtualPath) + // attach the licenses + p.Licenses = parseLicensesFromCopyright(copyrightReader) + + // keep a record of the file where this was discovered + p.Locations.Add(*copyrightLocation) + } +} + +func mergeFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { + metadata := p.Metadata.(pkg.DpkgMetadata) + + // get file listing (package files + additional config files) + files, infoLocations := getAdditionalFileListing(resolver, dbLocation, metadata) +loopNewFiles: + for _, newFile := range files { + for _, existingFile := range metadata.Files 
{ + if existingFile.Path == newFile.Path { + // skip adding this file since it already exists + continue loopNewFiles + } + } + metadata.Files = append(metadata.Files, newFile) + } + + // sort files by path + sort.SliceStable(metadata.Files, func(i, j int) bool { + return metadata.Files[i].Path < metadata.Files[j].Path + }) + + // persist alterations + p.Metadata = metadata + + // persist location information from each new source of information + p.Locations.Add(infoLocations...) +} + +func getAdditionalFileListing(resolver source.FileResolver, dbLocation source.Location, m pkg.DpkgMetadata) ([]pkg.DpkgFileRecord, []source.Location) { + // ensure the default value for a collection is never nil since this may be shown as JSON + var files = make([]pkg.DpkgFileRecord, 0) + var locations []source.Location + + md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, m) + + if md5Reader != nil && md5Location != nil { + defer internal.CloseAndLogError(md5Reader, md5Location.VirtualPath) + // attach the file list + files = append(files, parseDpkgMD5Info(md5Reader)...) + + // keep a record of the file where this was discovered + locations = append(locations, *md5Location) + } + + conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, m) + + if conffilesReader != nil && conffilesLocation != nil { + defer internal.CloseAndLogError(conffilesReader, conffilesLocation.VirtualPath) + // attach the file list + files = append(files, parseDpkgConffileInfo(conffilesReader)...) 
+ + // keep a record of the file where this was discovered + locations = append(locations, *conffilesLocation) + } + + return files, locations +} + +func fetchMd5Contents(resolver source.FileResolver, dbLocation source.Location, m pkg.DpkgMetadata) (io.ReadCloser, *source.Location) { + var md5Reader io.ReadCloser + var err error + + if resolver == nil { + return nil, nil + } + + parentPath := filepath.Dir(dbLocation.RealPath) + + // look for /var/lib/dpkg/info/NAME:ARCH.md5sums + name := md5Key(m) + location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+md5sumsExt)) + + if location == nil { + // the most specific key did not work, fallback to just the name + // look for /var/lib/dpkg/info/NAME.md5sums + location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+md5sumsExt)) + } + + // this is unexpected, but not a show-stopper + if location != nil { + md5Reader, err = resolver.FileContentsByLocation(*location) + if err != nil { + log.Warnf("failed to fetch deb md5 contents (package=%s): %+v", m.Package, err) + } + } + + return md5Reader, location +} + +func fetchConffileContents(resolver source.FileResolver, dbLocation source.Location, m pkg.DpkgMetadata) (io.ReadCloser, *source.Location) { + var reader io.ReadCloser + var err error + + if resolver == nil { + return nil, nil + } + + parentPath := filepath.Dir(dbLocation.RealPath) + + // look for /var/lib/dpkg/info/NAME:ARCH.conffiles + name := md5Key(m) + location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt)) + + if location == nil { + // the most specific key did not work, fallback to just the name + // look for /var/lib/dpkg/info/NAME.conffiles + location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+conffilesExt)) + } + + // this is unexpected, but not a show-stopper + if location != nil { + reader, err = resolver.FileContentsByLocation(*location) + if err != nil { + 
log.Warnf("failed to fetch deb conffiles contents (package=%s): %+v", m.Package, err) + } + } + + return reader, location +} + +func fetchCopyrightContents(resolver source.FileResolver, dbLocation source.Location, m pkg.DpkgMetadata) (io.ReadCloser, *source.Location) { + if resolver == nil { + return nil, nil + } + + // look for /usr/share/doc/NAME/copyright files + copyrightPath := path.Join(docsPath, m.Package, "copyright") + location := resolver.RelativeFileByPath(dbLocation, copyrightPath) + + // we may not have a copyright file for each package, ignore missing files + if location == nil { + return nil, nil + } + + reader, err := resolver.FileContentsByLocation(*location) + if err != nil { + log.Warnf("failed to fetch deb copyright contents (package=%s): %w", m.Package, err) + } + + return reader, location +} + +func md5Key(metadata pkg.DpkgMetadata) string { + contentKey := metadata.Package + if metadata.Architecture != "" && metadata.Architecture != "all" { + contentKey = contentKey + ":" + metadata.Architecture + } + return contentKey +} diff --git a/syft/pkg/dpkg_metadata_test.go b/syft/pkg/cataloger/deb/package_test.go similarity index 52% rename from syft/pkg/dpkg_metadata_test.go rename to syft/pkg/cataloger/deb/package_test.go index 088e4100066..267ccb97f42 100644 --- a/syft/pkg/dpkg_metadata_test.go +++ b/syft/pkg/cataloger/deb/package_test.go @@ -1,20 +1,19 @@ -package pkg +package deb import ( - "strings" "testing" - "github.com/go-test/deep" - "github.com/sergi/go-diff/diffmatchpatch" + "github.com/google/go-cmp/cmp" "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" ) -func TestDpkgMetadata_pURL(t *testing.T) { +func Test_packageURL(t *testing.T) { tests := []struct { name string distro *linux.Release - metadata DpkgMetadata + metadata pkg.DpkgMetadata expected string }{ { @@ -22,8 +21,23 @@ func TestDpkgMetadata_pURL(t *testing.T) { distro: &linux.Release{ ID: "debian", VersionID: "11", + IDLike: []string{ + "debian", + }, 
}, - metadata: DpkgMetadata{ + metadata: pkg.DpkgMetadata{ + Package: "p", + Version: "v", + }, + expected: "pkg:deb/debian/p@v?distro=debian-11", + }, + { + name: "missing ID_LIKE", + distro: &linux.Release{ + ID: "debian", + VersionID: "11", + }, + metadata: pkg.DpkgMetadata{ Package: "p", Version: "v", }, @@ -32,10 +46,13 @@ func TestDpkgMetadata_pURL(t *testing.T) { { name: "with arch info", distro: &linux.Release{ - ID: "ubuntu", + ID: "ubuntu", + IDLike: []string{ + "debian", + }, VersionID: "16.04", }, - metadata: DpkgMetadata{ + metadata: pkg.DpkgMetadata{ Package: "p", Version: "v", Architecture: "a", @@ -44,19 +61,22 @@ func TestDpkgMetadata_pURL(t *testing.T) { }, { name: "missing distro", - metadata: DpkgMetadata{ + metadata: pkg.DpkgMetadata{ Package: "p", Version: "v", }, - expected: "pkg:deb/p@v", + expected: "", }, { name: "with upstream qualifier with source pkg name info", distro: &linux.Release{ ID: "debian", VersionID: "11", + IDLike: []string{ + "debian", + }, }, - metadata: DpkgMetadata{ + metadata: pkg.DpkgMetadata{ Package: "p", Source: "s", Version: "v", @@ -68,8 +88,11 @@ func TestDpkgMetadata_pURL(t *testing.T) { distro: &linux.Release{ ID: "debian", VersionID: "11", + IDLike: []string{ + "debian", + }, }, - metadata: DpkgMetadata{ + metadata: pkg.DpkgMetadata{ Package: "p", Source: "s", Version: "v", @@ -81,51 +104,9 @@ func TestDpkgMetadata_pURL(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual := test.metadata.PackageURL(test.distro) - if actual != test.expected { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(test.expected, actual, true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) - } - }) - } -} - -func TestDpkgMetadata_FileOwner(t *testing.T) { - tests := []struct { - metadata DpkgMetadata - expected []string - }{ - { - metadata: DpkgMetadata{ - Files: []DpkgFileRecord{ - {Path: "/somewhere"}, - {Path: "/else"}, - }, - }, - expected: []string{ - "/else", - "/somewhere", - }, - 
}, - { - metadata: DpkgMetadata{ - Files: []DpkgFileRecord{ - {Path: "/somewhere"}, - {Path: ""}, - }, - }, - expected: []string{ - "/somewhere", - }, - }, - } - - for _, test := range tests { - t.Run(strings.Join(test.expected, ","), func(t *testing.T) { - actual := test.metadata.OwnedFiles() - for _, d := range deep.Equal(test.expected, actual) { - t.Errorf("diff: %+v", d) + actual := packageURL(test.metadata, test.distro) + if diff := cmp.Diff(test.expected, actual); diff != "" { + t.Errorf("unexpected packageURL (-want +got):\n%s", diff) } }) } diff --git a/syft/pkg/cataloger/deb/parse_copyright_test.go b/syft/pkg/cataloger/deb/parse_copyright_test.go index c6cd0c0c9f3..e58162aa8f1 100644 --- a/syft/pkg/cataloger/deb/parse_copyright_test.go +++ b/syft/pkg/cataloger/deb/parse_copyright_test.go @@ -4,7 +4,8 @@ import ( "os" "testing" - "github.com/go-test/deep" + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" ) func TestParseLicensesFromCopyright(t *testing.T) { @@ -38,31 +39,15 @@ func TestParseLicensesFromCopyright(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - file, err := os.Open(test.fixture) - if err != nil { - t.Fatal("Unable to read: ", err) - } - defer func() { - err := file.Close() - if err != nil { - t.Fatal("closing file failed:", err) - } - }() - - actual := parseLicensesFromCopyright(file) + f, err := os.Open(test.fixture) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) - if len(actual) != len(test.expected) { - for _, a := range actual { - t.Logf(" %+v", a) - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(test.expected)) - } + actual := parseLicensesFromCopyright(f) - diffs := deep.Equal(actual, test.expected) - for _, d := range diffs { - t.Errorf("diff: %+v", d) + if diff := cmp.Diff(test.expected, actual); diff != "" { + t.Errorf("unexpected package licenses (-want +got):\n%s", diff) } - }) } } diff --git 
a/syft/pkg/cataloger/deb/parse_dpkg_status.go b/syft/pkg/cataloger/deb/parse_dpkg_db.go similarity index 75% rename from syft/pkg/cataloger/deb/parse_dpkg_status.go rename to syft/pkg/cataloger/deb/parse_dpkg_db.go index 9bc4023ab93..f5bf9aa9ff8 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_db.go @@ -13,7 +13,10 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" ) var ( @@ -21,20 +24,24 @@ var ( sourceRegexp = regexp.MustCompile(`(?P\S+)( \((?P.*)\))?`) ) -func newDpkgPackage(d pkg.DpkgMetadata) *pkg.Package { - return &pkg.Package{ - Name: d.Package, - Version: d.Version, - Type: pkg.DebPkg, - MetadataType: pkg.DpkgMetadataType, - Metadata: d, +func parseDpkgDB(resolver source.FileResolver, env *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + metadata, err := parseDpkgStatus(reader) + if err != nil { + return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) + } + + var pkgs []pkg.Package + for _, m := range metadata { + pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease)) } + + return pkgs, nil, nil } // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 
-func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { +func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgMetadata, error) { buffedReader := bufio.NewReader(reader) - var packages []pkg.Package + var metadata []pkg.DpkgMetadata continueProcessing := true for continueProcessing { @@ -46,40 +53,44 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { return nil, err } } - - p := newDpkgPackage(entry) - if pkg.IsValid(p) { - packages = append(packages, *p) + if entry == nil { + continue } + + metadata = append(metadata, *entry) } - return packages, nil + return metadata, nil } // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. -func parseDpkgStatusEntry(reader *bufio.Reader) (pkg.DpkgMetadata, error) { +func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgMetadata, error) { var retErr error dpkgFields, err := extractAllFields(reader) if err != nil { if !errors.Is(err, errEndOfPackages) { - return pkg.DpkgMetadata{}, err + return nil, err + } + if len(dpkgFields) == 0 { + return nil, err } retErr = err } - entry := pkg.DpkgMetadata{ - // ensure the default value for a collection is never nil since this may be shown as JSON - Files: make([]pkg.DpkgFileRecord, 0), - } + entry := pkg.DpkgMetadata{} err = mapstructure.Decode(dpkgFields, &entry) if err != nil { - return pkg.DpkgMetadata{}, err + return nil, err + } + + sourceName, sourceVersion := extractSourceVersion(entry.Source) + if sourceVersion != "" { + entry.SourceVersion = sourceVersion + entry.Source = sourceName } - name, version := extractSourceVersion(entry.Source) - if version != "" { - entry.SourceVersion = version - entry.Source = name + if entry.Package == "" { + return nil, retErr } // there may be an optional conffiles section that we should persist as files @@ -89,7 +100,12 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (pkg.DpkgMetadata, error) { } } - return entry, retErr + if 
entry.Files == nil { + // ensure the default value for a collection is never nil since this may be shown as JSON + entry.Files = make([]pkg.DpkgFileRecord, 0) + } + + return &entry, retErr } func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go b/syft/pkg/cataloger/deb/parse_dpkg_db_test.go similarity index 64% rename from syft/pkg/cataloger/deb/parse_dpkg_status_test.go rename to syft/pkg/cataloger/deb/parse_dpkg_db_test.go index 46d928ca0e3..0bae7c8962a 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_db_test.go @@ -5,41 +5,37 @@ import ( "errors" "fmt" "os" - "path/filepath" - "strings" "testing" - "github.com/go-test/deep" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/source" ) -func compareEntries(t *testing.T, left, right pkg.DpkgMetadata) { - t.Helper() - if diff := deep.Equal(left, right); diff != nil { - t.Error(diff) - } -} - -func TestSinglePackage(t *testing.T) { +func Test_parseDpkgStatus(t *testing.T) { tests := []struct { name string - expected pkg.DpkgMetadata + expected []pkg.DpkgMetadata fixturePath string }{ { - name: "Test Single Package", - fixturePath: filepath.Join("test-fixtures", "status", "single"), - expected: pkg.DpkgMetadata{ - Package: "apt", - Source: "apt-dev", - Version: "1.8.2", - Architecture: "amd64", - InstalledSize: 4064, - Maintainer: "APT Development Team ", - Description: `commandline package manager + name: "single package", + fixturePath: "test-fixtures/status/single", + expected: []pkg.DpkgMetadata{ + { + Package: "apt", + Source: "apt-dev", + Version: "1.8.2", + Architecture: "amd64", + InstalledSize: 4064, + 
Maintainer: "APT Development Team ", + Description: `commandline package manager This package provides commandline tools for searching and managing as well as querying information about packages as a low-level access to all features of the libapt-pkg library. @@ -53,53 +49,55 @@ func TestSinglePackage(t *testing.T) { * apt-cdrom to use removable media as a source for packages * apt-config as an interface to the configuration settings * apt-key as an interface to manage authentication keys`, - Files: []pkg.DpkgFileRecord{ - { - Path: "/etc/apt/apt.conf.d/01autoremove", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "76120d358bc9037bb6358e737b3050b5", + Files: []pkg.DpkgFileRecord{ + { + Path: "/etc/apt/apt.conf.d/01autoremove", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "76120d358bc9037bb6358e737b3050b5", + }, + IsConfigFile: true, }, - IsConfigFile: true, - }, - { - Path: "/etc/cron.daily/apt-compat", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "49e9b2cfa17849700d4db735d04244f3", + { + Path: "/etc/cron.daily/apt-compat", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "49e9b2cfa17849700d4db735d04244f3", + }, + IsConfigFile: true, }, - IsConfigFile: true, - }, - { - Path: "/etc/kernel/postinst.d/apt-auto-removal", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "4ad976a68f045517cf4696cec7b8aa3a", + { + Path: "/etc/kernel/postinst.d/apt-auto-removal", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "4ad976a68f045517cf4696cec7b8aa3a", + }, + IsConfigFile: true, }, - IsConfigFile: true, - }, - { - Path: "/etc/logrotate.d/apt", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "179f2ed4f85cbaca12fa3d69c2a4a1c3", + { + Path: "/etc/logrotate.d/apt", + Digest: &file.Digest{ + Algorithm: "md5", + Value: "179f2ed4f85cbaca12fa3d69c2a4a1c3", + }, + IsConfigFile: true, }, - IsConfigFile: true, }, }, }, }, { - name: "parse storage notation", - fixturePath: filepath.Join("test-fixtures", "status", "installed-size-4KB"), - expected: 
pkg.DpkgMetadata{ - Package: "apt", - Source: "apt-dev", - Version: "1.8.2", - Architecture: "amd64", - InstalledSize: 4000, - Maintainer: "APT Development Team ", - Description: `commandline package manager + name: "single package with installed size", + fixturePath: "test-fixtures/status/installed-size-4KB", + expected: []pkg.DpkgMetadata{ + { + Package: "apt", + Source: "apt-dev", + Version: "1.8.2", + Architecture: "amd64", + InstalledSize: 4000, + Maintainer: "APT Development Team ", + Description: `commandline package manager This package provides commandline tools for searching and managing as well as querying information about packages as a low-level access to all features of the libapt-pkg library. @@ -113,41 +111,13 @@ func TestSinglePackage(t *testing.T) { * apt-cdrom to use removable media as a source for packages * apt-config as an interface to the configuration settings * apt-key as an interface to manage authentication keys`, + Files: []pkg.DpkgFileRecord{}, + }, }, - }} - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - file, err := os.Open(test.fixturePath) - if err != nil { - t.Fatal("Unable to read test_fixtures/single: ", err) - } - defer func() { - err := file.Close() - if err != nil { - t.Fatal("closing file failed:", err) - } - }() - - reader := bufio.NewReader(file) - - entry, err := parseDpkgStatusEntry(reader) - if err != nil { - t.Fatal("Unable to read file contents: ", err) - } - - compareEntries(t, entry, test.expected) - }) - } -} - -func TestMultiplePackages(t *testing.T) { - tests := []struct { - name string - expected []pkg.DpkgMetadata - }{ + }, { - name: "Test Multiple Package", + name: "multiple entries", + fixturePath: "test-fixtures/status/multiple", expected: []pkg.DpkgMetadata{ { Package: "no-version", @@ -237,30 +207,18 @@ func TestMultiplePackages(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - file, err := os.Open("test-fixtures/status/multiple") - if err != nil 
{ - t.Fatal("Unable to read: ", err) - } - defer func() { - err := file.Close() - if err != nil { - t.Fatal("closing file failed:", err) - } - }() + f, err := os.Open(test.fixturePath) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) - pkgs, err := parseDpkgStatus(file) - if err != nil { - t.Fatal("Unable to read file contents: ", err) - } + reader := bufio.NewReader(f) - if len(pkgs) != 3 { - t.Fatalf("unexpected number of entries: %d", len(pkgs)) - } + entries, err := parseDpkgStatus(reader) + require.NoError(t, err) - for idx, entry := range pkgs { - compareEntries(t, entry.Metadata.(pkg.DpkgMetadata), test.expected[idx]) + if diff := cmp.Diff(test.expected, entries); diff != "" { + t.Errorf("unexpected entry (-want +got):\n%s", diff) } - }) } } @@ -304,23 +262,23 @@ func TestSourceVersionExtract(t *testing.T) { } } -func assertAs(expected error) assert.ErrorAssertionFunc { - return func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorAs(t, err, &expected) +func requireAs(expected error) require.ErrorAssertionFunc { + return func(t require.TestingT, err error, i ...interface{}) { + require.ErrorAs(t, err, &expected) } } -func Test_parseDpkgStatus(t *testing.T) { +func Test_parseDpkgStatus_negativeCases(t *testing.T) { tests := []struct { name string input string want []pkg.Package - wantErr assert.ErrorAssertionFunc + wantErr require.ErrorAssertionFunc }{ { name: "no more packages", input: `Package: apt`, - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "duplicated key", @@ -328,14 +286,14 @@ func Test_parseDpkgStatus(t *testing.T) { Package: apt-get `, - wantErr: assertAs(errors.New("duplicate key discovered: Package")), + wantErr: requireAs(errors.New("duplicate key discovered: Package")), }, { name: "no match for continuation", input: ` Package: apt `, - wantErr: assertAs(errors.New("no match for continuation: line: ' Package: apt'")), + wantErr: requireAs(errors.New("no match for 
continuation: line: ' Package: apt'")), }, { name: "find keys", @@ -348,6 +306,8 @@ Installed-Size: 10kib { Name: "apt", Type: "deb", + PURL: "pkg:deb/debian/apt?distro=debian-10", + Locations: source.NewLocationSet(source.NewLocation("place")), MetadataType: "DpkgMetadata", Metadata: pkg.DpkgMetadata{ Package: "apt", @@ -356,16 +316,18 @@ Installed-Size: 10kib }, }, }, - wantErr: assert.NoError, + wantErr: require.NoError, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - r := bufio.NewReader(strings.NewReader(tt.input)) - got, err := parseDpkgStatus(r) - tt.wantErr(t, err, fmt.Sprintf("parseDpkgStatus")) - assert.Equal(t, tt.want, got) + pkgtest.NewCatalogTester(). + FromString("place", tt.input). + WithErrorAssertion(tt.wantErr). + WithLinuxRelease(linux.Release{ID: "debian", VersionID: "10"}). + Expects(tt.want, nil). + TestParser(t, parseDpkgDB) }) } } @@ -376,53 +338,53 @@ func Test_handleNewKeyValue(t *testing.T) { line string wantKey string wantVal interface{} - wantErr assert.ErrorAssertionFunc + wantErr require.ErrorAssertionFunc }{ { name: "cannot parse field", line: "blabla", - wantErr: assertAs(errors.New("cannot parse field from line: 'blabla'")), + wantErr: requireAs(errors.New("cannot parse field from line: 'blabla'")), }, { name: "parse field", line: "key: val", wantKey: "key", wantVal: "val", - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "parse installed size", line: "InstalledSize: 128", wantKey: "InstalledSize", wantVal: 128, - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "parse installed kib size", line: "InstalledSize: 1kib", wantKey: "InstalledSize", wantVal: 1024, - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "parse installed kb size", line: "InstalledSize: 1kb", wantKey: "InstalledSize", wantVal: 1000, - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "parse installed-size mb", line: "Installed-Size: 1 mb", wantKey: "InstalledSize", wantVal: 
1000000, - wantErr: assert.NoError, + wantErr: require.NoError, }, { name: "fail parsing installed-size", line: "Installed-Size: 1bla", wantKey: "", - wantErr: assertAs(fmt.Errorf("unhandled size name: %s", "bla")), + wantErr: requireAs(fmt.Errorf("unhandled size name: %s", "bla")), }, } for _, tt := range tests { diff --git a/syft/pkg/cataloger/deb/parse_dpkg_info_files_test.go b/syft/pkg/cataloger/deb/parse_dpkg_info_files_test.go index f3e097b7233..bd4f1bdabaf 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_info_files_test.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_info_files_test.go @@ -4,7 +4,8 @@ import ( "os" "testing" - "github.com/go-test/deep" + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -40,29 +41,14 @@ func TestMD5SumInfoParsing(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - file, err := os.Open(test.fixture) - if err != nil { - t.Fatal("Unable to read: ", err) - } - defer func() { - err := file.Close() - if err != nil { - t.Fatal("closing file failed:", err) - } - }() - - actual := parseDpkgMD5Info(file) + f, err := os.Open(test.fixture) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) - if len(actual) != len(test.expected) { - for _, a := range actual { - t.Logf(" %+v", a) - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(test.expected)) - } + actual := parseDpkgMD5Info(f) - diffs := deep.Equal(actual, test.expected) - for _, d := range diffs { - t.Errorf("diff: %+v", d) + if diff := cmp.Diff(test.expected, actual); diff != "" { + t.Errorf("unexpected md5 files (-want +got):\n%s", diff) } }) @@ -89,29 +75,14 @@ func TestConffileInfoParsing(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - file, err := os.Open(test.fixture) - if err != nil { - t.Fatal("Unable to read: ", err) - } - defer func() { - err := file.Close() - if err 
!= nil { - t.Fatal("closing file failed:", err) - } - }() - - actual := parseDpkgConffileInfo(file) + f, err := os.Open(test.fixture) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) - if len(actual) != len(test.expected) { - for _, a := range actual { - t.Logf(" %+v", a) - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(test.expected)) - } + actual := parseDpkgConffileInfo(f) - diffs := deep.Equal(actual, test.expected) - for _, d := range diffs { - t.Errorf("diff: %+v", d) + if diff := cmp.Diff(test.expected, actual); diff != "" { + t.Errorf("unexpected md5 files (-want +got):\n%s", diff) } }) diff --git a/syft/pkg/cataloger/dotnet/parse_dotnet_deps_test.go b/syft/pkg/cataloger/dotnet/parse_dotnet_deps_test.go index 9f0d2c706b2..9a7e21e109c 100644 --- a/syft/pkg/cataloger/dotnet/parse_dotnet_deps_test.go +++ b/syft/pkg/cataloger/dotnet/parse_dotnet_deps_test.go @@ -209,5 +209,5 @@ func TestParseDotnetDeps(t *testing.T) { } var expectedRelationships []artifact.Relationship - pkgtest.TestGenericParser(t, fixture, parseDotnetDeps, expected, expectedRelationships) + pkgtest.TestFileParser(t, fixture, parseDotnetDeps, expected, expectedRelationships) } diff --git a/syft/pkg/cataloger/internal/pkgtest/assert_packages_equal.go b/syft/pkg/cataloger/internal/pkgtest/assert_packages_equal.go deleted file mode 100644 index 0fbcfd94b9f..00000000000 --- a/syft/pkg/cataloger/internal/pkgtest/assert_packages_equal.go +++ /dev/null @@ -1,37 +0,0 @@ -package pkgtest - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/source" -) - -func AssertPackagesEqual(t testing.TB, expected, actual []pkg.Package) { - if diff := cmp.Diff(expected, actual, - cmpopts.IgnoreFields(pkg.Package{}, "id"), // note: ID is not deterministic for test purposes - cmp.Comparer( - func(x, y source.LocationSet) bool { - xs := x.ToSlice() - ys 
:= y.ToSlice() - - if len(xs) != len(ys) { - return false - } - for i, xe := range xs { - ye := ys[i] - if !(cmp.Equal(xe.Coordinates, ye.Coordinates) && cmp.Equal(xe.VirtualPath, ye.VirtualPath)) { - return false - } - } - - return true - }, - ), - ); diff != "" { - t.Errorf("unexpected packages from parsing (-expected +actual)\n%s", diff) - } -} diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go index 44784bdd683..cba41e1b78f 100644 --- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go +++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go @@ -1,37 +1,175 @@ package pkgtest import ( + "io" "os" + "strings" "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/require" + "github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/source" ) -func TestGenericParser(t *testing.T, fixturePath string, parser generic.Parser, expectedPkgs []pkg.Package, expectedRelationships []artifact.Relationship) { - t.Helper() - TestGenericParserWithEnv(t, fixturePath, parser, nil, expectedPkgs, expectedRelationships) +type locationComparer func(x, y source.Location) bool + +type CatalogTester struct { + expectedPkgs []pkg.Package + expectedRelationships []artifact.Relationship + env *generic.Environment + reader source.LocationReadCloser + resolver source.FileResolver + wantErr require.ErrorAssertionFunc + compareOptions []cmp.Option + locationComparer locationComparer } -func TestGenericParserWithEnv(t *testing.T, fixturePath string, parser generic.Parser, env *generic.Environment, expectedPkgs []pkg.Package, expectedRelationships []artifact.Relationship) { +func NewCatalogTester() *CatalogTester { + return &CatalogTester{ + wantErr: 
require.NoError, + locationComparer: func(x, y source.Location) bool { + return cmp.Equal(x.Coordinates, y.Coordinates) && cmp.Equal(x.VirtualPath, y.VirtualPath) + }, + } +} + +func (p *CatalogTester) FromFile(t *testing.T, path string) *CatalogTester { t.Helper() - fixture, err := os.Open(fixturePath) + + fixture, err := os.Open(path) require.NoError(t, err) - actualPkgs, actualRelationships, err := parser(nil, env, source.LocationReadCloser{ + p.reader = source.LocationReadCloser{ Location: source.NewLocation(fixture.Name()), ReadCloser: fixture, - }) + } + return p +} + +func (p *CatalogTester) FromString(location, data string) *CatalogTester { + p.reader = source.LocationReadCloser{ + Location: source.NewLocation(location), + ReadCloser: io.NopCloser(strings.NewReader(data)), + } + return p +} + +func (p *CatalogTester) WithLinuxRelease(r linux.Release) *CatalogTester { + if p.env == nil { + p.env = &generic.Environment{} + } + p.env.LinuxRelease = &r + return p +} + +func (p *CatalogTester) WithEnv(env *generic.Environment) *CatalogTester { + p.env = env + return p +} + +func (p *CatalogTester) WithError() *CatalogTester { + p.wantErr = require.Error + return p +} + +func (p *CatalogTester) WithErrorAssertion(a require.ErrorAssertionFunc) *CatalogTester { + p.wantErr = a + return p +} + +func (p *CatalogTester) WithResolver(r source.FileResolver) *CatalogTester { + p.resolver = r + return p +} + +func (p *CatalogTester) WithImageResolver(t *testing.T, fixtureName string) *CatalogTester { + t.Helper() + img := imagetest.GetFixtureImage(t, "docker-archive", fixtureName) + + s, err := source.NewFromImage(img, fixtureName) + require.NoError(t, err) + + r, err := s.FileResolver(source.SquashedScope) require.NoError(t, err) + p.resolver = r + return p +} + +func (p *CatalogTester) IgnoreLocationLayer() *CatalogTester { + p.locationComparer = func(x, y source.Location) bool { + return cmp.Equal(x.Coordinates.RealPath, y.Coordinates.RealPath) && 
cmp.Equal(x.VirtualPath, y.VirtualPath) + } + return p +} + +func (p *CatalogTester) Expects(pkgs []pkg.Package, relationships []artifact.Relationship) *CatalogTester { + p.expectedPkgs = pkgs + p.expectedRelationships = relationships + return p +} + +func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) { + t.Helper() + pkgs, relationships, err := parser(p.resolver, p.env, p.reader) + p.wantErr(t, err) + p.assertPkgs(t, pkgs, relationships) +} + +func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) { + t.Helper() + pkgs, relationships, err := cataloger.Catalog(p.resolver) + p.wantErr(t, err) + p.assertPkgs(t, pkgs, relationships) +} - AssertPackagesEqual(t, expectedPkgs, actualPkgs) +func (p *CatalogTester) assertPkgs(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) { + t.Helper() + + opts := append(p.compareOptions[:len(p.compareOptions):len(p.compareOptions)], // per-call copy: never grow p.compareOptions, a second assertion would otherwise register duplicate (ambiguous) comparers + cmpopts.IgnoreFields(pkg.Package{}, "id"), // note: ID is not deterministic for test purposes + cmp.Comparer( + func(x, y source.LocationSet) bool { + xs := x.ToSlice() + ys := y.ToSlice() + + if len(xs) != len(ys) { + return false + } + for i, xe := range xs { + ye := ys[i] + if !p.locationComparer(xe, ye) { + return false + } + } + + return true + }, + ), + ) - if diff := cmp.Diff(expectedRelationships, actualRelationships); diff != "" { + if diff := cmp.Diff(p.expectedPkgs, pkgs, opts...); diff != "" { + t.Errorf("unexpected packages from parsing (-expected +actual)\n%s", diff) + } + + if diff := cmp.Diff(p.expectedRelationships, relationships, opts...); diff != "" { t.Errorf("unexpected relationships from parsing (-expected +actual)\n%s", diff) } } + +func TestFileParser(t *testing.T, fixturePath string, parser generic.Parser, expectedPkgs []pkg.Package, expectedRelationships []artifact.Relationship) { + t.Helper() + NewCatalogTester().FromFile(t, fixturePath).Expects(expectedPkgs, expectedRelationships).TestParser(t, parser) +} + +func 
TestFileParserWithEnv(t *testing.T, fixturePath string, parser generic.Parser, env *generic.Environment, expectedPkgs []pkg.Package, expectedRelationships []artifact.Relationship) { + t.Helper() + + NewCatalogTester().FromFile(t, fixturePath).WithEnv(env).Expects(expectedPkgs, expectedRelationships).TestParser(t, parser) +} diff --git a/syft/pkg/dpkg_metadata.go b/syft/pkg/dpkg_metadata.go index cf8e6cf0529..2ad5de2dbce 100644 --- a/syft/pkg/dpkg_metadata.go +++ b/syft/pkg/dpkg_metadata.go @@ -1,22 +1,16 @@ package pkg import ( - "fmt" "sort" "github.com/scylladb/go-set/strset" - "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/linux" ) const DpkgDBGlob = "**/var/lib/dpkg/{status,status.d/**}" -var ( - _ FileOwner = (*DpkgMetadata)(nil) - _ urlIdentifier = (*DpkgMetadata)(nil) -) +var _ FileOwner = (*DpkgMetadata)(nil) // DpkgMetadata represents all captured data for a Debian package DB entry; available fields are described // at http://manpages.ubuntu.com/manpages/xenial/man1/dpkg-query.1.html in the --showformat section. @@ -39,40 +33,6 @@ type DpkgFileRecord struct { IsConfigFile bool `json:"isConfigFile"` } -// PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec) -func (m DpkgMetadata) PackageURL(distro *linux.Release) string { - var namespace string - if distro != nil { - namespace = distro.ID - } - - qualifiers := map[string]string{ - PURLQualifierArch: m.Architecture, - } - - if m.Source != "" { - if m.SourceVersion != "" { - qualifiers[PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion) - } else { - qualifiers[PURLQualifierUpstream] = m.Source - } - } - - return packageurl.NewPackageURL( - // TODO: replace with `packageurl.TypeDebian` upon merge of https://github.com/package-url/packageurl-go/pull/21 - // TODO: or, since we're now using an Anchore fork of this module, we could do this sooner. 
- "deb", - namespace, - m.Package, - m.Version, - PURLQualifiers( - qualifiers, - distro, - ), - "", - ).ToString() -} - func (m DpkgMetadata) OwnedFiles() (result []string) { s := strset.New() for _, f := range m.Files { diff --git a/syft/pkg/url_test.go b/syft/pkg/url_test.go index 9afa9c350ee..7ee9ec10ba9 100644 --- a/syft/pkg/url_test.go +++ b/syft/pkg/url_test.go @@ -66,24 +66,6 @@ func TestPackageURL(t *testing.T) { }, expected: "pkg:npm/name@v0.1.0", }, - { - name: "deb", - distro: &linux.Release{ - ID: "ubuntu", - VersionID: "20.04", - }, - pkg: Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: DebPkg, - Metadata: DpkgMetadata{ - Package: "name", - Version: "v0.1.0", - Architecture: "amd64", - }, - }, - expected: "pkg:deb/ubuntu/name@v0.1.0?arch=amd64&distro=ubuntu-20.04", - }, { name: "rpm", distro: &linux.Release{ @@ -199,6 +181,7 @@ func TestPackageURL(t *testing.T) { expectedTypes.Remove(string(ConanPkg)) expectedTypes.Remove(string(DartPubPkg)) expectedTypes.Remove(string(DotnetPkg)) + expectedTypes.Remove(string(DebPkg)) for _, test := range tests { t.Run(test.name, func(t *testing.T) {