Skip to content

Commit

Permalink
fix(sbom): deduplicate SBOM packages by ID
Browse files Browse the repository at this point in the history
Signed-off-by: Dan Luhring <[email protected]>
  • Loading branch information
luhring committed Oct 24, 2024
1 parent 04b5ca4 commit a016065
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 53 deletions.
39 changes: 28 additions & 11 deletions pkg/sbom/generator/spdx/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"
"unicode/utf8"

"github.com/charmbracelet/log"
purl "github.com/package-url/packageurl-go"
"sigs.k8s.io/release-utils/version"

Expand Down Expand Up @@ -137,6 +138,18 @@ func (sx *SPDX) Generate(opts *options.Options, path string) error {
}
}

dedupedPackages := make([]Package, 0, len(doc.Packages))
seenIDs := make(map[string]struct{})
for i := range doc.Packages {
if _, ok := seenIDs[doc.Packages[i].ID]; !ok {
seenIDs[doc.Packages[i].ID] = struct{}{}
dedupedPackages = append(dedupedPackages, doc.Packages[i])
} else {
log.Info("duplicate package ID found in SBOM, deduplicating package...", "ID", doc.Packages[i].ID)
}
}
doc.Packages = dedupedPackages

if err := renderDoc(doc, path); err != nil {
return fmt.Errorf("rendering document: %w", err)
}
Expand Down Expand Up @@ -178,7 +191,9 @@ func replacePackage(doc *Document, originalID, newID string) {
}
}

// locateApkSBOM returns the SBOM
// locateApkSBOM returns the path to the SBOM in the given filesystem, using the
// given Package's name and version. It returns an empty string if the SBOM is
// not found.
func locateApkSBOM(fsys apkfs.FullFS, p *Package) (string, error) {
re := regexp.MustCompile(`-r\d+$`)
for _, s := range []string{
Expand Down Expand Up @@ -209,51 +224,53 @@ func (sx *SPDX) ProcessInternalApkSBOM(opts *options.Options, doc *Document, p *
return fmt.Errorf("inspecting FS for internal apk SBOM: %w", err)
}
if path == "" {
// The SBOM does not exist.
// (So just ignore that the package was specified to the SPDX Generate method?)
return nil
}

internalDoc, err := sx.ParseInternalSBOM(opts, path)
apkSBOMDoc, err := sx.ParseInternalSBOM(opts, path)
if err != nil {
// TODO: Log error parsing apk SBOM
return nil
}

// Cycle the top level elements...
elementIDs := map[string]struct{}{}
for _, elementID := range internalDoc.DocumentDescribes {
elementIDs[elementID] = struct{}{}
idsDescribedByAPKSBOM := map[string]struct{}{}
for _, elementID := range apkSBOMDoc.DocumentDescribes {
idsDescribedByAPKSBOM[elementID] = struct{}{}
}

// ... searching for a 1st level package
targetElementIDs := map[string]struct{}{}
for _, pkg := range internalDoc.Packages {
for _, pkg := range apkSBOMDoc.Packages {
// that matches the name
if p.Name != pkg.Name {
continue
}

if _, ok := elementIDs[pkg.ID]; !ok {
if _, ok := idsDescribedByAPKSBOM[pkg.ID]; !ok {
continue
}

targetElementIDs[pkg.ID] = struct{}{}
if len(targetElementIDs) == len(elementIDs) {
if len(targetElementIDs) == len(idsDescribedByAPKSBOM) {
// Exit early if we found them all.
break
}
}

// Copy the targetElementIDs
todo := make(map[string]struct{}, len(internalDoc.Relationships))
todo := make(map[string]struct{}, len(apkSBOMDoc.Relationships))
for id := range targetElementIDs {
todo[id] = struct{}{}
}

if err := copySBOMElements(internalDoc, doc, todo); err != nil {
if err := copySBOMElements(apkSBOMDoc, doc, todo); err != nil {
return fmt.Errorf("copying element: %w", err)
}

if err := mergeLicensingInfos(internalDoc, doc); err != nil {
if err := mergeLicensingInfos(apkSBOMDoc, doc); err != nil {
return fmt.Errorf("merging LicensingInfos: %w", err)
}

Expand Down
116 changes: 78 additions & 38 deletions pkg/sbom/generator/spdx/spdx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package spdx

import (
"encoding/json"
"fmt"
"os"
"path"
Expand Down Expand Up @@ -58,13 +59,24 @@ var testOpts = &options.Options{
},
}

// TODO: clean this up and make consistent with the other test cases
func TestGenerate(t *testing.T) {
dir := t.TempDir()
fsys := apkfs.NewMemFS()
sx := New(fsys)
path := filepath.Join(dir, testOpts.FileName+"."+sx.Ext())
err := sx.Generate(testOpts, path)
require.NoError(t, err)
require.FileExists(t, path)
}

func TestSPDX_Generate(t *testing.T) {
tests := []struct {
name string
opts *options.Options
}{
{
name: "custom license",
name: "custom-license",
opts: &options.Options{
OS: options.OSInfo{
Name: "unknown",
Expand All @@ -75,19 +87,15 @@ func TestSPDX_Generate(t *testing.T) {
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "font-ubuntu",
Version: "0.869-r1",
Arch: "x86_64",
Description: "Ubuntu font family",
License: "LicenseRef-ubuntu-font",
Origin: "font-ubuntu",
Name: "font-ubuntu",
Version: "0.869-r1",
},
},
},
},
},
{
name: "no supplier",
name: "no-supplier",
opts: &options.Options{
OS: options.OSInfo{
Name: "Apko Images, Plc",
Expand All @@ -98,12 +106,33 @@ func TestSPDX_Generate(t *testing.T) {
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "libattr1",
Version: "2.5.1-r2",
Arch: "x86_64",
Description: "library for managing filesystem extended attributes",
License: "GPL-2.0-or-later",
Origin: "attr",
Name: "libattr1",
Version: "2.5.1-r2",
},
},
},
},
},
{
name: "package-deduplicating",
opts: &options.Options{
OS: options.OSInfo{
Name: "unknown",
ID: "unknown",
Version: "3.0",
},
FileName: "sbom",
Packages: []*apk.InstalledPackage{
{
Package: apk.Package{
Name: "logstash-8",
Version: "8.15.3-r4",
},
},
{
Package: apk.Package{
Name: "logstash-8-compat",
Version: "8.15.3-r4",
},
},
},
Expand All @@ -113,49 +142,60 @@ func TestSPDX_Generate(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
pkgName := tt.opts.Packages[0].Name
apkSBOMPath := filepath.Join("testdata", "apk_sboms", fmt.Sprintf("%s.spdx.json", pkgName))
apkSBOMBytes, err := os.ReadFile(apkSBOMPath)
require.NoError(t, err)

fsys := apkfs.NewMemFS()
sbomDir := path.Join("var", "lib", "db", "sbom")
err = fsys.MkdirAll(sbomDir, 0750)
err := fsys.MkdirAll(sbomDir, 0750)
require.NoError(t, err)

sbomDestPath := path.Join(sbomDir, fmt.Sprintf("%s.spdx.json", pkgName))
err = fsys.WriteFile(sbomDestPath, apkSBOMBytes, 0644)
require.NoError(t, err)
for _, apkPkg := range tt.opts.Packages {
apkSBOMName := fmt.Sprintf("%s-%s.spdx.json", apkPkg.Name, apkPkg.Version)
apkSBOMTestdataPath := filepath.Join("testdata", "apk_sboms", apkSBOMName)
apkSBOMBytes, err := os.ReadFile(apkSBOMTestdataPath)
require.NoError(t, err)

sbomDestPath := path.Join(sbomDir, apkSBOMName)
err = fsys.WriteFile(sbomDestPath, apkSBOMBytes, 0644)
require.NoError(t, err)
}

sx := New(fsys)
imageSBOMDestPath := filepath.Join(t.TempDir(), pkgName+"."+sx.Ext())
imageSBOMName := fmt.Sprintf("%s.spdx.json", tt.name)
imageSBOMDestPath := filepath.Join(t.TempDir(), imageSBOMName)
err = sx.Generate(tt.opts, imageSBOMDestPath)
require.NoError(t, err)

actual, err := os.ReadFile(imageSBOMDestPath)
require.NoError(t, err)

expectedImageSBOMPath := filepath.Join("testdata", "expected_image_sboms", fmt.Sprintf("%s.spdx.json", pkgName))
expectedImageSBOMPath := filepath.Join("testdata", "expected_image_sboms", imageSBOMName)
expected, err := os.ReadFile(expectedImageSBOMPath)
require.NoError(t, err)

if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("Unexpected image SBOM (-want, +got): \n%s", diff)
}
t.Run("goldenfile diff", func(t *testing.T) {
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("Unexpected image SBOM (-want, +got): \n%s", diff)
}
})

t.Run("unique SPDX IDs", func(t *testing.T) {
doc := new(Document)
err := json.Unmarshal(actual, doc)
if err != nil {
t.Fatalf("unmarshalling SBOM: %v", err)
}

ids := make(map[string]struct{})
for _, p := range doc.Packages {
if _, ok := ids[p.ID]; ok {
t.Errorf("duplicate SPDX ID found: %s", p.ID)
}
ids[p.ID] = struct{}{}
}
})
})
}
}

func TestGenerate(t *testing.T) {
dir := t.TempDir()
fsys := apkfs.NewMemFS()
sx := New(fsys)
path := filepath.Join(dir, testOpts.FileName+"."+sx.Ext())
err := sx.Generate(testOpts, path)
require.NoError(t, err)
require.FileExists(t, path)
}

func TestReproducible(t *testing.T) {
// Create two sboms based on the same input and ensure
// they are identical
Expand Down
21 changes: 21 additions & 0 deletions pkg/sbom/generator/spdx/testdata/apk_sboms/_generate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh

set -euo pipefail

# Define an array of package name-version strings
packages=(
"font-ubuntu-0.869-r1"
"libattr1-2.5.1-r2"
"logstash-8-8.15.3-r4"
"logstash-8-compat-8.15.3-r4"
)

# Base URL for downloading APKs
base_url="https://packages.wolfi.dev/os/x86_64"

# Loop through the array and process each package
for pkg in "${packages[@]}"; do
url="${base_url}/${pkg}.apk"
output_path="${pkg}.spdx.json"
curl -q "$url" | tar Ozx "var/lib/db/sbom/${pkg}.spdx.json" >"$output_path" 2>/dev/null
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{
"SPDXID": "SPDXRef-DOCUMENT",
"name": "apk-logstash-8-8.15.3-r4",
"spdxVersion": "SPDX-2.3",
"creationInfo": {
"created": "2024-10-24T15:16:49Z",
"creators": [
"Tool: melange (v0.14.7)",
"Organization: Chainguard, Inc"
],
"licenseListVersion": "3.22"
},
"dataLicense": "CC0-1.0",
"documentNamespace": "https://spdx.org/spdxdocs/chainguard/melange/0434ffabb9f0343f5dada78abdb2e419b2be48fb",
"documentDescribes": [
"SPDXRef-Package-logstash-8-8.15.3-r4"
],
"packages": [
{
"SPDXID": "SPDXRef-Package-logstash-8-8.15.3-r4",
"name": "logstash-8",
"versionInfo": "8.15.3-r4",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Wolfi",
"supplier": "Organization: Wolfi",
"copyrightText": "\n",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:apk/wolfi/[email protected]?arch=x86_64",
"referenceType": "purl"
}
]
},
{
"SPDXID": "SPDXRef-Package-logstash-8.yaml-c7d40faa38ddffa98700cfa4c2f9bde196acc504",
"name": "logstash-8.yaml",
"versionInfo": "c7d40faa38ddffa98700cfa4c2f9bde196acc504",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Wolfi",
"supplier": "Organization: Wolfi",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:github/wolfi-dev/os@c7d40faa38ddffa98700cfa4c2f9bde196acc504#logstash-8.yaml",
"referenceType": "purl"
}
]
},
{
"SPDXID": "SPDXRef-Package-github.com-elastic-logstash-v8.15.3-8364c8e89cfb113e38ec3f966df7eb1e9abe9d33-0",
"name": "logstash",
"versionInfo": "v8.15.3",
"filesAnalyzed": false,
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "Apache-2.0",
"downloadLocation": "NOASSERTION",
"originator": "Organization: Elastic",
"supplier": "Organization: Elastic",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceLocator": "pkg:github/elastic/[email protected]",
"referenceType": "purl"
}
]
}
],
"relationships": [
{
"spdxElementId": "SPDXRef-Package-logstash-8-8.15.3-r4",
"relationshipType": "DESCRIBED_BY",
"relatedSpdxElement": "SPDXRef-Package-logstash-8.yaml-c7d40faa38ddffa98700cfa4c2f9bde196acc504"
},
{
"spdxElementId": "SPDXRef-Package-logstash-8-8.15.3-r4",
"relationshipType": "GENERATED_FROM",
"relatedSpdxElement": "SPDXRef-Package-github.com-elastic-logstash-v8.15.3-8364c8e89cfb113e38ec3f966df7eb1e9abe9d33-0"
}
]
}
Loading

0 comments on commit a016065

Please sign in to comment.