Skip to content

Commit

Permalink
Pull in more v6 schema changes (#456)
Browse files Browse the repository at this point in the history
* fix transformers, split packaging, better logs

Signed-off-by: Alex Goodman <[email protected]>

* bump grype

Signed-off-by: Alex Goodman <[email protected]>

* keep old legacy package file

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman authored Dec 20, 2024
1 parent 5973c76 commit 07e7a21
Show file tree
Hide file tree
Showing 14 changed files with 391 additions and 253 deletions.
12 changes: 4 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,17 @@ module github.com/anchore/grype-db

go 1.23.2

toolchain go1.23.4

require (
github.com/Masterminds/semver/v3 v3.3.1
github.com/OneOfOne/xxhash v1.2.8
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/adrg/xdg v0.5.3
github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a
github.com/anchore/grype v0.86.1
github.com/anchore/syft v1.18.1
github.com/anchore/grype v0.86.2-0.20241218195423-d94e68a680dc
github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
github.com/dave/jennifer v1.7.1
github.com/dustin/go-humanize v1.0.1
github.com/glebarez/sqlite v1.11.0
github.com/go-test/deep v1.1.1
github.com/google/go-cmp v0.6.0
Expand Down Expand Up @@ -102,13 +101,12 @@ require (
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/cli v27.4.0+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker v27.4.0+incompatible // indirect
github.com/docker/docker v27.4.1+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/docker/go-connections v0.4.0 // indirect
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/edsrzf/mmap-go v1.1.0 // indirect
github.com/elliotchance/phpserialize v1.4.0 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
Expand Down Expand Up @@ -246,7 +244,5 @@ require (
modernc.org/sqlite v1.34.2 // indirect
)

replace github.com/mholt/archiver/v3 v3.5.1 => github.com/anchore/archiver/v3 v3.5.2

// this is a breaking change, so we need to pin the version until glebarez/go-sqlite is updated to use internal/libc
replace modernc.org/sqlite v1.33.0 => modernc.org/sqlite v1.32.0
12 changes: 6 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,14 @@ github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0v
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=
github.com/anchore/go-version v1.2.2-0.20210903204242-51efa5b487c4 h1:rmZG77uXgE+o2gozGEBoUMpX27lsku+xrMwlmBZJtbg=
github.com/anchore/go-version v1.2.2-0.20210903204242-51efa5b487c4/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E=
github.com/anchore/grype v0.86.1 h1:HWpzCOCwjKkwkIEEC5lcKI4yl6GhTF3+Z12tXWYtMoI=
github.com/anchore/grype v0.86.1/go.mod h1:k3VnXfi+e/OGx1mTUL733gy3fyB4W/AdHP8fSyQML9w=
github.com/anchore/grype v0.86.2-0.20241218195423-d94e68a680dc h1:DmRYOnqIuu81SZzac60zscLGf/NXQe6FFnNBPoXV71c=
github.com/anchore/grype v0.86.2-0.20241218195423-d94e68a680dc/go.mod h1:rqbEt5hrFdlJ8nrk8VFv0A77XD4gzhkWk8LU9iU77AA=
github.com/anchore/packageurl-go v0.1.1-0.20241018175412-5c22e6360c4f h1:dAQPIrQ3a5PBqZeZ+B9NGZsGmodk4NO9OjDIsQmQyQM=
github.com/anchore/packageurl-go v0.1.1-0.20241018175412-5c22e6360c4f/go.mod h1:KoYIv7tdP5+CC9VGkeZV4/vGCKsY55VvoG+5dadg4YI=
github.com/anchore/stereoscope v0.0.11 h1:d+dePyWyQzoQehnWOnx/aISW5HW1zLAQKzvaFIpydsU=
github.com/anchore/stereoscope v0.0.11/go.mod h1:dxQyMHSdvgOCscQd/lInPHeP5xCJsZYxpzvzy8Y804Y=
github.com/anchore/syft v1.18.1 h1:JZ7CLbeWrWolCZa4f6SJBLJ9qGBLFCzHrFd8c4bsm94=
github.com/anchore/syft v1.18.1/go.mod h1:ufXPZcjmoTjERaC0HTEW2+chF+fQdryhaQ9arcUO2WQ=
github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd h1:11d0Pzp4Ysw1XxloRS6cHNDBWwqB3MSMzffgMYwFDUw=
github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd/go.mod h1:A8LH+VE33zk5efyBdo45/X9BdXEFrMvetwjMvPV+OFw=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
Expand Down Expand Up @@ -381,8 +381,8 @@ github.com/docker/cli v27.4.0+incompatible h1:/nJzWkcI1MDMN+U+px/YXnQWJqnu4J+QKG
github.com/docker/cli v27.4.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v27.4.0+incompatible h1:I9z7sQ5qyzO0BfAb9IMOawRkAGxhYsidKiTMcm0DU+A=
github.com/docker/docker v27.4.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker v27.4.1+incompatible h1:ZJvcY7gfwHn1JF48PfbyXg7Jyt9ZCWDW+GGXOIxEwp4=
github.com/docker/docker v27.4.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
Expand Down
2 changes: 2 additions & 0 deletions internal/tarutil/reader_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ func (t ReaderEntry) writeEntry(tw lowLevelWriter) error {
}

func writeEntry(tw lowLevelWriter, filename string, fileInfo os.FileInfo, opener func() (io.Reader, error)) error {
log.WithFields("path", filename).Trace("adding file to archive")

header, err := tar.FileInfoHeader(fileInfo, "")
if err != nil {
return err
Expand Down
80 changes: 74 additions & 6 deletions pkg/process/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"fmt"
"time"

"github.com/dustin/go-humanize"

"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/pkg/data"
v3 "github.com/anchore/grype-db/pkg/process/v3"
Expand Down Expand Up @@ -104,17 +106,39 @@ func getWriter(schemaVersion int, dataAge time.Time, directory string, states pr

func build(results []providerResults, writer data.Writer, processors ...data.Processor) error {
lastUpdate := time.Now()
var totalRecords int
for _, result := range results {
totalRecords += int(result.count)
}
log.WithFields("total", humanize.Comma(int64(totalRecords))).Info("processing all records")

var recordsProcessed int

// for the exponential moving average, choose an alpha between 0 and 1: values near 1 weight the most recent sample
// most heavily, while values near 0 weight the accumulated history most heavily (smoother, but slower to react).
rateWindow := newEMA(0.4)

for _, result := range results {
log.WithFields("provider", result.provider.Provider, "count", result.count).Info("processing provider records")
idx := 0
log.WithFields("provider", result.provider.Provider, "total", humanize.Comma(result.count)).Info("processing provider records")
providerRecordsProcessed := 0
recordsProcessedInStatusCycle := 0
for opener := range result.openers {
idx++
log.WithFields("entry", opener.String()).Tracef("processing")
providerRecordsProcessed++
recordsProcessed++
recordsProcessedInStatusCycle++
var processor data.Processor

if time.Since(lastUpdate) > 3*time.Second {
log.WithFields("provider", result.provider.Provider, "count", result.count, "processed", idx).Debug("processing provider records")
r := recordsPerSecond(recordsProcessedInStatusCycle, lastUpdate)
rateWindow.Add(r)

log.WithFields(
"provider", fmt.Sprintf("%q %1.0f/s (%1.2f%%)", result.provider.Provider, r, percent(providerRecordsProcessed, int(result.count))),
"overall", fmt.Sprintf("%1.2f%%", percent(recordsProcessed, totalRecords)),
"eta", eta(recordsProcessed, totalRecords, rateWindow.Average()).String(),
).Debug("status")
lastUpdate = time.Now()
recordsProcessedInStatusCycle = 0
}

f, err := opener.Open()
Expand All @@ -129,7 +153,6 @@ func build(results []providerResults, writer data.Writer, processors ...data.Pro
for _, candidate := range processors {
if candidate.IsSupported(envelope.Schema) {
processor = candidate
log.WithFields("schema", envelope.Schema).Trace("matched with processor")
break
}
}
Expand All @@ -153,3 +176,48 @@ func build(results []providerResults, writer data.Writer, processors ...data.Pro

return nil
}

// expMovingAverage keeps an exponentially weighted moving average over a
// stream of float64 samples.
type expMovingAverage struct {
	alpha float64 // weight applied to each new sample once the average has been seeded
	value float64 // current average; zero until the first sample is added
	count int     // number of samples added so far
}

// newEMA returns an empty moving average that weights new samples by alpha.
func newEMA(alpha float64) *expMovingAverage {
	ema := new(expMovingAverage)
	ema.alpha = alpha
	return ema
}

// Add folds sample into the average. The first sample seeds the average
// directly; every later sample is blended in as
// alpha*sample + (1-alpha)*previous.
func (e *expMovingAverage) Add(sample float64) {
	seeded := e.count != 0
	e.count++

	if !seeded {
		e.value = sample
		return
	}
	e.value = e.alpha*sample + (1-e.alpha)*e.value
}

// Average returns the current value of the moving average (zero when no
// sample has been added yet).
func (e *expMovingAverage) Average() float64 {
	return e.value
}

// recordsPerSecond returns the processing rate for idx records handled since
// lastUpdate. It reports 0 when no time has elapsed, which avoids a division
// by zero.
func recordsPerSecond(idx int, lastUpdate time.Time) float64 {
	elapsed := time.Since(lastUpdate).Seconds()
	if elapsed != 0 {
		return float64(idx) / elapsed
	}
	return 0
}

// percent expresses idx as a percentage of total. A total of 0 yields 0
// rather than dividing by zero.
func percent(idx, total int) float64 {
	if total == 0 {
		return 0
	}
	ratio := float64(idx) / float64(total)
	return ratio * 100
}

// eta estimates the time left to process the remaining total-idx records at
// rate records per second, truncated to whole seconds.
//
// The result is never negative: it is 0 when there is nothing left to process
// (idx >= total) or when rate is not a positive number (zero, negative, or
// NaN). Estimates too large to represent are clamped to the maximum
// time.Duration instead of overflowing into a meaningless value.
func eta(idx, total int, rate float64) time.Duration {
	const maxDuration = time.Duration(1<<63 - 1)

	remaining := total - idx
	// !(rate > 0) also rejects NaN, which a plain rate <= 0 would let through
	if remaining <= 0 || !(rate > 0) {
		return 0
	}

	seconds := float64(remaining) / rate
	// converting an out-of-range float to an integer is not well defined, and
	// multiplying by time.Second could wrap, so clamp before converting
	if seconds >= float64(maxDuration/time.Second) {
		return maxDuration
	}
	return time.Duration(seconds) * time.Second
}
173 changes: 3 additions & 170 deletions pkg/process/package.go
Original file line number Diff line number Diff line change
@@ -1,185 +1,18 @@
package process

import (
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"time"

"github.com/scylladb/go-set/strset"

"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/internal/tarutil"
"github.com/anchore/grype-db/pkg/provider"
v6process "github.com/anchore/grype-db/pkg/process/v6"
grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/legacy/distribution"
v6 "github.com/anchore/grype/grype/db/v6"
v6Distribution "github.com/anchore/grype/grype/db/v6/distribution"
)

// Package creates a distributable archive from the database found in dbDir.
// When the legacy metadata file does not exist in dbDir the v6 packaging path
// is taken; any other outcome of the stat call (including errors other than
// "not exist") falls through to packageLegacyDB, which is the only path that
// receives publishBaseURL.
func Package(dbDir, publishBaseURL, overrideArchiveExtension string) error {
// no legacy metadata file on disk means this is not a legacy DB, so use the v6 packaging path
if _, err := os.Stat(filepath.Join(dbDir, grypeDBLegacyDistribution.MetadataFileName)); os.IsNotExist(err) {
return packageDB(dbDir, overrideArchiveExtension)
// TODO: detect from disk which version of the DB is present
return v6process.CreateArchive(dbDir, overrideArchiveExtension)
}
return packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension)
}

// packageDB archives the v6 vulnerability database found in dbDir and then
// writes the latest document describing that archive.
//
// The archive is created inside dbDir and is named
// vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.EXT, where:
//   - VERSION: schema version in the form of v#.#.#
//   - OLDESTDATADATE: RFC3339 formatted value of the oldest capture date found for all contained providers
//   - BUILTEPOCH: unix epoch formatted value of the database metadata build timestamp
//   - EXT: the archive extension, "tar.zst" unless overrideArchiveExtension selects another supported one
//
// An error is returned when the extension is unsupported, the store cannot be
// opened, its metadata is missing or incomplete, the metadata model does not
// match v6.ModelVersion, the store has no providers, or the archive or latest
// document cannot be written.
func packageDB(dbDir, overrideArchiveExtension string) error {
	extension, err := resolveExtension(overrideArchiveExtension)
	if err != nil {
		return err
	}
	log.WithFields("from", dbDir, "extension", extension).Info("packaging database")

	s, err := v6.NewReader(v6.Config{DBDirPath: dbDir})
	if err != nil {
		return fmt.Errorf("unable to open vulnerability store: %w", err)
	}

	metadata, err := s.GetDBMetadata()
	if err != nil {
		return fmt.Errorf("unable to get vulnerability store metadata: %w", err)
	}
	// a missing record is reported separately so that a nil error is never wrapped with %w
	if metadata == nil {
		return fmt.Errorf("unable to get vulnerability store metadata: no metadata found")
	}

	if metadata.Model != v6.ModelVersion {
		return fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model)
	}

	// the build timestamp is dereferenced for the archive name below, so reject a missing one up front
	if metadata.BuildTimestamp == nil {
		return fmt.Errorf("vulnerability store metadata has no build timestamp")
	}

	providerModels, err := s.AllProviders()
	if err != nil {
		return fmt.Errorf("unable to get all providers: %w", err)
	}

	if len(providerModels) == 0 {
		return fmt.Errorf("no providers found in the vulnerability store")
	}

	eldest, err := toProviders(providerModels).EarliestTimestamp()
	if err != nil {
		return err
	}

	tarName := fmt.Sprintf(
		"vulnerability-db_v%s_%s_%d.%s",
		fmt.Sprintf("%d.%d.%d", metadata.Model, metadata.Revision, metadata.Addition),
		eldest.UTC().Format(time.RFC3339),
		metadata.BuildTimestamp.Unix(),
		extension,
	)

	tarPath := filepath.Join(dbDir, tarName)

	if err := populateTar(tarPath); err != nil {
		return err
	}

	log.WithFields("path", tarPath).Info("created database archive")

	return writeLatestDocument(tarPath, *metadata)
}

// toProviders converts v6 provider models into provider states, carrying over
// each provider's ID and capture date. The result is nil when states is empty.
func toProviders(states []v6.Provider) provider.States {
	var converted provider.States
	for i := range states {
		model := states[i]
		// NOTE(review): DateCaptured is dereferenced unconditionally, so a provider
		// without a capture date would panic here — confirm it is always set.
		entry := provider.State{
			Provider:  model.ID,
			Timestamp: *model.DateCaptured,
		}
		converted = append(converted, entry)
	}
	return converted
}

// resolveExtension returns the archive extension to use, without a leading
// dot. An empty override selects the default "tar.zst"; otherwise the
// override is used after stripping any leading dots. Only "tar.zst",
// "tar.xz", and "tar.gz" are accepted; anything else yields an error.
func resolveExtension(overrideArchiveExtension string) (string, error) {
	extension := "tar.zst"
	if overrideArchiveExtension != "" {
		extension = strings.TrimLeft(overrideArchiveExtension, ".")
	}

	switch extension {
	case "tar.zst", "tar.xz", "tar.gz":
		return extension, nil
	}
	return "", fmt.Errorf("unsupported archive extension %q", extension)
}

// listingFiles holds the names of files that populateTar leaves out of the
// database archive.
var listingFiles = strset.New("listing.json", "latest.json", "history.json")

// populateTar creates the archive at tarPath from the contents of the
// directory that tarPath lives in. Entries named in listingFiles and entries
// whose name contains ".tar." are left out.
//
// When tarPath has a directory component, the process working directory is
// switched to it for the duration of the call and restored afterwards; a
// failure to restore is logged rather than returned.
func populateTar(tarPath string) error {
	startDir, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("unable to get CWD: %w", err)
	}

	dbDir, tarName := filepath.Split(tarPath)

	if dbDir != "" {
		if err = os.Chdir(dbDir); err != nil {
			return fmt.Errorf("unable to cd to build dir: %w", err)
		}

		defer func() {
			if cdErr := os.Chdir(startDir); cdErr != nil {
				log.Errorf("unable to cd to original dir: %v", cdErr)
			}
		}()
	}

	entries, err := os.ReadDir("./")
	if err != nil {
		return fmt.Errorf("unable to list db directory: %w", err)
	}

	var files []string
	for _, entry := range entries {
		name := entry.Name()
		if listingFiles.Has(name) || strings.Contains(name, ".tar.") {
			continue
		}
		files = append(files, name)
	}

	if err = tarutil.PopulateWithPaths(tarName, files...); err != nil {
		return fmt.Errorf("unable to create db archive: %w", err)
	}

	return nil
}

// writeLatestDocument writes the latest document describing the archive at
// tarPath into the same directory as the archive, under
// v6Distribution.LatestFileName. An existing file is truncated.
//
// An error is returned when the metadata has no build timestamp, the archive
// description or latest document cannot be created, or the file cannot be
// created, written, or closed.
func writeLatestDocument(tarPath string, metadata v6.DBMetadata) error {
	// the timestamp is dereferenced below, so a missing one is an error rather than a panic
	if metadata.BuildTimestamp == nil {
		return errors.New("unable to create archive: metadata has no build timestamp")
	}

	archive, err := v6Distribution.NewArchive(tarPath, *metadata.BuildTimestamp, metadata.Model, metadata.Revision, metadata.Addition)
	if err != nil {
		return fmt.Errorf("unable to create archive: %w", err)
	}
	// reported separately so that a nil error is never wrapped with %w
	if archive == nil {
		return errors.New("unable to create archive: no archive description returned")
	}

	doc := v6Distribution.NewLatestDocument(*archive)
	if doc == nil {
		return errors.New("unable to create latest document")
	}

	dbDir := filepath.Dir(tarPath)

	latestPath := path.Join(dbDir, v6Distribution.LatestFileName)

	fh, err := os.OpenFile(latestPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("unable to create latest file: %w", err)
	}

	if err = doc.Write(fh); err != nil {
		// the write failure is the error worth reporting; closing is best-effort here
		_ = fh.Close()
		return fmt.Errorf("unable to write latest document: %w", err)
	}

	// a failed close can mean the data never reached disk, so surface it
	if err = fh.Close(); err != nil {
		return fmt.Errorf("unable to close latest file: %w", err)
	}
	return nil
}
Loading

0 comments on commit 07e7a21

Please sign in to comment.