Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use concurrency for path scanning #405

Merged
merged 24 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bincapz.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"io/fs"
"log/slog"
"os"
"runtime"
"strings"

"github.com/chainguard-dev/bincapz/pkg/action"
Expand Down Expand Up @@ -56,6 +57,7 @@ func parseRisk(s string) int {

func main() {
allFlag := flag.Bool("all", false, "Ignore nothing, show all")
concurrencyFlag := flag.Int("j", runtime.NumCPU(), "Concurrently scan files within target directories")
diffFlag := flag.Bool("diff", false, "Show capability drift between two files")
formatFlag := flag.String("format", "terminal", "Output type -- valid values are: json, markdown, simple, terminal, yaml")
ignoreSelfFlag := flag.Bool("ignore-self", true, "Ignore the bincapz binary")
Expand Down Expand Up @@ -192,6 +194,7 @@ func main() {
Stats: stats,
ErrFirstHit: *errFirstHitFlag,
ErrFirstMiss: *errFirstMissFlag,
Concurrency: *concurrencyFlag,
}

var res *bincapz.Report
Expand Down
8 changes: 6 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/chainguard-dev/bincapz

go 1.23
go 1.23.0

require (
github.com/agext/levenshtein v1.2.3
Expand All @@ -12,17 +12,22 @@ require (
github.com/liamg/magic v0.0.1
github.com/olekukonko/tablewriter v0.0.5
github.com/ulikunitz/xz v0.5.12
github.com/wk8/go-ordered-map/v2 v2.1.8
golang.org/x/sync v0.8.0
golang.org/x/term v0.23.0
gopkg.in/yaml.v3 v3.0.1
)

require (
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect
github.com/docker/cli v27.1.2+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker-credential-helpers v0.8.2 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kr/pretty v0.2.1 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
Expand All @@ -33,6 +38,5 @@ require (
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/vbatts/tar-split v0.11.5 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
)
19 changes: 9 additions & 10 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/chainguard-dev/clog v1.4.0 h1:x0YyEppnUX+dxQAnfGNYdQEKNDSRCAwC08f/1eIxJ9E=
github.com/chainguard-dev/clog v1.4.0/go.mod h1:cV516KZWqYc/phZsCNwF36u/KMGS+Gj5Uqeb8Hlp95Y=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/chainguard-dev/clog v1.5.0 h1:VFwdxf+4x7+EG8lRO4/tZFP7Hn/NG8OVkVNfgnnsADw=
github.com/chainguard-dev/clog v1.5.0/go.mod h1:4+WFhRMsGH79etYXY3plYdp+tCz/KCkU8fAr0HoaPvs=
github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU=
github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/cli v27.1.1+incompatible h1:goaZxOqs4QKxznZjjBWKONQci/MywhtRv2oNn0GkeZE=
github.com/docker/cli v27.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/cli v27.1.2+incompatible h1:nYviRv5Y+YAKx3dFrTvS1ErkyVVunKOhoweCTE1BsnI=
github.com/docker/cli v27.1.2+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
Expand All @@ -25,6 +25,7 @@ github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l
github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=
github.com/hillu/go-yara/v4 v4.3.3 h1:O+7iYTZK20fzsXiJyvA0d529RTdnZCrgS6HdE0O7BMg=
github.com/hillu/go-yara/v4 v4.3.3/go.mod h1:AHEs/FXVMQKVVlT6iG9d+q1BRr0gq0WoAWZQaZ0gS7s=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
Expand All @@ -34,14 +35,14 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/liamg/magic v0.0.1 h1:Ru22ElY+sCh6RvRTWjQzKKCxsEco8hE0co8n1qe7TBM=
github.com/liamg/magic v0.0.1/go.mod h1:yQkOmZZI52EA+SQ2xyHpVw8fNvTBruF873Y+Vt6S+fk=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
Expand Down Expand Up @@ -69,15 +70,13 @@ github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts=
github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM=
golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
Expand Down
23 changes: 8 additions & 15 deletions pkg/action/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ import (
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
"runtime"
"testing"

"github.com/chainguard-dev/bincapz/pkg/bincapz"
Expand Down Expand Up @@ -230,11 +229,12 @@ func TestScanArchive(t *testing.T) {
t.Fatalf("render: %v", err)
}
bc := bincapz.Config{
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Rules: yrs,
ScanPaths: []string{"testdata/apko_nested.tar.gz"},
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Rules: yrs,
ScanPaths: []string{"testdata/apko_nested.tar.gz"},
Concurrency: runtime.NumCPU(),
}
res, err := Scan(ctx, bc)
if err != nil {
Expand All @@ -246,14 +246,7 @@ func TestScanArchive(t *testing.T) {

outBytes := out.Bytes()

// Sort the output to ensure consistent ordering
sorted := func(input []byte) []byte {
lines := strings.Split(string(input), "\n")
sort.Strings(lines)
return []byte(strings.Join(lines, "\n"))
}
sortedBytes := sorted(outBytes)
got := string(sortedBytes)
got := string(outBytes)

td, err := os.ReadFile("testdata/scan_archive")
if err != nil {
Expand Down
49 changes: 27 additions & 22 deletions pkg/action/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/agext/levenshtein"
"github.com/chainguard-dev/bincapz/pkg/bincapz"
"github.com/chainguard-dev/clog"
orderedmap "github.com/wk8/go-ordered-map/v2"
)

func relFileReport(ctx context.Context, c bincapz.Config, fromPath string) (map[string]*bincapz.FileReport, error) {
Expand All @@ -25,15 +26,15 @@ func relFileReport(ctx context.Context, c bincapz.Config, fromPath string) (map[
return nil, err
}
fromRelPath := map[string]*bincapz.FileReport{}
for _, f := range fromReport.Files {
if f.Skipped != "" || f.Error != "" {
for files := fromReport.Files.Oldest(); files != nil; files = files.Next() {
tstromberg marked this conversation as resolved.
Show resolved Hide resolved
if files.Value.Skipped != "" || files.Value.Error != "" {
continue
}
rel, err := filepath.Rel(fromPath, f.Path)
rel, err := filepath.Rel(fromPath, files.Value.Path)
if err != nil {
return nil, fmt.Errorf("rel(%q,%q): %w", fromPath, f.Path, err)
return nil, fmt.Errorf("rel(%q,%q): %w", fromPath, files.Value.Path, err)
}
fromRelPath[rel] = f
fromRelPath[rel] = files.Value
}

return fromRelPath, nil
Expand All @@ -55,9 +56,9 @@ func Diff(ctx context.Context, c bincapz.Config) (*bincapz.Report, error) {
}

d := &bincapz.DiffReport{
Added: map[string]*bincapz.FileReport{},
Removed: map[string]*bincapz.FileReport{},
Modified: map[string]*bincapz.FileReport{},
Added: orderedmap.New[string, *bincapz.FileReport](),
Removed: orderedmap.New[string, *bincapz.FileReport](),
Modified: orderedmap.New[string, *bincapz.FileReport](),
}

processSrc(ctx, c, src, dest, d)
Expand All @@ -74,7 +75,7 @@ func processSrc(ctx context.Context, c bincapz.Config, src, dest map[string]*bin
for relPath, fr := range src {
tr, exists := dest[relPath]
if !exists {
d.Removed[relPath] = fr
d.Removed.Set(relPath, fr)
continue
}
handleFile(ctx, c, fr, tr, relPath, d)
Expand All @@ -98,7 +99,7 @@ func handleFile(ctx context.Context, c bincapz.Config, fr, tr *bincapz.FileRepor
}
}

d.Modified[relPath] = rbs
d.Modified.Set(relPath, rbs)
}

func createFileReport(tr, fr *bincapz.FileReport) *bincapz.FileReport {
Expand Down Expand Up @@ -127,7 +128,7 @@ func processDest(ctx context.Context, c bincapz.Config, from, to map[string]*bin
for relPath, tr := range to {
fr, exists := from[relPath]
if !exists {
d.Added[relPath] = tr
d.Added.Set(relPath, tr)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the pattern for adding a KV pair to an orderedmap dictionary.

continue
}

Expand All @@ -153,10 +154,13 @@ func fileDestination(ctx context.Context, c bincapz.Config, fr, tr *bincapz.File
}

// are there already modified behaviors for this file?
if _, exists := d.Modified[relPath]; !exists {
d.Modified[relPath] = abs
if _, exists := d.Modified.Get(relPath); !exists {
d.Modified.Set(relPath, abs)
} else {
d.Modified[relPath].Behaviors = append(d.Modified[relPath].Behaviors, abs.Behaviors...)
if rel, exists := d.Modified.Get(relPath); exists {
rel.Behaviors = append(rel.Behaviors, abs.Behaviors...)
d.Modified.Set(relPath, rel)
}
}
}

Expand All @@ -172,20 +176,21 @@ func combineReports(d *bincapz.DiffReport) []diffReports {
diffs := make(chan diffReports)
var wg sync.WaitGroup

for rpath, rfr := range d.Removed {
for removed := d.Removed.Oldest(); removed != nil; removed = removed.Next() {
wg.Add(1)
go func(path string, fr *bincapz.FileReport) {
defer wg.Done()
for apath, afr := range d.Added {
for added := d.Added.Oldest(); added != nil; added = added.Next() {
diffs <- diffReports{
Added: apath,
AddedFR: afr,
Added: added.Key,
AddedFR: added.Value,
Removed: path,
RemovedFR: fr,
}
}
}(rpath, rfr)
}(removed.Key, removed.Value)
}

go func() {
wg.Wait()
close(diffs)
Expand Down Expand Up @@ -246,8 +251,8 @@ func fileMove(ctx context.Context, c bincapz.Config, fr, tr *bincapz.FileReport,

// Move these into the modified list if the files are not completely different (something like ~0.3)
if score > 0.3 {
d.Modified[apath] = abs
delete(d.Removed, rpath)
delete(d.Added, apath)
d.Modified.Set(apath, abs)
d.Modified.Delete(rpath)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the pattern for deleting a key from an orderedmap.

d.Modified.Delete(apath)
}
}
36 changes: 15 additions & 21 deletions pkg/action/oci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"bytes"
"io/fs"
"os"
"regexp"
"runtime"
"testing"

"github.com/chainguard-dev/bincapz/pkg/bincapz"
Expand All @@ -14,21 +14,13 @@ import (
thirdparty "github.com/chainguard-dev/bincapz/third_party"
"github.com/chainguard-dev/clog"
"github.com/chainguard-dev/clog/slogtest"
"github.com/google/go-cmp/cmp"
)

func reduceMarkdown(s string) string {
spaceRe := regexp.MustCompile(` +`)
dashRe := regexp.MustCompile(` -`)

s = spaceRe.ReplaceAllString(s, " ")
s = dashRe.ReplaceAllString(s, " ")
return s
}

func TestOCI(t *testing.T) {
t.Parallel()
ctx := slogtest.Context(t)
clog.FromContext(ctx).With("test", "scan_archive")
clog.FromContext(ctx).With("test", "scan_oci")

yrs, err := compile.Recursive(ctx, []fs.FS{rules.FS, thirdparty.FS})
if err != nil {
Expand All @@ -42,12 +34,13 @@ func TestOCI(t *testing.T) {
}

bc := bincapz.Config{
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Rules: yrs,
ScanPaths: []string{"cgr.dev/chainguard/static"},
OCI: true,
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Rules: yrs,
ScanPaths: []string{"cgr.dev/chainguard/static"},
OCI: true,
Concurrency: runtime.NumCPU(),
}
res, err := Scan(ctx, bc)
if err != nil {
Expand All @@ -57,14 +50,15 @@ func TestOCI(t *testing.T) {
t.Fatalf("full: %v", err)
}

got := reduceMarkdown(out.String())
got := out.String()

td, err := os.ReadFile("testdata/scan_oci")
if err != nil {
t.Fatalf("testdata read failed: %v", err)
}
want := reduceMarkdown(string(td))
if got != want {
t.Fatalf("got %q, want %q", got, want)
// Compare the golden file contents verbatim; no sorting or normalization is applied
want := string(td)
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("Simple output mismatch: (-want +got):\n%s", diff)
}
}
2 changes: 1 addition & 1 deletion pkg/action/programkind.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func programKind(ctx context.Context, path string) string {
headerString := ""
n, err := io.ReadFull(f, header[:])
if err == nil || errors.Is(err, io.ErrUnexpectedEOF) {
kind, err := magic.Lookup(header[:n])
kind, err := magic.LookupSync(header[:n])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I doubt we were seeing much of a benefit from this.

LookupSync looks up the file type based on the provided magic bytes without spawning any additional goroutines. You should provide at least the first 1024 bytes of the file in this slice. A magic.ErrUnknown will be returned if the file type is not known.

if err == nil {
desc = kind.Description
}
Expand Down
Loading