Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve frequency-based upgrade heuristics, add flag to disable #342

Merged
merged 12 commits into from
Jul 9, 2024
4 changes: 3 additions & 1 deletion bincapz.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func main() {
errFirstHitFlag := flag.Bool("err-first-hit", false, "exit with error if scan source has matching capabilities")
ociFlag := flag.Bool("oci", false, "Scan an OCI image")
omitEmptyFlag := flag.Bool("omit-empty", false, "Omit files that contain no matches")
frequencyUpgradeFlag := flag.Bool("frequency-upgrade", true, "increase file risk score based on frequency heuristics")
tstromberg marked this conversation as resolved.
Show resolved Hide resolved
profileFlag := flag.Bool("profile", false, "Generate profile and trace files")
statsFlag := flag.Bool("stats", false, "Show statistics about the scan")
thirdPartyFlag := flag.Bool("third-party", true, "Include third-party rules, which may have licensing restrictions")
Expand Down Expand Up @@ -178,12 +179,13 @@ func main() {
return
}

bc := action.Config{
bc := bincapz.Config{
IgnoreSelf: *ignoreSelfFlag,
IgnoreTags: ignoreTags,
IncludeDataFiles: includeDataFiles,
MinFileRisk: minFileRisk,
MinRisk: minRisk,
FrequencyUpgrade: *frequencyUpgradeFlag,
OCI: *ociFlag,
OmitEmpty: *omitEmptyFlag,
Renderer: renderer,
Expand Down
24 changes: 0 additions & 24 deletions pkg/action/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,3 @@
// SPDX-License-Identifier: Apache-2.0

package action

import (
"io"

"github.com/chainguard-dev/bincapz/pkg/render"
"github.com/hillu/go-yara/v4"
)

type Config struct {
IgnoreSelf bool
IgnoreTags []string
IncludeDataFiles bool
MinFileRisk int
MinRisk int
OCI bool
OmitEmpty bool
Output io.Writer
Renderer render.Renderer
Rules *yara.Rules
ScanPaths []string
Stats bool
ErrFirstMiss bool
ErrFirstHit bool
}
3 changes: 2 additions & 1 deletion pkg/action/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"
"testing"

"github.com/chainguard-dev/bincapz/pkg/bincapz"
"github.com/chainguard-dev/bincapz/pkg/compile"
"github.com/chainguard-dev/bincapz/pkg/render"
"github.com/chainguard-dev/bincapz/rules"
Expand Down Expand Up @@ -228,7 +229,7 @@ func TestScanArchive(t *testing.T) {
if err != nil {
t.Fatalf("render: %v", err)
}
bc := Config{
bc := bincapz.Config{
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Expand Down
16 changes: 8 additions & 8 deletions pkg/action/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
"github.com/chainguard-dev/clog"
)

func relFileReport(ctx context.Context, c Config, fromPath string) (map[string]*bincapz.FileReport, error) {
func relFileReport(ctx context.Context, c bincapz.Config, fromPath string) (map[string]*bincapz.FileReport, error) {
fromConfig := c
fromConfig.Renderer = nil
fromConfig.ScanPaths = []string{fromPath}
Expand All @@ -40,7 +40,7 @@ func relFileReport(ctx context.Context, c Config, fromPath string) (map[string]*
return fromRelPath, nil
}

func Diff(ctx context.Context, c Config) (*bincapz.Report, error) {
func Diff(ctx context.Context, c bincapz.Config) (*bincapz.Report, error) {
if len(c.ScanPaths) != 2 {
return nil, fmt.Errorf("diff mode requires 2 paths, you passed in %d path(s)", len(c.ScanPaths))
}
Expand Down Expand Up @@ -68,7 +68,7 @@ func Diff(ctx context.Context, c Config) (*bincapz.Report, error) {
return &bincapz.Report{Diff: d}, err
}

func processSrc(ctx context.Context, c Config, src, dest map[string]*bincapz.FileReport, d *bincapz.DiffReport) {
func processSrc(ctx context.Context, c bincapz.Config, src, dest map[string]*bincapz.FileReport, d *bincapz.DiffReport) {
// things that appear in the source
for relPath, fr := range src {
tr, exists := dest[relPath]
Expand All @@ -80,7 +80,7 @@ func processSrc(ctx context.Context, c Config, src, dest map[string]*bincapz.Fil
}
}

func handleFile(ctx context.Context, c Config, fr, tr *bincapz.FileReport, relPath string, d *bincapz.DiffReport) {
func handleFile(ctx context.Context, c bincapz.Config, fr, tr *bincapz.FileReport, relPath string, d *bincapz.DiffReport) {
// We've now established that file exists in both source & destination
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
Expand Down Expand Up @@ -120,7 +120,7 @@ func behaviorExists(b *bincapz.Behavior, behaviors []*bincapz.Behavior) bool {
return false
}

func processDest(ctx context.Context, c Config, from, to map[string]*bincapz.FileReport, d *bincapz.DiffReport) {
func processDest(ctx context.Context, c bincapz.Config, from, to map[string]*bincapz.FileReport, d *bincapz.DiffReport) {
// things that exist in the destination
for relPath, tr := range to {
fr, exists := from[relPath]
Expand All @@ -133,7 +133,7 @@ func processDest(ctx context.Context, c Config, from, to map[string]*bincapz.Fil
}
}

func fileDestination(ctx context.Context, c Config, fr, tr *bincapz.FileReport, relPath string, d *bincapz.DiffReport) {
func fileDestination(ctx context.Context, c bincapz.Config, fr, tr *bincapz.FileReport, relPath string, d *bincapz.DiffReport) {
// We've now established that this file exists in both source and destination
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
Expand All @@ -158,7 +158,7 @@ func fileDestination(ctx context.Context, c Config, fr, tr *bincapz.FileReport,
}
}

func inferMoves(ctx context.Context, c Config, d *bincapz.DiffReport) {
func inferMoves(ctx context.Context, c bincapz.Config, d *bincapz.DiffReport) {
// Walk over the added/removed paths and infer moves based on the
// levenshtein distance of the file names. If the distance is a 90+% match,
// then treat it as a move.
Expand All @@ -184,7 +184,7 @@ func inferMoves(ctx context.Context, c Config, d *bincapz.DiffReport) {
}
}

func fileMove(ctx context.Context, c Config, fr, tr *bincapz.FileReport, rpath, apath string, score float64, d *bincapz.DiffReport) {
func fileMove(ctx context.Context, c bincapz.Config, fr, tr *bincapz.FileReport, rpath, apath string, score float64, d *bincapz.DiffReport) {
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
return
Expand Down
3 changes: 2 additions & 1 deletion pkg/action/oci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"regexp"
"testing"

"github.com/chainguard-dev/bincapz/pkg/bincapz"
"github.com/chainguard-dev/bincapz/pkg/compile"
"github.com/chainguard-dev/bincapz/pkg/render"
"github.com/chainguard-dev/bincapz/rules"
Expand Down Expand Up @@ -45,7 +46,7 @@ func TestOCI(t *testing.T) {
t.Fatalf("oci: %v", err)
}

bc := Config{
bc := bincapz.Config{
IgnoreSelf: false,
IgnoreTags: []string{"harmless"},
Renderer: simple,
Expand Down
12 changes: 6 additions & 6 deletions pkg/action/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func formatPath(path string) string {
}

// scanSinglePath YARA scans a single path and converts it to a fileReport.
func scanSinglePath(ctx context.Context, c Config, yrs *yara.Rules, path string, absPath string, archiveRoot string) (*bincapz.FileReport, error) {
func scanSinglePath(ctx context.Context, c bincapz.Config, yrs *yara.Rules, path string, absPath string, archiveRoot string) (*bincapz.FileReport, error) {
logger := clog.FromContext(ctx)
var mrs yara.MatchRules
logger = logger.With("path", path)
Expand All @@ -87,7 +87,7 @@ func scanSinglePath(ctx context.Context, c Config, yrs *yara.Rules, path string,
return &bincapz.FileReport{Path: path, Error: fmt.Sprintf("scanfile: %v", err)}, nil
}

fr, err := report.Generate(ctx, path, mrs, c.IgnoreTags, c.MinRisk, c.IgnoreSelf)
fr, err := report.Generate(ctx, path, mrs, c)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -148,7 +148,7 @@ func errIfHitOrMiss(frs map[string]*bincapz.FileReport, kind string, scanPath st
}

// recursiveScan recursively YARA scans the configured paths - handling archives and OCI images.
func recursiveScan(ctx context.Context, c Config) (*bincapz.Report, error) {
func recursiveScan(ctx context.Context, c bincapz.Config) (*bincapz.Report, error) {
logger := clog.FromContext(ctx)
logger.Debug("recursive scan", slog.Any("config", c))
r := &bincapz.Report{
Expand Down Expand Up @@ -255,7 +255,7 @@ func recursiveScan(ctx context.Context, c Config) (*bincapz.Report, error) {
}

// processArchive extracts and scans a single archive file.
func processArchive(ctx context.Context, c Config, yrs *yara.Rules, archivePath string, logger *clog.Logger) (map[string]*bincapz.FileReport, error) {
func processArchive(ctx context.Context, c bincapz.Config, yrs *yara.Rules, archivePath string, logger *clog.Logger) (map[string]*bincapz.FileReport, error) {
logger = logger.With("archivePath", archivePath)

var err error
Expand Down Expand Up @@ -288,7 +288,7 @@ func processArchive(ctx context.Context, c Config, yrs *yara.Rules, archivePath
}

// processFile scans a single output file, rendering live output if available.
func processFile(ctx context.Context, c Config, yrs *yara.Rules, path string, scanPath string, archiveRoot string, logger *clog.Logger) (*bincapz.FileReport, error) {
func processFile(ctx context.Context, c bincapz.Config, yrs *yara.Rules, path string, scanPath string, archiveRoot string, logger *clog.Logger) (*bincapz.FileReport, error) {
logger = logger.With("path", path)

fr, err := scanSinglePath(ctx, c, yrs, path, scanPath, archiveRoot)
Expand Down Expand Up @@ -321,7 +321,7 @@ func processFile(ctx context.Context, c Config, yrs *yara.Rules, path string, sc
}

// Scan YARA scans a data source, applying output filters if necessary.
func Scan(ctx context.Context, c Config) (*bincapz.Report, error) {
func Scan(ctx context.Context, c bincapz.Config) (*bincapz.Report, error) {
r, err := recursiveScan(ctx, c)
if err != nil {
return r, err
Expand Down
32 changes: 32 additions & 0 deletions pkg/bincapz/bincapz.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,37 @@

package bincapz

import (
"context"
"io"

"github.com/hillu/go-yara/v4"
)

// Renderer is a common interface for Renderers.
//
// Implementations are obtained from render.New and stored in Config.Renderer.
type Renderer interface {
// File is handed the report for a single file.
// NOTE(review): presumably called as each file finishes scanning, for live output - confirm against processFile.
File(context.Context, *FileReport) error
// Full is handed the complete Report.
// NOTE(review): presumably called once at the end of a scan - confirm against the caller.
Full(context.Context, *Report) error
}

// Config carries the scan settings shared by the action and report packages.
// It is passed by value, so callers such as the diff code can copy it and
// override individual fields per scan.
type Config struct {
// IgnoreSelf is forwarded to report generation.
// NOTE(review): presumably suppresses findings about bincapz itself - confirm.
IgnoreSelf bool
// IgnoreTags lists tags that report generation turns into an ignore set.
IgnoreTags []string
// IncludeDataFiles - NOTE(review): presumably also scans files that do not look like programs; confirm.
IncludeDataFiles bool
// FrequencyUpgrade enables raising a file's risk score based on frequency
// heuristics (the --frequency-upgrade flag, which defaults to true).
FrequencyUpgrade bool
// MinFileRisk is the threshold compared against FileReport.RiskScore; diffs
// where both sides score below it are not reported.
MinFileRisk int
// MinRisk is the minimum score used by report generation.
MinRisk int
// OCI indicates that the scan source is an OCI image.
OCI bool
// OmitEmpty omits files that contain no matches.
OmitEmpty bool
// Output - NOTE(review): presumably the destination for rendered output; confirm.
Output io.Writer
// Renderer renders reports; the diff code sets it to nil while scanning each side.
Renderer Renderer
// Rules holds the compiled YARA rules.
Rules *yara.Rules
// ScanPaths are the paths to scan; diff mode requires exactly two.
ScanPaths []string
// Stats shows statistics about the scan.
Stats bool
// ErrFirstMiss - NOTE(review): presumably the counterpart of ErrFirstHit for sources with no matches; confirm.
ErrFirstMiss bool
// ErrFirstHit exits with an error if the scan source has matching capabilities.
ErrFirstHit bool
}

type Behavior struct {
Description string `json:",omitempty" yaml:",omitempty"`
// MatchStrings are all strings found relating to this behavior
Expand All @@ -29,6 +60,7 @@ type Behavior struct {
type FileReport struct {
Path string
SHA256 string
Size int64
// compiler -> x
Error string `json:",omitempty" yaml:",omitempty"`
Skipped string `json:",omitempty" yaml:",omitempty"`
Expand Down
9 changes: 1 addition & 8 deletions pkg/render/render.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,14 @@
package render

import (
"context"
"fmt"
"io"

"github.com/chainguard-dev/bincapz/pkg/bincapz"
)

// Renderer is a common interface for Renderers.
type Renderer interface {
File(context.Context, *bincapz.FileReport) error
Full(context.Context, *bincapz.Report) error
}

// New returns a new Renderer.
func New(kind string, w io.Writer) (Renderer, error) {
func New(kind string, w io.Writer) (bincapz.Renderer, error) {
switch kind {
case "", "auto", "terminal":
return NewTerminal(w), nil
Expand Down
70 changes: 62 additions & 8 deletions pkg/report/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,19 +253,27 @@ func matchStrings(ruleName string, ms []yara.MatchString) []string {
return longestUnique(raw)
}

// sizeAndChecksum returns the size in bytes and the hex-encoded SHA-256 digest
// of the file at path.
//
// The file is opened once and both values are read through that handle, so the
// size and the digest always describe the same file, even if path is replaced
// while the scan is running.
//
// On failure the checksum is "" and the error is returned as-is; the size is
// -1 when it could not be determined.
func sizeAndChecksum(path string) (int64, string, error) {
	f, err := os.Open(path)
	if err != nil {
		return -1, "", err
	}
	defer f.Close()

	// Stat the open handle rather than the path to avoid a stat/open race.
	info, err := f.Stat()
	if err != nil {
		return -1, "", err
	}
	size := info.Size()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return size, "", err
	}

	return size, fmt.Sprintf("%x", h.Sum(nil)), nil
}

// fixURL fixes badly formed URLs.
Expand All @@ -286,20 +294,25 @@ func mungeDescription(s string) string {
}

//nolint:cyclop // ignore complexity of 44
func Generate(ctx context.Context, path string, mrs yara.MatchRules, ignoreTags []string, minScore int, ignoreSelf bool) (bincapz.FileReport, error) {
func Generate(ctx context.Context, path string, mrs yara.MatchRules, c bincapz.Config) (bincapz.FileReport, error) {
ignoreTags := c.IgnoreTags
minScore := c.MinRisk
ignoreSelf := c.IgnoreSelf

ignore := map[string]bool{}
for _, t := range ignoreTags {
ignore[t] = true
}

ptCheck, err := pathChecksum(path)
size, checksum, err := sizeAndChecksum(path)
if err != nil {
return bincapz.FileReport{}, err
}

fr := bincapz.FileReport{
Path: path,
SHA256: ptCheck,
SHA256: checksum,
Size: size,
Meta: map[string]string{},
Behaviors: []*bincapz.Behavior{},
}
Expand Down Expand Up @@ -449,7 +462,7 @@ func Generate(ctx context.Context, path string, mrs yara.MatchRules, ignoreTags
}

// If something has a lot of high, it's probably critical
if riskCounts[3] >= 4 {
if c.FrequencyUpgrade && upgradeRisk(ctx, overallRiskScore, riskCounts, size) {
overallRiskScore = 4
}

Expand All @@ -465,6 +478,47 @@ func Generate(ctx context.Context, path string, mrs yara.MatchRules, ignoreTags
return fr, nil
}

// upgradeRisk determines whether to upgrade risk based on finding density.
func upgradeRisk(ctx context.Context, riskScore int, riskCounts map[int]int, size int64) bool {
if riskScore != 3 {
return false
}
highCount := riskCounts[3]
sizeMB := size / 1024 / 1024
upgrade := false

// small scripts, tiny ELF binaries
if size < 1024 && highCount > 1 {
upgrade = true
}

// include most UPX binaries
if sizeMB < 2 && highCount > 2 {
upgrade = true
}

if sizeMB < 10 && highCount > 3 {
upgrade = true
}

// bloated go binaries
if sizeMB < 20 && highCount > 4 {
upgrade = true
}

if highCount > 6 {
upgrade = true
}

if !upgrade {
egibs marked this conversation as resolved.
Show resolved Hide resolved
return false
}

clog.DebugContextf(ctx, "upgrading risk: high=%d, size=%d", highCount, size)

return upgrade
}

// all returns a single boolean based on a slice of booleans.
func all(conditions ...bool) bool {
for _, condition := range conditions {
Expand Down
Loading
Loading