Skip to content

Commit

Permalink
Better handling of diffs between archives (#626)
Browse files Browse the repository at this point in the history
Signed-off-by: egibs <[email protected]>
  • Loading branch information
egibs authored Nov 15, 2024
1 parent 3aa2507 commit 24b63d1
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 26 deletions.
70 changes: 48 additions & 22 deletions pkg/action/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"path/filepath"
"regexp"
"strings"
"sync"

"github.com/agext/levenshtein"
Expand All @@ -20,6 +21,48 @@ import (
"golang.org/x/sync/errgroup"
)

func relPath(from string, fr *malcontent.FileReport, isArchive bool) (string, error) {
var err error
var rel string
switch {
case isArchive:
fromRoot := fr.ArchiveRoot

// trim archiveRoot from fullPath
archiveFile := strings.TrimPrefix(fr.FullPath, fr.ArchiveRoot)
rel, err = filepath.Rel(fromRoot, archiveFile)
if err != nil {
return "", err
}
default:
info, err := os.Stat(from)
if err != nil {
return "", err
}
dir := filepath.Dir(from)
// Evaluate symlinks to cover edge cases like macOS' /private/tmp -> /tmp symlink
// Also, remove any filenames to correctly determine the relative path
// Using "." and "." will show as modifications for completely unrelated files and paths
var fromRoot string
if info.IsDir() {
fromRoot, err = filepath.EvalSymlinks(from)
} else {
fromRoot, err = filepath.EvalSymlinks(dir)
}
if err != nil {
return "", err
}
if fromRoot == "." {
fromRoot = from
}
rel, err = filepath.Rel(fromRoot, fr.Path)
if err != nil {
return "", err
}
}
return rel, nil
}

func relFileReport(ctx context.Context, c malcontent.Config, fromPath string) (map[string]*malcontent.FileReport, error) {
fromConfig := c
fromConfig.Renderer = nil
Expand All @@ -34,30 +77,11 @@ func relFileReport(ctx context.Context, c malcontent.Config, fromPath string) (m
return true
}
if fr, ok := value.(*malcontent.FileReport); ok {
isArchive := fr.ArchiveRoot != ""
if fr.Skipped != "" || fr.Error != "" {
return true
}
// Evaluate symlinks to cover edge cases like macOS' /private/tmp -> /tmp symlink
// Also, remove any filenames to correctly determine the relative path
// Using "." and "." will show as modifications for completely unrelated files and paths
info, err := os.Stat(fromPath)
if err != nil {
return false
}
dir := filepath.Dir(fromPath)
var fromRoot string
if info.IsDir() {
fromRoot, err = filepath.EvalSymlinks(fromPath)
} else {
fromRoot, err = filepath.EvalSymlinks(dir)
}
if err != nil {
return false
}
if fromRoot == "." {
fromRoot = fromPath
}
rel, err := filepath.Rel(fromRoot, fr.Path)
rel, err := relPath(fromPath, fr, isArchive)
if err != nil {
return false
}
Expand Down Expand Up @@ -211,7 +235,8 @@ func handleFile(ctx context.Context, c malcontent.Config, fr, tr *malcontent.Fil
func createFileReport(tr, fr *malcontent.FileReport) *malcontent.FileReport {
return &malcontent.FileReport{
Path: tr.Path,
PreviousRelPath: fr.Path,
PreviousPath: fr.Path,
PreviousRelPath: fr.PreviousRelPath,
Behaviors: []*malcontent.Behavior{},
PreviousRiskScore: fr.RiskScore,
PreviousRiskLevel: fr.RiskLevel,
Expand Down Expand Up @@ -320,6 +345,7 @@ func fileMove(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR
// We think that this file moved from rpath to apath.
abs := &malcontent.FileReport{
Path: tr.Path,
PreviousPath: fr.Path,
PreviousRelPath: rpath,
PreviousRelPathScore: score,

Expand Down
2 changes: 2 additions & 0 deletions pkg/action/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
// Clean up the path if scanning an archive
var clean string
if isArchive {
fr.ArchiveRoot = archiveRoot
fr.FullPath = path
clean, err = cleanPath(path, archiveRoot)
if err != nil {
return nil, err
Expand Down
7 changes: 7 additions & 0 deletions pkg/malcontent/malcontent.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ type FileReport struct {
Behaviors []*Behavior `json:",omitempty" yaml:",omitempty"`
FilteredBehaviors int `json:",omitempty" yaml:",omitempty"`

// The absolute path we think this moved fron
PreviousPath string `json:",omitempty" yaml:",omitempty"`
// The relative path we think this moved from.
PreviousRelPath string `json:",omitempty" yaml:",omitempty"`
// The levenshtein distance between the previous path and the current path
Expand All @@ -97,6 +99,11 @@ type FileReport struct {
IsMalcontent bool `json:",omitempty" yaml:",omitempty"`

Overrides []*Behavior `json:",omitempty" yaml:",omitempty"`

// Diffing archives is less straightforward than single files
// Store additional paths to help with relative pathing
ArchiveRoot string `json:",omitempty" yaml:",omitempty"`
FullPath string `json:",omitempty" yaml:",omitempty"`
}

type DiffReport struct {
Expand Down
3 changes: 3 additions & 0 deletions pkg/render/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ func (r JSON) Full(_ context.Context, rep *malcontent.Report) error {
if path, ok := key.(string); ok {
if r, ok := value.(*malcontent.FileReport); ok {
if r.Skipped == "" {
// Filter out diff-related fields
r.ArchiveRoot = ""
r.FullPath = ""
jr.Files[path] = r
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/render/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (r Markdown) Full(ctx context.Context, rep *malcontent.Report) error {
for modified := rep.Diff.Modified.Oldest(); modified != nil; modified = modified.Next() {
var title string
if modified.Value.PreviousRelPath != "" && modified.Value.PreviousRelPathScore >= 0.9 {
title = fmt.Sprintf("## Moved: %s -> %s (similarity: %0.2f)", modified.Value.PreviousRelPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
title = fmt.Sprintf("## Moved: %s -> %s (similarity: %0.2f)", modified.Value.PreviousPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
} else {
title = fmt.Sprintf("## Changed: %s", modified.Value.Path)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/render/simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func (r Simple) Full(_ context.Context, rep *malcontent.Report) error {

for modified := rep.Diff.Modified.Oldest(); modified != nil; modified = modified.Next() {
if modified.Value.PreviousRelPath != "" && modified.Value.PreviousRelPathScore >= 0.9 {
fmt.Fprintf(r.w, ">>> moved: %s -> %s (score: %f)\n", modified.Value.PreviousRelPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
fmt.Fprintf(r.w, ">>> moved: %s -> %s (score: %f)\n", modified.Value.PreviousPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
} else {
fmt.Fprintf(r.w, "*** changed: %s\n", modified.Value.Path)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/render/terminal.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (r Terminal) Full(ctx context.Context, rep *malcontent.Report) error {
for modified := rep.Diff.Modified.Oldest(); modified != nil; modified = modified.Next() {
var title string
if modified.Value.PreviousRelPath != "" && modified.Value.PreviousRelPathScore >= 0.9 {
title = fmt.Sprintf("Moved: %s -> %s (score: %f)", modified.Value.PreviousRelPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
title = fmt.Sprintf("Moved: %s -> %s (score: %f)", modified.Value.PreviousPath, modified.Value.Path, modified.Value.PreviousRelPathScore)
} else {
title = fmt.Sprintf("Changed: %s", modified.Value.Path)
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/render/yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ func (r YAML) Full(_ context.Context, rep *malcontent.Report) error {
if path, ok := key.(string); ok {
if r, ok := value.(*malcontent.FileReport); ok {
if r.Skipped == "" {
r.ArchiveRoot = ""
r.FullPath = ""
yr.Files[path] = r
}
}
Expand Down
2 changes: 1 addition & 1 deletion tests/linux/clean/aws-c-io/aws-c-io.sdiff
Original file line number Diff line number Diff line change
@@ -1 +1 @@
>>> moved: aws-c-io-0.14.10-r0.spdx.json -> linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json (score: 0.979310)
>>> moved: linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json -> linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json (score: 0.979310)

0 comments on commit 24b63d1

Please sign in to comment.