Skip to content

Commit

Permalink
Fix inconsistent path behaviors when running diffs (chainguard-dev#581)
Browse files Browse the repository at this point in the history
* Fix inconsistent path behaviors when running diffs

Signed-off-by: egibs <[email protected]>

* Refresh test data with new diff path parsing

Signed-off-by: egibs <[email protected]>

---------

Signed-off-by: egibs <[email protected]>
  • Loading branch information
egibs authored Nov 5, 2024
1 parent bcea5c4 commit a154e05
Show file tree
Hide file tree
Showing 15 changed files with 196 additions and 103 deletions.
115 changes: 68 additions & 47 deletions pkg/action/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"log/slog"
"math"
"os"
"path/filepath"
"regexp"
"sync"
Expand All @@ -32,7 +33,27 @@ func relFileReport(ctx context.Context, c malcontent.Config, fromPath string) (m
if files.Value.Skipped != "" || files.Value.Error != "" {
continue
}
rel, err := filepath.Rel(fromPath, files.Value.Path)
// Evaluate symlinks to cover edge cases like macOS' /private/tmp -> /tmp symlink
// Also, remove any filenames to correctly determine the relative path
// Using "." and "." will show as modifications for completely unrelated files and paths
info, err := os.Stat(fromPath)
if err != nil {
return nil, fmt.Errorf("failed to stat file %s: %w", fromPath, err)
}
dir := filepath.Dir(fromPath)
var fromRoot string
if info.IsDir() {
fromRoot, err = filepath.EvalSymlinks(fromPath)
} else {
fromRoot, err = filepath.EvalSymlinks(dir)
}
if err != nil {
return nil, fmt.Errorf("failed to evaluate symlink for %s: %w", fromPath, err)
}
if fromRoot == "." {
fromRoot = fromPath
}
rel, err := filepath.Rel(fromRoot, files.Value.Path)
if err != nil {
return nil, fmt.Errorf("rel(%q,%q): %w", fromPath, files.Value.Path, err)
}
Expand Down Expand Up @@ -99,6 +120,52 @@ func processSrc(ctx context.Context, c malcontent.Config, src, dest map[string]*
}
}

func processDest(ctx context.Context, c malcontent.Config, from, to map[string]*malcontent.FileReport, d *malcontent.DiffReport) {
// findings that exist only in the destination
for relPath, tr := range to {
fr, exists := from[relPath]
if !exists {
d.Added.Set(relPath, tr)
continue
}

fileDestination(ctx, c, fr, tr, relPath, d)
}
}

func fileDestination(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, relPath string, d *malcontent.DiffReport) {
// We've now established that this file exists in both source and destination
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
return
}

// Filter files that are marked as added
if filterDiff(ctx, c, fr, tr) {
return
}

abs := createFileReport(tr, fr)

// if destination behavior is not in the source
for _, tb := range tr.Behaviors {
if !behaviorExists(tb, fr.Behaviors) {
tb.DiffAdded = true
abs.Behaviors = append(abs.Behaviors, tb)
continue
}
}

// are there already modified behaviors for this file?
rel, exists := d.Modified.Get(relPath)
if !exists {
d.Modified.Set(relPath, abs)
} else {
rel.Behaviors = append(rel.Behaviors, abs.Behaviors...)
d.Modified.Set(relPath, rel)
}
}

func handleFile(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, relPath string, d *malcontent.DiffReport) {
// We've now established that file exists in both source & destination
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
Expand Down Expand Up @@ -148,52 +215,6 @@ func behaviorExists(b *malcontent.Behavior, behaviors []*malcontent.Behavior) bo
return false
}

func processDest(ctx context.Context, c malcontent.Config, from, to map[string]*malcontent.FileReport, d *malcontent.DiffReport) {
// findings that exist only in the destination
for relPath, tr := range to {
fr, exists := from[relPath]
if !exists {
d.Added.Set(relPath, tr)
continue
}

fileDestination(ctx, c, fr, tr, relPath, d)
}
}

func fileDestination(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, relPath string, d *malcontent.DiffReport) {
// We've now established that this file exists in both source and destination
if fr.RiskScore < c.MinFileRisk && tr.RiskScore < c.MinFileRisk {
clog.FromContext(ctx).Info("diff does not meet min trigger level", slog.Any("path", tr.Path))
return
}

// Filter files that are marked as added
if filterDiff(ctx, c, fr, tr) {
return
}

abs := createFileReport(tr, fr)

// if destination behavior is not in the source
for _, tb := range tr.Behaviors {
if !behaviorExists(tb, fr.Behaviors) {
tb.DiffAdded = true
abs.Behaviors = append(abs.Behaviors, tb)
}
}

// are there already modified behaviors for this file?
if _, exists := d.Modified.Get(relPath); !exists {
d.Modified.Set(relPath, abs)
} else {
if rel, exists := d.Modified.Get(relPath); exists {
rel.Behaviors = append(rel.Behaviors, abs.Behaviors...)
d.Modified.Set(relPath, rel)
}
}
}

// filterMap filters orderedmap pairs by checking for matches against a slice of compiled regular expression patterns.
func filterMap(om *orderedmap.OrderedMap[string, *malcontent.FileReport], ps []*regexp.Regexp, c chan<- *orderedmap.Pair[string, *malcontent.FileReport], wg *sync.WaitGroup) {
defer wg.Done()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,34 @@
*** changed: linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst
--- missing: freedownloadmanager_clear_postinst
-data/embedded/pgp_key
-exec/install_additional/add_apt_key
-exec/shell/ignore_output
-fs/path/etc
-fs/path/usr_bin
-net/download
-net/url/embedded
++++ added: freedownloadmanager_infected_postinst
+3P/threat_hunting/touch
+anti-static/base64/exec
+anti-static/base64/http_agent
+data/base64/external
+data/embedded/base64_terms
+data/embedded/base64_url
+data/embedded/pgp_key
+data/encoding/base64
+evasion/hidden_files/var_tmp
+exec/install_additional/add_apt_key
+exec/shell/exec
+exec/shell/ignore_output
+fs/directory/create
+fs/file/delete_forcibly
+fs/file/make_executable
+fs/file/times_set
+fs/path/etc
+fs/path/tmp
+fs/path/usr_bin
+fs/path/var
+fs/permission/modify
+net/download
+net/url/embedded
+persist/cron/echo_tab
+persist/cron/tab
41 changes: 40 additions & 1 deletion test_data/linux/2024.sbcl.market/sbcl.sdiff
Original file line number Diff line number Diff line change
@@ -1,4 +1,43 @@
*** changed: linux/2024.sbcl.market/sbcl.dirty
--- missing: sbcl.clean
-data/compression/zstd
-discover/user/HOME
-discover/user/USER
-evasion/hidden_files/var_tmp
-exec/dylib/address_check
-exec/dylib/symbol_address
-exec/program
-exec/program/background
-exec/shell/echo
-fs/file/delete
-fs/file/truncate
-fs/link_read
-fs/path/dev
-fs/path/tmp
-fs/path/var
-fs/permission/modify
-fs/proc/self_exe
-fs/symlink_resolve
-net/url/embedded
++++ added: sbcl.dirty
+anti-static/packer/high_entropy
+data/compression/zstd
+data/embedded/zstd
+discover/user/HOME
+discover/user/USER
+evasion/hidden_files/var_tmp
+exec/dylib/address_check
+exec/dylib/symbol_address
+exec/program
+exec/program/background
+exec/shell/echo
+fs/file/delete
+fs/file/truncate
+fs/link_read
+fs/path/dev
+fs/path/tmp
+fs/path/var
+fs/permission/modify
+fs/proc/self_exe
+fs/symlink_resolve
+net/dns/txt
+net/url/embedded
2 changes: 1 addition & 1 deletion test_data/linux/clean/aws-c-io/aws-c-io.sdiff
Original file line number Diff line number Diff line change
@@ -1 +1 @@
*** changed: linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json
>>> moved: aws-c-io-0.14.10-r0.spdx.json -> linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json (score: 0.979310)
Binary file modified test_data/macOS/2023.3CX/libffmpeg.change_decrease.mdiff
Binary file not shown.
Binary file modified test_data/macOS/2023.3CX/libffmpeg.change_increase.mdiff
Binary file not shown.
Loading

0 comments on commit a154e05

Please sign in to comment.