Skip to content

Commit

Permalink
Add support for archives (chainguard-dev#144)
Browse files Browse the repository at this point in the history
* Add support for archives

* Make archive support transparent; small tweaks/comments

* Use map for archive types; retrieve extension

* Remove extraneous comment

* Address PR comments

* Update context used for crane.Pull

* Sort test cases

* One more sort

* Missed a condition

* Log close errors for now
  • Loading branch information
Evan Gibler authored Apr 18, 2024
1 parent 41d83cc commit 7064f36
Show file tree
Hide file tree
Showing 8 changed files with 396 additions and 68 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Enumerates program capabilities and malicious behaviors using fragment analysis.

- Analyzes binaries from any architecture - arm64, amd64, riscv, ppc64, sparc64
- Supports scripting languages such as bash, PHP, Perl, Ruby, NodeJS, and Python
- Supports OCI images and archives
- Integrates [YARA forge](https://yarahq.github.io/) for rules by Avast, Elastic, FireEye, Google, Nextron, and others.
- 12,000+ rules that detect everything from ioctls to malware
- Tuned for especially excellent performance with Linux programs
Expand All @@ -18,7 +19,6 @@ Enumerates program capabilities and malicious behaviors using fragment analysis.

## Shortcomings

- Does not attempt to process archive files (jar, zip, apk)
- Minimal rule support for Windows and Java (help wanted!)
- Early in development; output is subject to change

Expand Down Expand Up @@ -139,6 +139,7 @@ bincapz --format=json <file> | jq '.Files.[].Behaviors | keys'
* `--format` string: Output type. Valid values are: json, markdown, simple, terminal, yaml (default "terminal")
* `--ignore-tags` string: Rule tags to ignore
* `--min-level`: minimum suspicion level to report (1=low, 2=medium, 3=high, 4=critical) (default 1)
* `--oci`: scan OCI images
* `--omit-empty`: omit files that contain no matches
* `--stats`: display statistics for risk level and `programkind`
* `--third-party`: include third-party rules, which may have licensing restrictions (default true)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/hillu/go-yara/v4 v4.3.2
github.com/liamg/magic v0.0.1
github.com/olekukonko/tablewriter v0.0.5
github.com/ulikunitz/xz v0.5.12
golang.org/x/term v0.19.0
gopkg.in/yaml.v3 v3.0.1
)
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=
Expand Down
236 changes: 236 additions & 0 deletions pkg/action/archive.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
package action

import (
"archive/tar"
"archive/zip"
"compress/gzip"
"context"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strings"

"github.com/chainguard-dev/clog"
"github.com/ulikunitz/xz"
)

// maxBytes caps how much of a single archive entry is written to disk: the
// extraction helpers wrap every entry in io.LimitReader(…, maxBytes), so any
// data beyond this size is dropped rather than extracted.
const maxBytes = 1 << 29 // 512MB

// copyArchive duplicates the archive at src into the directory dst.
//
// The copy is created with os.CreateTemp, so its name is the base name of src
// followed by a generated suffix. Failures to open the source, create the
// destination, or copy the data are returned wrapped; a failure to close the
// destination file is only logged.
func copyArchive(ctx context.Context, src string, dst string) error {
	log := clog.FromContext(ctx).With("src", src, "dst", dst)
	log.Info("copying archive")

	in, openErr := os.Open(src)
	if openErr != nil {
		return fmt.Errorf("failed to open source file: %w", openErr)
	}
	defer in.Close()

	pattern := fmt.Sprintf("%s.*", filepath.Base(src))
	out, createErr := os.CreateTemp(dst, pattern)
	if createErr != nil {
		return fmt.Errorf("failed to create temporary file: %w", createErr)
	}
	// Close errors on the destination are reported through the logger only.
	defer func() {
		closeErr := out.Close()
		if closeErr == nil {
			return
		}
		log.Errorf("failed to close file: %v", closeErr)
	}()

	_, copyErr := io.Copy(out, in)
	if copyErr != nil {
		return fmt.Errorf("failed to copy data: %w", copyErr)
	}
	return nil
}

// tempDir makes a fresh temporary directory whose name is derived from the
// base name of p and places a copy of the archive p inside it.
//
// It returns the path of the new directory. If copying the archive fails, the
// directory is removed again and an error is returned.
func tempDir(ctx context.Context, p string) (string, error) {
	clog.FromContext(ctx).With("path", p).Info("creating temp dir")

	pattern := fmt.Sprintf("bincapz-%s", filepath.Base(p))
	dir, mkErr := os.MkdirTemp("", pattern)
	if mkErr != nil {
		return "", fmt.Errorf("failed to create temp dir: %w", mkErr)
	}

	copyErr := copyArchive(ctx, p, dir)
	if copyErr == nil {
		return dir, nil
	}

	// Best-effort cleanup; the copy error is the one reported.
	os.RemoveAll(dir)
	return "", fmt.Errorf("failed to copy archive: %w", copyErr)
}

// extractTar extracts .apk and .tar* archives.
//
// f is the path of the archive and d is the directory it is unpacked into.
// Compression is detected from the leading magic bytes of the file rather than
// from its name, so a path component that merely contains ".tgz" or ".apk"
// cannot select the wrong decoder: gzip streams (.apk, .tar.gz, .tgz) and xz
// streams (.tar.xz) are decompressed on the fly, anything else is read as a
// plain tar stream.
//
// Entries whose cleaned name is absolute or starts with ".." are rejected with
// an error. At most maxBytes of each entry are written; the rest is dropped.
// The first entry that cannot be read or written aborts the extraction and
// its error is returned.
func extractTar(ctx context.Context, d string, f string) error {
	logger := clog.FromContext(ctx).With("dir", d, "file", f)
	logger.Info("extracting tar")

	// Check if the file is valid
	if _, err := os.Stat(f); err != nil {
		return fmt.Errorf("failed to stat file: %w", err)
	}

	tf, err := os.Open(f)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	defer tf.Close()

	// Peek at the first bytes to identify the compression format, then rewind
	// so the chosen decoder sees the stream from the start. A file shorter
	// than the probe is not an error here; it is simply treated as plain tar.
	var magic [6]byte
	n, err := io.ReadFull(tf, magic[:])
	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
		return fmt.Errorf("failed to read file header: %w", err)
	}
	if _, err := tf.Seek(0, io.SeekStart); err != nil {
		return fmt.Errorf("failed to rewind file: %w", err)
	}

	var src io.Reader = tf
	switch {
	case n >= 2 && magic[0] == 0x1f && magic[1] == 0x8b: // gzip
		gzStream, err := gzip.NewReader(tf)
		if err != nil {
			return fmt.Errorf("failed to create gzip reader: %w", err)
		}
		defer gzStream.Close()
		src = gzStream
	case n == len(magic) && string(magic[:]) == "\xfd7zXZ\x00": // xz
		xzStream, err := xz.NewReader(tf)
		if err != nil {
			return fmt.Errorf("failed to create xz reader: %w", err)
		}
		src = xzStream
	}
	tr := tar.NewReader(src)

	for {
		header, err := tr.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return fmt.Errorf("failed to read tar header: %w", err)
		}

		// Refuse entries that would land outside of d.
		clean := filepath.Clean(header.Name)
		if filepath.IsAbs(clean) || strings.HasPrefix(clean, "..") {
			return fmt.Errorf("invalid file path: %s", header.Name)
		}
		target := filepath.Join(d, clean)

		if header.FileInfo().IsDir() {
			// Always keep owner rwx: a directory recorded as read-only in the
			// archive would otherwise make extracting its children fail.
			if err := os.MkdirAll(target, os.FileMode(header.Mode)|0o700); err != nil {
				return fmt.Errorf("failed to create directory: %w", err)
			}
			continue
		}

		// Archives are not required to list a directory before its contents.
		if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
			return fmt.Errorf("failed to create directory for file: %w", err)
		}

		// Keep the owner-write bit (as extractZip does) so that a later entry
		// with the same name can still truncate and rewrite the file.
		out, err := os.OpenFile(target, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.FileMode(header.Mode)|0o200)
		if err != nil {
			return fmt.Errorf("failed to create file: %w", err)
		}

		if _, err := io.Copy(out, io.LimitReader(tr, maxBytes)); err != nil {
			// Release the descriptor; the copy error is the one worth reporting.
			out.Close()
			return fmt.Errorf("failed to copy file: %w", err)
		}

		if err := out.Close(); err != nil {
			return fmt.Errorf("failed to close file: %w", err)
		}
	}
	return nil
}

// extractZip extracts .jar and .zip archives.
//
// f is the path of the archive and d is the directory it is unpacked into.
// Entry names that would resolve outside of d (a cleaned name of ".." or one
// starting with "../") are rejected with an error. Entries that map onto an
// already existing directory are skipped. At most maxBytes of each entry are
// written; the rest is dropped. The first entry that cannot be extracted
// aborts the run and its error is returned.
func extractZip(ctx context.Context, d string, f string) error {
	logger := clog.FromContext(ctx).With("dir", d, "file", f)
	logger.Info("extracting zip")

	// Check if the file is valid
	if _, err := os.Stat(f); err != nil {
		return fmt.Errorf("failed to stat file: %w", err)
	}

	read, err := zip.OpenReader(f)
	if err != nil {
		return fmt.Errorf("failed to open zip file: %w", err)
	}
	defer read.Close()

	for _, file := range read.File {
		// Zip entry names are slash-separated, so normalise them with package
		// path before converting to an OS path. Names that climb out of the
		// destination directory are refused, mirroring extractTar.
		clean := path.Clean(filepath.ToSlash(file.Name))
		if clean == ".." || strings.HasPrefix(clean, "../") {
			return fmt.Errorf("invalid file path: %s", file.Name)
		}
		name := filepath.Join(d, filepath.FromSlash(clean))

		// Check if a directory with the same name exists
		if info, err := os.Stat(name); err == nil && info.IsDir() {
			continue
		}

		if file.Mode().IsDir() {
			if err := os.MkdirAll(name, file.Mode()|0o755); err != nil {
				return fmt.Errorf("failed to create directory: %w", err)
			}
			continue
		}

		if err := extractZipFile(file, name); err != nil {
			return err
		}
	}
	return nil
}

// extractZipFile writes the contents of the zip entry file to the path name,
// creating parent directories as needed and closing both ends on every path.
func extractZipFile(file *zip.File, name string) error {
	open, err := file.Open()
	if err != nil {
		// open is nil here, so there is nothing to close.
		return fmt.Errorf("failed to open file in zip: %w", err)
	}
	defer open.Close()

	if err := os.MkdirAll(filepath.Dir(name), 0o755); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Force the owner-write bit so the file can be written and later replaced.
	create, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_TRUNC, file.Mode()|0o200)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}

	if _, err := io.Copy(create, io.LimitReader(open, maxBytes)); err != nil {
		// Release the descriptor; the copy error is the one worth reporting.
		create.Close()
		return fmt.Errorf("failed to copy file: %w", err)
	}

	if err := create.Close(); err != nil {
		return fmt.Errorf("failed to close file: %w", err)
	}
	return nil
}

// extractArchive specifies which extraction method to use based on the archive type.
//
// The type is taken from the suffix of f, not from a substring match over the
// whole path, so a directory such as "bundle.zip.d/" or a name such as
// "app.jar.tar.gz" cannot send a tar archive to the zip extractor. d is the
// directory the archive is unpacked into. An error is returned when the
// suffix is not recognised or when the selected extractor fails.
func extractArchive(ctx context.Context, d string, f string) error {
	switch {
	// .jar and .zip files can be extracted using the same method
	case hasAnySuffix(f, ".jar", ".zip"):
		if err := extractZip(ctx, d, f); err != nil {
			return fmt.Errorf("failed to extract zip-based file: %w", err)
		}
	// .apk and .tar* files can be extracted using the same method
	case hasAnySuffix(f, ".apk", ".tar", ".tar.gz", ".tgz", ".tar.xz"):
		if err := extractTar(ctx, d, f); err != nil {
			return fmt.Errorf("failed to extract tar-based file: %w", err)
		}
	// Unsupported archive type
	default:
		return fmt.Errorf("unsupported archive type: %s", f)
	}
	return nil
}

// hasAnySuffix reports whether s ends with at least one of the given suffixes.
func hasAnySuffix(s string, suffixes ...string) bool {
	for _, suffix := range suffixes {
		if strings.HasSuffix(s, suffix) {
			return true
		}
	}
	return false
}

// archive creates a temporary directory and extracts the archive file for scanning.
//
// sp is the path of the archive. On success the path of the temporary
// directory holding the extracted contents is returned. On failure the
// returned path is empty and any directory that was created is removed again,
// since the caller has no way to learn its name.
func archive(ctx context.Context, sp string) (string, error) {
	tmpDir, err := tempDir(ctx, sp)
	if err != nil {
		return "", fmt.Errorf("failed to create temp dir: %w", err)
	}

	if err := extractArchive(ctx, tmpDir, sp); err != nil {
		// Best-effort cleanup so a failed extraction does not leave a
		// partially populated directory behind.
		os.RemoveAll(tmpDir)
		return "", fmt.Errorf("failed to extract archive: %w", err)
	}

	return tmpDir, nil
}
54 changes: 8 additions & 46 deletions pkg/action/oci.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
package action

import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/chainguard-dev/clog"
"github.com/google/go-containerregistry/pkg/crane"
v1 "github.com/google/go-containerregistry/pkg/v1"
)

const maxBytes = 1 << 26

func prepare(d string) (string, *os.File, error) {
func prepareImage(ctx context.Context, d string) (string, *os.File, error) {
logger := clog.FromContext(ctx).With("image", d)
logger.Info("preparing image")
tmpDir, err := os.MkdirTemp("", fmt.Sprintf("bincapz-%s", filepath.Base(d)))
if err != nil {
return "", nil, fmt.Errorf("failed to create temp dir: %w", err)
Expand All @@ -28,7 +26,7 @@ func prepare(d string) (string, *os.File, error) {
}

var image v1.Image
if image, err = crane.Pull(d, crane.WithContext(context.Background())); err != nil {
if image, err = crane.Pull(d, crane.WithContext(ctx)); err != nil {
return "", nil, fmt.Errorf("failed to pull image: %w", err)
}
if err := crane.Export(image, tmpFile); err != nil {
Expand All @@ -41,50 +39,14 @@ func prepare(d string) (string, *os.File, error) {
return tmpDir, tmpFile, nil
}

// extract unpacks the tar stream read from f into the directory d.
//
// Entries whose cleaned name is absolute or starts with ".." are rejected.
// Directory entries are created with the mode recorded in the header; every
// other entry is written as a file, limited to maxBytes of content.
func extract(d string, f *os.File) error {
	reader := tar.NewReader(f)
	for {
		hdr, err := reader.Next()
		switch {
		case errors.Is(err, io.EOF):
			return nil
		case err != nil:
			return fmt.Errorf("failed to read tar header: %w", err)
		}

		cleaned := filepath.Clean(hdr.Name)
		if strings.HasPrefix(cleaned, "..") || filepath.IsAbs(cleaned) {
			return fmt.Errorf("invalid file path: %s", hdr.Name)
		}

		dest := filepath.Join(d, cleaned)
		mode := os.FileMode(hdr.Mode)

		if hdr.FileInfo().IsDir() {
			if mkErr := os.MkdirAll(dest, mode); mkErr != nil {
				return fmt.Errorf("failed to create directory: %w", mkErr)
			}
			continue
		}

		out, openErr := os.OpenFile(dest, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
		if openErr != nil {
			return fmt.Errorf("failed to create file: %w", openErr)
		}

		// The file is closed on both paths; a copy error takes precedence
		// over a close error.
		_, copyErr := io.Copy(out, io.LimitReader(reader, maxBytes))
		closeErr := out.Close()
		if copyErr != nil {
			return fmt.Errorf("failed to copy file: %w", copyErr)
		}
		if closeErr != nil {
			return fmt.Errorf("failed to close file: %w", closeErr)
		}
	}
}

// return a directory with the extracted image directories/files in it.
func oci(path string) (string, error) {
tmpDir, tmpFile, err := prepare(path)
func oci(ctx context.Context, path string) (string, error) {
tmpDir, tmpFile, err := prepareImage(ctx, path)
if err != nil {
return "", fmt.Errorf("failed to prepare image: %w", err)
}

err = extract(tmpDir, tmpFile)
err = extractTar(ctx, tmpDir, tmpFile.Name())
if err != nil {
return "", fmt.Errorf("failed to extract image: %w", err)
}
Expand Down
Loading

0 comments on commit 7064f36

Please sign in to comment.