From e3231af789562cf36b3c5ad0dd036a76a971b72f Mon Sep 17 00:00:00 2001 From: Ville Aikas Date: Tue, 2 Apr 2024 20:08:34 +0300 Subject: [PATCH] Refactor, add tests. Signed-off-by: Ville Aikas --- pkg/action/programkind.go | 112 +++++++++++++-------------- pkg/action/programkind_test.go | 133 +++++++++++++++++++++++++++++++++ pkg/action/testdata/empty | 0 pkg/action/testdata/python | 1 + pkg/action/testdata/rando | Bin 0 -> 1024 bytes pkg/action/testdata/shell | 1 + pkg/action/testdata/short | 1 + 7 files changed, 193 insertions(+), 55 deletions(-) create mode 100644 pkg/action/programkind_test.go create mode 100644 pkg/action/testdata/empty create mode 100644 pkg/action/testdata/python create mode 100644 pkg/action/testdata/rando create mode 100644 pkg/action/testdata/shell create mode 100644 pkg/action/testdata/short diff --git a/pkg/action/programkind.go b/pkg/action/programkind.go index d2924f9f1..0725a3da4 100644 --- a/pkg/action/programkind.go +++ b/pkg/action/programkind.go @@ -11,10 +11,36 @@ import ( "k8s.io/klog/v2" ) +// map from extensions to program kinds +var extMap = map[string]string{ + ".scpt": "compiled AppleScript", + ".scptd": "compiled AppleScript", + ".sh": "Shell script", + ".rb": "Ruby script", + ".py": "Python script", + ".pl": "PERL script", + ".yara": "", + ".expect": "Expect script", + ".php": "PHP file", + ".html": "", + ".js": "Javascript", + ".ts": "Typescript", + ".7z": "", + ".json": "", + ".yml": "", + ".yaml": "", + ".java": "Java source", + ".jar": "Java program", + ".asm": "", + ".service": "systemd", + ".cron": "crontab", + ".crontab": "crontab", + ".c": "C source", +} + // programKind tries to identify if a path is a program func programKind(path string) string { var header [263]byte - f, err := os.Open(path) if err != nil { log.Printf("os.Open[%s]: %v", path, err) @@ -23,14 +49,26 @@ func programKind(path string) string { defer f.Close() desc := "" - if _, err := io.ReadFull(f, header[:]); err == nil { - kind, err := magic.Lookup(header[:]) + var headerString string + n, err := io.ReadFull(f, header[:]) + switch { + case err == nil || err == io.ErrUnexpectedEOF: + // Read the full buffer, or some bytes, all good + kind, err := magic.Lookup(header[:n]) if err == nil { desc = kind.Description + } else { + desc = "" } + headerString = string(header[:n]) + case err == io.EOF: + // Nothing was read, so set the buffer so. + desc = "" + headerString = "" } - klog.V(1).Infof("desc: %q header: %q err: %v", desc, header[:], err) + // TODO: Is it safe to log unsanitized file stuff? + klog.V(1).Infof("desc: %q header: %q err: %v", desc, headerString, err) // the magic library gets these wrong if strings.HasSuffix(path, ".json") { @@ -57,63 +95,27 @@ func programKind(path string) string { return "Shell script" } - switch filepath.Ext(path) { - case ".scpt", "scptd": - return "compiled AppleScript" - case ".sh": - return "Shell script" - case ".rb": - return "Ruby script" - case ".py": - return "Python script" - case ".pl": - return "PERL script" - case ".yara": - return "" - case ".expect": - return "Expect script" - case ".php": - return "PHP file" - case ".html": - return "" - case ".js": - return "Javascript" - case ".ts": - return "Typescript" - case ".7z": - return "" - case ".json": - return "" - case ".yml", ".yaml": - return "" - case ".java": - return "Java source" - case ".jar": - return "Java program" - case ".asm": - return "" - case ".service": - return "systemd" - case ".cron", ".crontab": - return "crontab" - case ".c": - return "C source" + if found, kind := byExtension(path); found { + return kind } // By string match - s := string(header[:]) switch { - case strings.Contains(s, "import "): - return "Python" - case strings.HasPrefix(s, "#!/bin/sh") || strings.HasPrefix(s, "#!/bin/bash") || strings.Contains(s, `echo "`) || strings.Contains(s, `if [`) || strings.Contains(s, `grep `) || strings.Contains(s, "if !"): - return "Shell" - case strings.HasPrefix(s, "#!"): + case strings.Contains(headerString, "import "): + return "Python script" + case strings.HasPrefix(headerString, "#!/bin/sh") || strings.HasPrefix(headerString, "#!/bin/bash") || strings.Contains(headerString, `echo "`) || strings.Contains(headerString, `if [`) || strings.Contains(headerString, `grep `) || strings.Contains(headerString, "if !"): + return "Shell script" + case strings.HasPrefix(headerString, "#!"): return "script" - case strings.Contains(s, "#include <"): + case strings.Contains(headerString, "#include <"): return "C Program" } - - // fmt.Printf("File %s string: %s", path, s) - // fmt.Printf("File %s: desc: %s\n", path, desc) return "" } + +// byExtension returns true, and descriptive file type if the extension is +// known, and false otherwise. +func byExtension(path string) (bool, string) { + ret, ok := extMap[filepath.Ext(path)] + return ok, ret +} diff --git a/pkg/action/programkind_test.go b/pkg/action/programkind_test.go new file mode 100644 index 000000000..13d7a0f19 --- /dev/null +++ b/pkg/action/programkind_test.go @@ -0,0 +1,133 @@ +package action + +import ( + "fmt" + "testing" +) + +func TestProgramKindMagic(t *testing.T) { + +} + +func TestProgramStringMatch(t *testing.T) { + tests := []struct { + filename string + want string + }{{ + filename: "python", + want: "Python script", + }, { + filename: "shell", + want: "Shell script", + }, { + filename: "short", + want: "", + }, { + filename: "empty", + want: "", + }, { + filename: "rando", // generated with : `head -c 1024 pkg/action/testdata/rando` + }, { + filename: "juttu", + want: "", + }} + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + got := programKind(fmt.Sprintf("testdata/%s", tt.filename)) + if got != tt.want { + t.Errorf("programKind() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestProgramKindExtensions(t *testing.T) { + tests := []struct { + filename string + want string + notFound bool // true if the file extension is not found in the map + }{{ + filename: "applescript.scpt", + want: "compiled AppleScript", + }, { + filename: "applescript.scptd", + want: "compiled AppleScript", + }, { + filename: "shell.sh", + want: "Shell script", + }, { + filename: "ruby.rb", + want: "Ruby script", + }, { + filename: "python.py", + want: "Python script", + }, { + filename: "perl.pl", + want: "PERL script", + }, { + filename: "yara.yara", + want: "", + }, { + filename: "expect.expect", + want: "Expect script", + }, { + filename: "php.php", + want: "PHP file", + }, { + filename: "html.html", + want: "", + }, { + filename: "javascript.js", + want: "Javascript", + }, { + filename: "typescript.ts", + want: "Typescript", + }, { + filename: "7z.7z", + want: "", + }, { + filename: "json.json", + want: "", + }, { + filename: "yaml.yml", + want: "", + }, { + filename: "yaml.yaml", + want: "", + }, { + filename: "java.java", + want: "Java source", + }, { + filename: "java.jar", + want: "Java program", + }, { + filename: "asm.asm", + want: "", + }, { + filename: "systemd.service", + want: "systemd", + }, { + filename: "crontab.cron", + want: "crontab", + }, { + filename: "crontab.crontab", + want: "crontab", + }, { + filename: "c.c", + want: "C source", + }, { + filename: "juttu.juttu", + notFound: true, + }} + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + exists, kind := byExtension(tt.filename) + if exists != !tt.notFound { + t.Errorf("byExtension() exists = %v, want %v", exists, !tt.notFound) + } + if kind != tt.want { + t.Errorf("byExtension() kind = %v, want %v", kind, tt.want) + } + }) + } +} diff --git a/pkg/action/testdata/empty b/pkg/action/testdata/empty new file mode 100644 index 000000000..e69de29bb diff --git a/pkg/action/testdata/python b/pkg/action/testdata/python new file mode 100644 index 000000000..36a1e2ed5 --- /dev/null +++ b/pkg/action/testdata/python @@ -0,0 +1 @@ +import sumtin diff --git a/pkg/action/testdata/rando b/pkg/action/testdata/rando new file mode 100644 index 0000000000000000000000000000000000000000..4e14e9b9db56b51978c59ea61406b4b32622da5a GIT binary patch literal 1024 zcmV+b1poW>CdOMpR67E$?eBqp-t5_d+f*7KcXTK+Hjd6;z!7F&KTf$|A$MXJTBZ{| zr~56Sd+^Fm>q6uTwMSdM!nmAfD*(iw%?XJri)PDE|2(U|_n?ia@5StTdA^#mA)B8# z;^QMk`=Xx|T}UU&>x#YNg;vO@zGXi_Docp|R$L6HBI!s_b5bTFIM$VP5F9FuhG;zu2-~2qJd#N{&m!fYP|2SKtrQF6-#gz| zW~9W3A(@z_nyD;y??25-UXh9zhp7jQ+6oR@Uw`@gOMb$jiAJ`)j3I-usbtBo^a(xr zC~?Oks*Uxs{sT@k)UIiknd8b_+ilPEDeni-(UjIDKK!}{3Te1Wnl9-WV#kja78+T3jmRMR3Db{)qz#FHAx|kttx09gHOtyVJZ<5>im-J;a9oy zLI~!@aaM;v$6SaxVp(eJy-K|Z&!rzRuSq4JWItxI4gqx_TT+y}fL{ZOoU1o|YYbn&{4wGEcTdFj|P4 zF-WppqMKN_HP3nRYSB^8PdAh8!N8PjKrlBtw{wdcoC}-E#!(YRC-Y!E6UDbn_1oj# zexT_kLot3b_7HVK81(vzUl=V$$y2tL5`9jhb$lFP4L-Bc`#^+X*B_KY;JYV;>H;UW z|5-<5s-YDB!?(VHu3S$EyJ<;10MwutOx_wIqlkt!XmQ?QJ&K*OomKH@ZsZQJ$OGWX z0VGKD85(!?L3`SlF5C*wpGXU*uDr7~D+GaAnD^{sbODy+Cpsw-5E_(Iv~*`I%AYgi zTBhC3ii*NHB8iAmowwz^D+3?9n&qg_h(pqXSm^Sq_! zvRMKG6dR1sRc(du!YP7-)ui?_2G2Jk*uz#*E)+7kuUTDsT<;KEn!`tu0bsPH@5YgN u>rPxYR1Y~yPiO8!y?tY}KfDZJ2H}amHWcD}l$leCs-WAN8ih2a4Nhl8Y4GU) literal 0 HcmV?d00001 diff --git a/pkg/action/testdata/shell b/pkg/action/testdata/shell new file mode 100644 index 000000000..96b4b06ad --- /dev/null +++ b/pkg/action/testdata/shell @@ -0,0 +1 @@ +#!/bin/sh \ No newline at end of file diff --git a/pkg/action/testdata/short b/pkg/action/testdata/short new file mode 100644 index 000000000..e61ef7b96 --- /dev/null +++ b/pkg/action/testdata/short @@ -0,0 +1 @@ +aa