From 01c18a9866cc57636d816bb5c769966e908ba3e7 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Mon, 2 May 2022 16:34:03 +0000 Subject: [PATCH 1/5] ignore printable files --- checks/raw/binary_artifact.go | 19 + checks/raw/binary_artifact_test.go | 33 +- checks/testdata/binaryartifacts/printable.lib | 410 ++++++++++++++++++ 3 files changed, 446 insertions(+), 16 deletions(-) create mode 100644 checks/testdata/binaryartifacts/printable.lib diff --git a/checks/raw/binary_artifact.go b/checks/raw/binary_artifact.go index ae1bbf1c19a..77e8bf999d1 100644 --- a/checks/raw/binary_artifact.go +++ b/checks/raw/binary_artifact.go @@ -18,6 +18,7 @@ import ( "fmt" "path/filepath" "strings" + "unicode" "github.com/h2non/filetype" "github.com/h2non/filetype/types" @@ -91,6 +92,11 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin return false, sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("filetype.Get:%v", err)) } + // Sanity check the file contains non-readable characters. + if isText(content) { + return true, nil + } + exists1 := binaryFileTypes[t.Extension] exists2 := binaryFileTypes[strings.ReplaceAll(filepath.Ext(path), ".", "")] if exists1 || exists2 { @@ -103,3 +109,16 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin return true, nil } + +// TODO: refine this function. +func isText(content []byte) bool { + for _, c := range string(content) { + if c == '\t' || c == '\n' || c == '\r' { + continue + } + if !unicode.IsPrint(c) { + return false + } + } + return true +} diff --git a/checks/raw/binary_artifact_test.go b/checks/raw/binary_artifact_test.go index 72f50f1bcb0..17f828b41ca 100644 --- a/checks/raw/binary_artifact_test.go +++ b/checks/raw/binary_artifact_test.go @@ -16,7 +16,6 @@ package raw import ( "fmt" - "log" "os" "testing" @@ -29,37 +28,40 @@ import ( func TestBinaryArtifacts(t *testing.T) { t.Parallel() tests := []struct { - name string - inputFile string - err error - files []string - expect int + name string + err error + files []string + expect int }{ { - name: "Jar file", - inputFile: "../testdata/binaryartifacts/jars/aws-java-sdk-core-1.11.571.jar", - err: nil, + name: "Jar file", + err: nil, files: []string{ "../testdata/binaryartifacts/jars/aws-java-sdk-core-1.11.571.jar", }, expect: 1, }, { - name: "non binary file", - inputFile: "../testdata/licensedir/withlicense/LICENSE", - err: nil, + name: "non binary file", + err: nil, files: []string{ "../testdata/licensedir/withlicense/LICENSE", }, }, { - name: "non binary file", - inputFile: "../doesnotexist", - err: nil, + name: "non binary file", + err: nil, files: []string{ "../doesnotexist", }, }, + { + name: "printable character.lib", + err: nil, + files: []string{ + "../testdata/printable.lib", + }, + }, } for _, tt := range tests { tt := tt // Re-initializing variable so it is not changed while executing the closure below @@ -72,7 +74,6 @@ func TestBinaryArtifacts(t *testing.T) { mockRepoClient.EXPECT().GetFileContent(gomock.Any()).DoAndReturn(func(file string) ([]byte, error) { // This will read the file and return the content content, err := os.ReadFile(file) - log.Println(os.Getwd()) if err != nil { return content, fmt.Errorf("%w", err) } diff --git a/checks/testdata/binaryartifacts/printable.lib b/checks/testdata/binaryartifacts/printable.lib new file mode 100644 index 00000000000..bb8fbc8e2ce --- /dev/null +++ b/checks/testdata/binaryartifacts/printable.lib @@ -0,0 +1,410 @@ +# Backward compatibility +asflags-y += $(EXTRA_AFLAGS) +ccflags-y += $(EXTRA_CFLAGS) +cppflags-y += $(EXTRA_CPPFLAGS) +ldflags-y += $(EXTRA_LDFLAGS) + +# +# flags that take effect in sub directories +export KBUILD_SUBDIR_ASFLAGS := $(KBUILD_SUBDIR_ASFLAGS) $(subdir-asflags-y) +export KBUILD_SUBDIR_CCFLAGS := $(KBUILD_SUBDIR_CCFLAGS) $(subdir-ccflags-y) + +# Figure out what we need to build from the various variables +# =========================================================================== + +# When an object is listed to be built compiled-in and modular, +# only build the compiled-in version + +obj-m := $(filter-out $(obj-y),$(obj-m)) + +# Libraries are always collected in one lib file. +# Filter out objects already built-in + +lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m))) + + +# Handle objects in subdirs +# --------------------------------------------------------------------------- +# o if we encounter foo/ in $(obj-y), replace it by foo/built-in.o +# and add the directory to the list of dirs to descend into: $(subdir-y) +# o if we encounter foo/ in $(obj-m), remove it from $(obj-m) +# and add the directory to the list of dirs to descend into: $(subdir-m) + +# Determine modorder. +# Unfortunately, we don't have information about ordering between -y +# and -m subdirs. Just put -y's first. +modorder := $(patsubst %/,%/modules.order, $(filter %/, $(obj-y)) $(obj-m:.o=.ko)) + +__subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +subdir-y += $(__subdir-y) +__subdir-m := $(patsubst %/,%,$(filter %/, $(obj-m))) +subdir-m += $(__subdir-m) +obj-y := $(patsubst %/, %/built-in.o, $(obj-y)) +obj-m := $(filter-out %/, $(obj-m)) + +# Subdirectories we need to descend into + +subdir-ym := $(sort $(subdir-y) $(subdir-m)) + +# if $(foo-objs) exists, foo.o is a composite object +multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m)))) +multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))), $(m)))) +multi-used := $(multi-used-y) $(multi-used-m) +single-used-m := $(sort $(filter-out $(multi-used-m),$(obj-m))) + +# Build list of the parts of our composite objects, our composite +# objects depend on those (obviously) +multi-objs-y := $(foreach m, $(multi-used-y), $($(m:.o=-objs)) $($(m:.o=-y))) +multi-objs-m := $(foreach m, $(multi-used-m), $($(m:.o=-objs)) $($(m:.o=-y))) +multi-objs := $(multi-objs-y) $(multi-objs-m) + +# $(subdir-obj-y) is the list of objects in $(obj-y) which uses dir/ to +# tell kbuild to descend +subdir-obj-y := $(filter %/built-in.o, $(obj-y)) + +# $(obj-dirs) is a list of directories that contain object files +obj-dirs := $(dir $(multi-objs) $(obj-y)) + +# Replace multi-part objects by their individual parts, look at local dir only +real-objs-y := $(foreach m, $(filter-out $(subdir-obj-y), $(obj-y)), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m))) $(extra-y) +real-objs-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))),$($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)),$(m))) + +# Add subdir path + +extra-y := $(addprefix $(obj)/,$(extra-y)) +always := $(addprefix $(obj)/,$(always)) +targets := $(addprefix $(obj)/,$(targets)) +modorder := $(addprefix $(obj)/,$(modorder)) +obj-y := $(addprefix $(obj)/,$(obj-y)) +obj-m := $(addprefix $(obj)/,$(obj-m)) +lib-y := $(addprefix $(obj)/,$(lib-y)) +subdir-obj-y := $(addprefix $(obj)/,$(subdir-obj-y)) +real-objs-y := $(addprefix $(obj)/,$(real-objs-y)) +real-objs-m := $(addprefix $(obj)/,$(real-objs-m)) +single-used-m := $(addprefix $(obj)/,$(single-used-m)) +multi-used-y := $(addprefix $(obj)/,$(multi-used-y)) +multi-used-m := $(addprefix $(obj)/,$(multi-used-m)) +multi-objs-y := $(addprefix $(obj)/,$(multi-objs-y)) +multi-objs-m := $(addprefix $(obj)/,$(multi-objs-m)) +subdir-ym := $(addprefix $(obj)/,$(subdir-ym)) +obj-dirs := $(addprefix $(obj)/,$(obj-dirs)) + +# These flags are needed for modversions and compiling, so we define them here +# already +# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will +# end up in (or would, if it gets compiled in) +# Note: Files that end up in two or more modules are compiled without the +# KBUILD_MODNAME definition. The reason is that any made-up name would +# differ in different configs. +name-fix = $(squote)$(quote)$(subst $(comma),_,$(subst -,_,$1))$(quote)$(squote) +basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget)) +modname_flags = $(if $(filter 1,$(words $(modname))),\ + -DKBUILD_MODNAME=$(call name-fix,$(modname))) + +orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ + $(ccflags-y) $(CFLAGS_$(basetarget).o) +_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) +orig_a_flags = $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(KBUILD_SUBDIR_ASFLAGS) \ + $(asflags-y) $(AFLAGS_$(basetarget).o) +_a_flags = $(filter-out $(AFLAGS_REMOVE_$(basetarget).o), $(orig_a_flags)) +_cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F)) + +# +# Enable gcov profiling flags for a file, directory or for all files depending +# on variables GCOV_PROFILE_obj.o, GCOV_PROFILE and CONFIG_GCOV_PROFILE_ALL +# (in this order) +# +ifeq ($(CONFIG_GCOV_KERNEL),y) +_c_flags += $(if $(patsubst n%,, \ + $(GCOV_PROFILE_$(basetarget).o)$(GCOV_PROFILE)$(CONFIG_GCOV_PROFILE_ALL)), \ + $(CFLAGS_GCOV)) +endif + +# +# Enable address sanitizer flags for kernel except some files or directories +# we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) +# +ifeq ($(CONFIG_KASAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ + $(CFLAGS_KASAN)) +endif + +ifeq ($(CONFIG_UBSAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SANITIZE)$(CONFIG_UBSAN_SANITIZE_ALL)), \ + $(CFLAGS_UBSAN)) +endif + +ifeq ($(CONFIG_KCOV),y) +_c_flags += $(if $(patsubst n%,, \ + $(KCOV_INSTRUMENT_$(basetarget).o)$(KCOV_INSTRUMENT)$(CONFIG_KCOV_INSTRUMENT_ALL)), \ + $(CFLAGS_KCOV)) +endif + +# If building the kernel in a separate objtree expand all occurrences +# of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). + +ifeq ($(KBUILD_SRC),) +__c_flags = $(_c_flags) +__a_flags = $(_a_flags) +__cpp_flags = $(_cpp_flags) +else + +# -I$(obj) locates generated .h files +# $(call addtree,-I$(obj)) locates .h files in srctree, from generated .c files +# and locates generated .h files +# FIXME: Replace both with specific CFLAGS* statements in the makefiles +__c_flags = $(if $(obj),$(call addtree,-I$(src)) -I$(obj)) \ + $(call flags,_c_flags) +__a_flags = $(call flags,_a_flags) +__cpp_flags = $(call flags,_cpp_flags) +endif + +c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ + $(__c_flags) $(modkern_cflags) \ + $(basename_flags) $(modname_flags) + +a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ + $(__a_flags) $(modkern_aflags) + +cpp_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ + $(__cpp_flags) + +ld_flags = $(LDFLAGS) $(ldflags-y) + +dtc_cpp_flags = -Wp,-MD,$(depfile).pre.tmp -nostdinc \ + -I$(srctree)/arch/$(SRCARCH)/boot/dts \ + -I$(srctree)/arch/$(SRCARCH)/boot/dts/include \ + -I$(srctree)/drivers/of/testcase-data \ + -undef -D__DTS__ + +# Finds the multi-part object the current object will be linked into +modname-multi = $(sort $(foreach m,$(multi-used),\ + $(if $(filter $(subst $(obj)/,,$*.o), $($(m:.o=-objs)) $($(m:.o=-y))),$(m:.o=)))) + +# Useful for describing the dependency of composite objects +# Usage: +# $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add) +define multi_depend +$(foreach m, $(notdir $1), \ + $(eval $(obj)/$m: \ + $(addprefix $(obj)/, $(foreach s, $3, $($(m:%$(strip $2)=%$(s))))))) +endef + +ifdef REGENERATE_PARSERS + +# GPERF +# --------------------------------------------------------------------------- +quiet_cmd_gperf = GPERF $@ + cmd_gperf = gperf -t --output-file $@ -a -C -E -g -k 1,3,$$ -p -t $< + +.PRECIOUS: $(src)/%.hash.c_shipped +$(src)/%.hash.c_shipped: $(src)/%.gperf + $(call cmd,gperf) + +# LEX +# --------------------------------------------------------------------------- +LEX_PREFIX = $(if $(LEX_PREFIX_${baseprereq}),$(LEX_PREFIX_${baseprereq}),yy) + +quiet_cmd_flex = LEX $@ + cmd_flex = flex -o$@ -L -P $(LEX_PREFIX) $< + +.PRECIOUS: $(src)/%.lex.c_shipped +$(src)/%.lex.c_shipped: $(src)/%.l + $(call cmd,flex) + +# YACC +# --------------------------------------------------------------------------- +YACC_PREFIX = $(if $(YACC_PREFIX_${baseprereq}),$(YACC_PREFIX_${baseprereq}),yy) + +quiet_cmd_bison = YACC $@ + cmd_bison = bison -o$@ -t -l -p $(YACC_PREFIX) $< + +.PRECIOUS: $(src)/%.tab.c_shipped +$(src)/%.tab.c_shipped: $(src)/%.y + $(call cmd,bison) + +quiet_cmd_bison_h = YACC $@ + cmd_bison_h = bison -o/dev/null --defines=$@ -t -l -p $(YACC_PREFIX) $< + +.PRECIOUS: $(src)/%.tab.h_shipped +$(src)/%.tab.h_shipped: $(src)/%.y + $(call cmd,bison_h) + +endif + +# Shipped files +# =========================================================================== + +quiet_cmd_shipped = SHIPPED $@ +cmd_shipped = cat $< > $@ + +$(obj)/%: $(src)/%_shipped + $(call cmd,shipped) + +# Commands useful for building a boot image +# =========================================================================== +# +# Use as following: +# +# target: source(s) FORCE +# $(if_changed,ld/objcopy/gzip) +# +# and add target to extra-y so that we know we have to +# read in the saved command line + +# Linking +# --------------------------------------------------------------------------- + +quiet_cmd_ld = LD $@ +cmd_ld = $(LD) $(LDFLAGS) $(ldflags-y) $(LDFLAGS_$(@F)) \ + $(filter-out FORCE,$^) -o $@ + +# Objcopy +# --------------------------------------------------------------------------- + +quiet_cmd_objcopy = OBJCOPY $@ +cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ + +# Gzip +# --------------------------------------------------------------------------- + +quiet_cmd_gzip = GZIP $@ +cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -n -f -9 > $@) || \ + (rm -f $@ ; false) + +# DTC +# --------------------------------------------------------------------------- +DTC ?= $(objtree)/scripts/dtc/dtc + +# Disable noisy checks by default +ifeq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),) +DTC_FLAGS += -Wno-unit_address_vs_reg +endif + +# Generate an assembly file to wrap the output of the device tree compiler +quiet_cmd_dt_S_dtb= DTB $@ +cmd_dt_S_dtb= \ +( \ + echo '\#include '; \ + echo '.section .dtb.init.rodata,"a"'; \ + echo '.balign STRUCT_ALIGNMENT'; \ + echo '.global __dtb_$(*F)_begin'; \ + echo '__dtb_$(*F)_begin:'; \ + echo '.incbin "$<" '; \ + echo '__dtb_$(*F)_end:'; \ + echo '.global __dtb_$(*F)_end'; \ + echo '.balign STRUCT_ALIGNMENT'; \ +) > $@ + +$(obj)/%.dtb.S: $(obj)/%.dtb + $(call cmd,dt_S_dtb) + +quiet_cmd_dtc = DTC $@ +cmd_dtc = mkdir -p $(dir ${dtc-tmp}) ; \ + $(CPP) $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ + $(DTC) -O dtb -o $@ -b 0 \ + -i $(dir $<) $(DTC_FLAGS) \ + -d $(depfile).dtc.tmp $(dtc-tmp) ; \ + cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) + +$(obj)/%.dtb: $(src)/%.dts FORCE + $(call if_changed_dep,dtc) + +dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) + +# Bzip2 +# --------------------------------------------------------------------------- + +# Bzip2 and LZMA do not include size in file... so we have to fake that; +# append the size as a 32-bit littleendian number as gzip does. +size_append = printf $(shell \ +dec_size=0; \ +for F in $1; do \ + fsize=$$(stat -c "%s" $$F); \ + dec_size=$$(expr $$dec_size + $$fsize); \ +done; \ +printf "%08x\n" $$dec_size | \ + sed 's/\(..\)/\1 /g' | { \ + read ch0 ch1 ch2 ch3; \ + for ch in $$ch3 $$ch2 $$ch1 $$ch0; do \ + printf '%s%03o' '\\' $$((0x$$ch)); \ + done; \ + } \ +) + +quiet_cmd_bzip2 = BZIP2 $@ +cmd_bzip2 = (cat $(filter-out FORCE,$^) | \ + bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + +# Lzma +# --------------------------------------------------------------------------- + +quiet_cmd_lzma = LZMA $@ +cmd_lzma = (cat $(filter-out FORCE,$^) | \ + lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + +quiet_cmd_lzo = LZO $@ +cmd_lzo = (cat $(filter-out FORCE,$^) | \ + lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + +quiet_cmd_lz4 = LZ4 $@ +cmd_lz4 = (cat $(filter-out FORCE,$^) | \ + lz4c -l -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + +# U-Boot mkimage +# --------------------------------------------------------------------------- + +MKIMAGE := $(srctree)/scripts/mkuboot.sh + +# SRCARCH just happens to match slightly more than ARCH (on sparc), so reduces +# the number of overrides in arch makefiles +UIMAGE_ARCH ?= $(SRCARCH) +UIMAGE_COMPRESSION ?= $(if $(2),$(2),none) +UIMAGE_OPTS-y ?= +UIMAGE_TYPE ?= kernel +UIMAGE_LOADADDR ?= arch_must_set_this +UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR) +UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)' +UIMAGE_IN ?= $< +UIMAGE_OUT ?= $@ + +quiet_cmd_uimage = UIMAGE $(UIMAGE_OUT) + cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \ + -C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \ + -T $(UIMAGE_TYPE) \ + -a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \ + -n $(UIMAGE_NAME) -d $(UIMAGE_IN) $(UIMAGE_OUT) + +# XZ +# --------------------------------------------------------------------------- +# Use xzkern to compress the kernel image and xzmisc to compress other things. +# +# xzkern uses a big LZMA2 dictionary since it doesn't increase memory usage +# of the kernel decompressor. A BCJ filter is used if it is available for +# the target architecture. xzkern also appends uncompressed size of the data +# using size_append. The .xz format has the size information available at +# the end of the file too, but it's in more complex format and it's good to +# avoid changing the part of the boot code that reads the uncompressed size. +# Note that the bytes added by size_append will make the xz tool think that +# the file is corrupt. This is expected. +# +# xzmisc doesn't use size_append, so it can be used to create normal .xz +# files. xzmisc uses smaller LZMA2 dictionary than xzkern, because a very +# big dictionary would increase the memory usage too much in the multi-call +# decompression mode. A BCJ filter isn't used either. +quiet_cmd_xzkern = XZKERN $@ +cmd_xzkern = (cat $(filter-out FORCE,$^) | \ + sh $(srctree)/scripts/xz_wrap.sh && \ + $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) + +quiet_cmd_xzmisc = XZMISC $@ +cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ + xz --check=crc32 --lzma2=dict=1MiB) > $@ || \ + (rm -f $@ ; false) \ No newline at end of file From b369553112769169152d49a1daf8ac16900263d0 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Mon, 2 May 2022 16:45:20 +0000 Subject: [PATCH 2/5] updates --- checks/raw/binary_artifact.go | 1 + checks/raw/binary_artifact_test.go | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/checks/raw/binary_artifact.go b/checks/raw/binary_artifact.go index 77e8bf999d1..8c0c7ee0052 100644 --- a/checks/raw/binary_artifact.go +++ b/checks/raw/binary_artifact.go @@ -107,6 +107,7 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin }) } + fmt.Println(*pfiles) return true, nil } diff --git a/checks/raw/binary_artifact_test.go b/checks/raw/binary_artifact_test.go index 17f828b41ca..1df61c44a48 100644 --- a/checks/raw/binary_artifact_test.go +++ b/checks/raw/binary_artifact_test.go @@ -56,10 +56,10 @@ func TestBinaryArtifacts(t *testing.T) { }, }, { - name: "printable character.lib", + name: "printable character .lib", err: nil, files: []string{ - "../testdata/printable.lib", + "../testdata/binaryartifacts/printable.lib", }, }, } From 183c86be7dbcf0de36b69be8c4a5e249fec7d69d Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Mon, 2 May 2022 20:58:36 +0000 Subject: [PATCH 3/5] e2e tests --- checks/raw/binary_artifact.go | 1 - e2e/binary_artifacts_test.go | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/checks/raw/binary_artifact.go b/checks/raw/binary_artifact.go index 8c0c7ee0052..77e8bf999d1 100644 --- a/checks/raw/binary_artifact.go +++ b/checks/raw/binary_artifact.go @@ -107,7 +107,6 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin }) } - fmt.Println(*pfiles) return true, nil } diff --git a/e2e/binary_artifacts_test.go b/e2e/binary_artifacts_test.go index 495d61d14f1..fd66deba31e 100644 --- a/e2e/binary_artifacts_test.go +++ b/e2e/binary_artifacts_test.go @@ -76,10 +76,11 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Dlogger: &dl, } // TODO: upload real binaries to the repo as well. + // There are 24 dummy binaries that are ignoreed because they only contain ASCII characters. expected := scut.TestReturn{ Error: nil, - Score: checker.MinResultScore, - NumberOfWarn: 25, + Score: checker.MaxResultScore - 1, + NumberOfWarn: 1, NumberOfInfo: 0, NumberOfDebug: 0, } @@ -95,7 +96,7 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { repo, err := githubrepo.MakeGithubRepo("ossf-tests/scorecard-check-binary-artifacts-e2e") Expect(err).Should(BeNil()) repoClient := githubrepo.CreateGithubRepoClient(context.Background(), logger) - err = repoClient.InitRepo(repo, "0c6e8991781bba24bfaaa0525f69329f672ef7b5") + err = repoClient.InitRepo(repo, "5b48dea88825662d67ed94b609b45cf7705333b6") Expect(err).Should(BeNil()) req := checker.CheckRequest{ @@ -105,10 +106,11 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Dlogger: &dl, } // TODO: upload real binaries to the repo as well. + // There are 24 dummy binaries that are ignoreed because they only contain ASCII characters. expected := scut.TestReturn{ Error: nil, - Score: checker.MinResultScore, - NumberOfWarn: 24, + Score: checker.MaxResultScore - 1, + NumberOfWarn: 1, NumberOfInfo: 0, NumberOfDebug: 0, } @@ -117,7 +119,7 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Expect(scut.ValidateTestReturn(nil, "binary artifacts", &expected, &result, &dl)).Should(BeTrue()) Expect(repoClient.Close()).Should(BeNil()) }) - It("Should return binary artifacts present in source code", func() { + It("Should return no binary artifacts present in source code", func() { dl := scut.TestDetailLogger{} repo, err := githubrepo.MakeGithubRepo("ossf-tests/scorecard-check-binary-artifacts-e2e-4-binaries") Expect(err).Should(BeNil()) @@ -132,10 +134,11 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Dlogger: &dl, } // TODO: upload real binaries to the repo as well. + // Existing binaries only contain SCII characters and are ignored. expected := scut.TestReturn{ Error: nil, - Score: checker.MaxResultScore - 4, - NumberOfWarn: 4, + Score: checker.MaxResultScore, + NumberOfWarn: 0, NumberOfInfo: 0, NumberOfDebug: 0, } From ef246fce7ea0a6db101729ed1fcaaf4e72352a0c Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 3 May 2022 16:18:46 +0000 Subject: [PATCH 4/5] e2e fix --- e2e/binary_artifacts_test.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/e2e/binary_artifacts_test.go b/e2e/binary_artifacts_test.go index fd66deba31e..9a39a6789cf 100644 --- a/e2e/binary_artifacts_test.go +++ b/e2e/binary_artifacts_test.go @@ -85,9 +85,6 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { NumberOfDebug: 0, } result := checks.BinaryArtifacts(&req) - // UPGRADEv2: to remove. - // Old version. - // New version. Expect(scut.ValidateTestReturn(nil, "binary artifacts", &expected, &result, &dl)).Should(BeTrue()) Expect(repoClient.Close()).Should(BeNil()) }) @@ -115,7 +112,6 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { NumberOfDebug: 0, } result := checks.BinaryArtifacts(&req) - // New version. Expect(scut.ValidateTestReturn(nil, "binary artifacts", &expected, &result, &dl)).Should(BeTrue()) Expect(repoClient.Close()).Should(BeNil()) }) @@ -161,11 +157,12 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Repo: repo, Dlogger: &dl, } - // TODO: upload real binaries to the repo as well. + // TODO: upload real binaries to the repo. + // Existing binaries only contain SCII characters and are ignored. expected := scut.TestReturn{ Error: nil, - Score: checker.MaxResultScore - 4, - NumberOfWarn: 4, + Score: checker.MaxResultScore, + NumberOfWarn: 0, NumberOfInfo: 0, NumberOfDebug: 0, } @@ -199,11 +196,12 @@ var _ = Describe("E2E TEST:"+checks.CheckBinaryArtifacts, func() { Repo: repo, Dlogger: &dl, } - // TODO: upload real binaries to the repo as well. + // TODO: upload real binaries to the repo. + // Existing binaries only contain SCII characters and are ignored. expected := scut.TestReturn{ Error: nil, - Score: checker.MaxResultScore - 4, - NumberOfWarn: 4, + Score: checker.MaxResultScore, + NumberOfWarn: 0, NumberOfInfo: 0, NumberOfDebug: 0, } From 4bb8c9352388824c57557ce32e6fb6d27ade6c23 Mon Sep 17 00:00:00 2001 From: laurentsimon Date: Tue, 3 May 2022 19:38:43 +0000 Subject: [PATCH 5/5] comments --- checks/raw/binary_artifact.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/checks/raw/binary_artifact.go b/checks/raw/binary_artifact.go index 77e8bf999d1..5b3338e38f1 100644 --- a/checks/raw/binary_artifact.go +++ b/checks/raw/binary_artifact.go @@ -92,14 +92,18 @@ var checkBinaryFileContent fileparser.DoWhileTrueOnFileContent = func(path strin return false, sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("filetype.Get:%v", err)) } - // Sanity check the file contains non-readable characters. - if isText(content) { + exists1 := binaryFileTypes[t.Extension] + if exists1 { + *pfiles = append(*pfiles, checker.File{ + Path: path, + Type: checker.FileTypeBinary, + Offset: checker.OffsetDefault, + }) return true, nil } - exists1 := binaryFileTypes[t.Extension] exists2 := binaryFileTypes[strings.ReplaceAll(filepath.Ext(path), ".", "")] - if exists1 || exists2 { + if !isText(content) && exists2 { *pfiles = append(*pfiles, checker.File{ Path: path, Type: checker.FileTypeBinary,