Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 cron: support reading prefix from file for controller input files (7/n) #2445

Merged
merged 8 commits into from
Nov 9, 2022
Merged
37 changes: 29 additions & 8 deletions cron/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ const (
// TransferStatusFilename file identifies if shard transfer to BigQuery is completed.
TransferStatusFilename string = ".transfer_complete"

configFlag string = "config"
configDefault string = ""
configUsage string = "Location of config file. Required"
configFlag string = "config"
configDefault string = ""
configUsage string = "Location of config file. Required"
inputBucketParams string = "input-bucket"

projectID string = "SCORECARD_PROJECT_ID"
requestTopicURL string = "SCORECARD_REQUEST_TOPIC_URL"
Expand Down Expand Up @@ -300,16 +301,36 @@ func GetAPIResultsBucketURL() (string, error) {

// GetInputBucketURL() returns the bucket URL for input files.
func GetInputBucketURL() (string, error) {
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
bucketParams, err := GetAdditionalParams(inputBucketParams)
bURL, ok := bucketParams["url"]
if err != nil || !ok {
// TODO temporarily falling back to old variables until changes propagate to production
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
}
return bURL, nil
}

// GetInputBucketPrefix() returns the prefix used when fetching files from a bucket.
func GetInputBucketPrefix() (string, error) {
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
return "", err
bucketParams, err := GetAdditionalParams(inputBucketParams)
if err != nil {
// TODO temporarily falling back to old variables until changes propagate to production
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
return "", err
}
return prefix, nil
}
return bucketParams["prefix"], nil
}

// GetInputBucketPrefixFile() returns the file whose contents specify the prefix to use.
func GetInputBucketPrefixFile() (string, error) {
bucketParams, err := GetAdditionalParams(inputBucketParams)
if err != nil {
return "", fmt.Errorf("getting config for %s: %w", inputBucketParams, err)
}
return prefix, nil
return bucketParams["prefix-file"], nil
}

func GetAdditionalParams(subMapName string) (map[string]string, error) {
Expand Down
10 changes: 9 additions & 1 deletion cron/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,20 @@ shard-size: 10
webhook-url:
metric-exporter: stackdriver
result-data-bucket-url: gs://ossf-scorecard-data2

# TODO temporarily leaving old variables until changes propagate to production
input-bucket-url: gs://ossf-scorecard-input-projects
# Can be used to specify directories within a bucket. Can be empty.
input-bucket-prefix:

additional-params:
criticality:
input-bucket:
url: gs://ossf-scorecard-input-projects
# Optional prefix to limit files used as input files within a bucket (e.g. a specific file or directory)
prefix:
# Optional file to read a prefix from, instead of statically defining prefix above (note: prefix must be blank to use this option)
# This is good in situations where the prefix changes frequently (e.g. always using the most recent folder in a bucket)
prefix-file:

scorecard:
# API results bucket
Expand Down
61 changes: 52 additions & 9 deletions cron/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,30 @@ const (
prodShardSize int = 10
prodMetricExporter string = "stackdriver"
// Raw results.
prodRawBucket = "gs://ossf-scorecard-rawdata"
prodRawBigQueryTable = "scorecard-rawdata"
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
prodInputBucketPrefix = ""
prodRawBucket = "gs://ossf-scorecard-rawdata"
prodRawBigQueryTable = "scorecard-rawdata"
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
prodInputBucketPrefix = ""
prodInputBucketPrefixFile = ""
)

var (
prodInputBucketParams = map[string]string{
"url": prodInputBucketURL,
"prefix": prodInputBucketPrefix,
"prefix-file": prodInputBucketPrefixFile,
}
prodScorecardParams = map[string]string{
"api-results-bucket-url": prodAPIBucketURL,
"blacklisted-checks": prodBlacklistedChecks,
"cii-data-bucket-url": prodCIIDataBucket,
"raw-bigquery-table": prodRawBigQueryTable,
"raw-result-data-bucket-url": prodRawBucket,
}
prodCriticalityParams map[string]string = nil
prodAdditionalParams = map[string]map[string]string{
"scorecard": prodScorecardParams,
"criticality": prodCriticalityParams,
prodAdditionalParams = map[string]map[string]string{
"input-bucket": prodInputBucketParams,
"scorecard": prodScorecardParams,
}
)

Expand Down Expand Up @@ -474,3 +479,41 @@ func TestEnvVarName(t *testing.T) {
})
}
}

func TestGetAdditionalParams(t *testing.T) {
t.Parallel()
//nolint:govet
tests := []struct {
name string
mapName string
want map[string]string
wantErr bool
}{
{
name: "scorecard values",
mapName: "scorecard",
want: prodScorecardParams,
wantErr: false,
},
{
name: "nonexistant value",
mapName: "this-value-should-never-exist",
want: map[string]string{},
wantErr: true,
},
}
for _, testcase := range tests {
testcase := testcase
t.Run(testcase.name, func(t *testing.T) {
t.Parallel()
got, err := GetAdditionalParams(testcase.mapName)
if testcase.wantErr != (err != nil) {
t.Fatalf("unexpected error value for GetAdditionalParams: %v", err)
}
if !cmp.Equal(got, testcase.want) {
diff := cmp.Diff(got, testcase.want)
t.Errorf("test failed: expected - %v, got - %v. \n%s", testcase.want, got, diff)
}
})
}
}
17 changes: 16 additions & 1 deletion cron/data/blob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,26 @@ func TestBlobKeysPrefix(t *testing.T) {
{
name: "no prefix",
prefix: "",
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt"},
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "subdir prefix",
prefix: "subdir/",
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "subdir prefix no terminal slash",
prefix: "subdir",
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "nested prefix",
prefix: "subdir/nested/",
want: []string{"subdir/nested/key5.txt"},
},
{
name: "file prefix",
prefix: "subdir/key4.txt",
want: []string{"subdir/key4.txt"},
},
}
Expand Down
Empty file.
92 changes: 92 additions & 0 deletions cron/internal/controller/bucket.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2022 Security Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package main implements the PubSub controller.
package main

import (
"bytes"
"context"
"fmt"
"strings"

"github.com/ossf/scorecard/v4/cron/config"
"github.com/ossf/scorecard/v4/cron/data"
)

// getPrefix returns the prefix used when reading input files from a bucket.
// If "prefix" is set, the value is used irrespective of the value of "prefix-file".
// Otherwise, the contents of "prefix-file" (if set) are used.
func getPrefix(ctx context.Context, bucket string) (string, error) {
prefix, err := config.GetInputBucketPrefix()
if err != nil {
return "", fmt.Errorf("config.GetInputBucketPrefix: %w", err)
}
if prefix != "" {
// prioritize prefix if set
return prefix, nil
}

prefixFile, err := config.GetInputBucketPrefixFile()
if err != nil {
return "", fmt.Errorf("config.GetInputBucketPrefixFile: %w", err)
}
if prefixFile == "" {
// cant read a file which doesnt exist, but the value is optional so no error
return "", nil
}

b, err := data.GetBlobContent(ctx, bucket, prefixFile)
if err != nil {
return "", fmt.Errorf("fetching contents of prefix-file: %w", err)
}
s := string(b)
return strings.TrimSpace(s), nil
}

func bucketFiles(ctx context.Context) (data.Iterator, error) {
var iters []data.Iterator

bucket, err := config.GetInputBucketURL()
if err != nil {
return nil, fmt.Errorf("config.GetInputBucketURL: %w", err)
}
prefix, err := getPrefix(ctx, bucket)
if err != nil {
return nil, fmt.Errorf("getPrefix: %w", err)
}

files, err := data.GetBlobKeysWithPrefix(ctx, bucket, prefix)
if err != nil {
return nil, fmt.Errorf("data.GetBlobKeysWithPrefix: %w", err)
}

for _, f := range files {
b, err := data.GetBlobContent(ctx, bucket, f)
if err != nil {
return nil, fmt.Errorf("data.GetBlobContent: %w", err)
}
r := bytes.NewReader(b)
i, err := data.MakeIteratorFrom(r)
if err != nil {
return nil, fmt.Errorf("data.MakeIteratorFrom: %w", err)
}
iters = append(iters, i)
}
iter, err := data.MakeNestedIterator(iters)
if err != nil {
return nil, fmt.Errorf("data.MakeNestedIterator: %w", err)
}
return iter, nil
}
95 changes: 95 additions & 0 deletions cron/internal/controller/bucket_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2022 Security Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"path/filepath"
"testing"
)

//nolint:tparallel,paralleltest // since t.Setenv is used
func TestGetPrefix(t *testing.T) {
//nolint:govet
testcases := []struct {
name string
url string
prefix string
prefixFile string
want string
wantErr bool
}{
{
name: "no prefix",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "",
want: "",
wantErr: false,
},
{
name: "prefix set",
url: "testdata/getPrefix",
prefix: "foo",
prefixFile: "",
want: "foo",
wantErr: false,
},
{
name: "prefix file set",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "marker",
want: "bar",
wantErr: false,
},
{
name: "prefix and prefix file set",
url: "testdata/getPrefix",
prefix: "foo",
prefixFile: "marker",
want: "foo",
wantErr: false,
},
{
name: "non existent prefix file",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "baz",
want: "",
wantErr: true,
},
}

for _, testcase := range testcases {
testcase := testcase
t.Run(testcase.name, func(t *testing.T) {
t.Setenv("INPUT_BUCKET_URL", testcase.url)
t.Setenv("INPUT_BUCKET_PREFIX", testcase.prefix)
t.Setenv("INPUT_BUCKET_PREFIX_FILE", testcase.prefixFile)
testdataPath, err := filepath.Abs(testcase.url)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
got, err := getPrefix(context.Background(), "file:///"+testdataPath)
if (err != nil) != testcase.wantErr {
t.Errorf("unexpected error produced: %v", err)
}
if got != testcase.want {
t.Errorf("test failed: expected - %s, got = %s", testcase.want, got)
}
})
}
}
Loading