From b1ab16e80f3fdcb060f70642ead9a85510a7f413 Mon Sep 17 00:00:00 2001 From: laurentsimon <64505099+laurentsimon@users.noreply.github.com> Date: Fri, 18 Mar 2022 19:05:14 -0700 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20raw=20results=20to=20cron=20s?= =?UTF-8?q?cans=20(#1741)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * draft * updates * updates * updates * updates * updates * comments * comments * comments * comments * comments * comments --- checks/binary_artifact.go | 3 +- checks/branch_protection.go | 1 - checks/code_review.go | 1 - checks/dependency_update_tool.go | 3 +- checks/security_policy.go | 1 - checks/vulnerabilities.go | 1 - cmd/root.go | 1 - cmd/serve.go | 2 +- cron/config/config.go | 20 +- cron/config/config.yaml | 3 + cron/config/config_test.go | 9 +- cron/controller/main.go | 19 +- cron/format/json_raw_results.go | 308 +++++++++++++++++++++++++++++ cron/k8s/controller.release.yaml | 3 + cron/k8s/transfer-raw.yaml | 41 ++++ cron/k8s/transfer.release-raw.yaml | 43 ++++ cron/k8s/worker.release.yaml | 2 + cron/worker/main.go | 33 +++- pkg/scorecard.go | 16 +- 19 files changed, 481 insertions(+), 29 deletions(-) create mode 100644 cron/format/json_raw_results.go create mode 100644 cron/k8s/transfer-raw.yaml create mode 100644 cron/k8s/transfer.release-raw.yaml diff --git a/checks/binary_artifact.go b/checks/binary_artifact.go index a3d0d567871..bb0312d458d 100644 --- a/checks/binary_artifact.go +++ b/checks/binary_artifact.go @@ -26,7 +26,7 @@ const CheckBinaryArtifacts string = "Binary-Artifacts" //nolint func init() { - var supportedRequestTypes = []checker.RequestType{ + supportedRequestTypes := []checker.RequestType{ checker.FileBased, checker.CommitBased, } @@ -47,7 +47,6 @@ func BinaryArtifacts(c *checker.CheckRequest) checker.CheckResult { // Return raw results. if c.RawResults != nil { c.RawResults.BinaryArtifactResults = rawData - return checker.CheckResult{} } // Return the score evaluation. 
diff --git a/checks/branch_protection.go b/checks/branch_protection.go index 2ab0a05769c..a45bac720b3 100644 --- a/checks/branch_protection.go +++ b/checks/branch_protection.go @@ -43,7 +43,6 @@ func BranchProtection(c *checker.CheckRequest) checker.CheckResult { // Return raw results. if c.RawResults != nil { c.RawResults.BranchProtectionResults = rawData - return checker.CheckResult{} } // Return the score evaluation. diff --git a/checks/code_review.go b/checks/code_review.go index 91c6ad9aba2..11bfff72918 100644 --- a/checks/code_review.go +++ b/checks/code_review.go @@ -46,7 +46,6 @@ func CodeReview(c *checker.CheckRequest) checker.CheckResult { // Return raw results. if c.RawResults != nil { c.RawResults.CodeReviewResults = rawData - return checker.CheckResult{} } // Return the score evaluation. diff --git a/checks/dependency_update_tool.go b/checks/dependency_update_tool.go index 286dd6d5c48..5f0f2b490a9 100644 --- a/checks/dependency_update_tool.go +++ b/checks/dependency_update_tool.go @@ -26,7 +26,7 @@ const CheckDependencyUpdateTool = "Dependency-Update-Tool" //nolint func init() { - var supportedRequestTypes = []checker.RequestType{ + supportedRequestTypes := []checker.RequestType{ checker.FileBased, } if err := registerCheck(CheckDependencyUpdateTool, DependencyUpdateTool, supportedRequestTypes); err != nil { @@ -46,7 +46,6 @@ func DependencyUpdateTool(c *checker.CheckRequest) checker.CheckResult { // Return raw results. if c.RawResults != nil { c.RawResults.DependencyUpdateToolResults = rawData - return checker.CheckResult{} } // Return the score evaluation. diff --git a/checks/security_policy.go b/checks/security_policy.go index 0505a58cf6e..5d8ec594a14 100644 --- a/checks/security_policy.go +++ b/checks/security_policy.go @@ -47,7 +47,6 @@ func SecurityPolicy(c *checker.CheckRequest) checker.CheckResult { // Set the raw results. 
if c.RawResults != nil { c.RawResults.SecurityPolicyResults = rawData - return checker.CheckResult{} } return evaluation.SecurityPolicy(CheckSecurityPolicy, c.Dlogger, &rawData) diff --git a/checks/vulnerabilities.go b/checks/vulnerabilities.go index 94706c49680..22944b48bca 100644 --- a/checks/vulnerabilities.go +++ b/checks/vulnerabilities.go @@ -46,7 +46,6 @@ func Vulnerabilities(c *checker.CheckRequest) checker.CheckResult { // Set the raw results. if c.RawResults != nil { c.RawResults.VulnerabilitiesResults = rawData - return checker.CheckResult{} } return evaluation.Vulnerabilities(CheckVulnerabilities, c.Dlogger, &rawData) diff --git a/cmd/root.go b/cmd/root.go index e8f54561678..cf8c1e59406 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -128,7 +128,6 @@ func rootCmd(o *options.Options) { ctx, repoURI, o.Commit, - o.Format == options.FormatRaw, enabledChecks, repoClient, ossFuzzRepoClient, diff --git a/cmd/serve.go b/cmd/serve.go index a57765fd79c..8d4f2f4a3a1 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -69,7 +69,7 @@ func serveCmd(o *options.Options) *cobra.Command { defer ossFuzzRepoClient.Close() ciiClient := clients.DefaultCIIBestPracticesClient() repoResult, err := pkg.RunScorecards( - ctx, repo, clients.HeadSHA /*commitSHA*/, false /*raw*/, checks.AllChecks, repoClient, + ctx, repo, clients.HeadSHA /*commitSHA*/, checks.AllChecks, repoClient, ossFuzzRepoClient, ciiClient, vulnsClient) if err != nil { logger.Error(err, "running enabled scorecard checks on repo") diff --git a/cron/config/config.go b/cron/config/config.go index da18f96af40..baefa266c56 100644 --- a/cron/config/config.go +++ b/cron/config/config.go @@ -47,9 +47,12 @@ const ( metricExporter string = "SCORECARD_METRIC_EXPORTER" ciiDataBucketURL string = "SCORECARD_CII_DATA_BUCKET_URL" blacklistedChecks string = "SCORECARD_BLACKLISTED_CHECKS" - + // v2 results. 
bigqueryTableV2 string = "SCORECARD_BIGQUERY_TABLEV2" resultDataBucketURLV2 string = "SCORECARD_DATA_BUCKET_URLV2" + // Raw results. + rawBigqueryTable string = "RAW_SCORECARD_BIGQUERY_TABLE" + rawResultDataBucketURL string = "RAW_SCORECARD_DATA_BUCKET_URL" ) var ( @@ -78,6 +81,9 @@ type config struct { // UPGRADEv2: to remove. ResultDataBucketURLV2 string `yaml:"result-data-bucket-url-v2"` BigQueryTableV2 string `yaml:"bigquery-table-v2"` + // Raw results. + RawResultDataBucketURL string `yaml:"raw-result-data-bucket-url"` + RawBigQueryTable string `yaml:"raw-bigquery-table"` } func getParsedConfigFromFile(byteValue []byte) (config, error) { @@ -196,6 +202,18 @@ func GetResultDataBucketURLV2() (string, error) { return getStringConfigValue(resultDataBucketURLV2, configYAML, "ResultDataBucketURLV2", "result-data-bucket-url-v2") } +// GetRawBigQueryTable returns the table name to transfer cron job results. +func GetRawBigQueryTable() (string, error) { + return getStringConfigValue(rawBigqueryTable, configYAML, + "RawBigQueryTable", "raw-bigquery-table") +} + +// GetRawResultDataBucketURL returns the bucketURL for storing cron job's raw results. +func GetRawResultDataBucketURL() (string, error) { + return getStringConfigValue(rawResultDataBucketURL, configYAML, + "RawResultDataBucketURL", "raw-result-data-bucket-url") +} + // GetShardSize returns the shard_size for the cron job. func GetShardSize() (int, error) { return getIntConfigValue(shardSize, configYAML, "ShardSize", "shard-size") diff --git a/cron/config/config.yaml b/cron/config/config.yaml index 58c5874631e..bb74415cea5 100644 --- a/cron/config/config.yaml +++ b/cron/config/config.yaml @@ -29,3 +29,6 @@ metric-exporter: stackdriver # UPGRADEv2: to remove. result-data-bucket-url-v2: gs://ossf-scorecard-data2 bigquery-table-v2: scorecard-v2 +# Raw results. 
+raw-result-data-bucket-url: gs://ossf-scorecard-rawdata +raw-bigquery-table: scorecard-rawdata diff --git a/cron/config/config_test.go b/cron/config/config_test.go index 6c83e2ae880..e5800fafee5 100644 --- a/cron/config/config_test.go +++ b/cron/config/config_test.go @@ -39,6 +39,9 @@ const ( // UPGRADEv2: to remove. prodBucketV2 = "gs://ossf-scorecard-data2" prodBigQueryTableV2 = "scorecard-v2" + // Raw results. + prodRawBucket = "gs://ossf-scorecard-rawdata" + prodRawBigQueryTable = "scorecard-rawdata" ) func getByteValueFromFile(filename string) ([]byte, error) { @@ -73,8 +76,10 @@ func TestYAMLParsing(t *testing.T) { ShardSize: prodShardSize, MetricExporter: prodMetricExporter, // UPGRADEv2: to remove. - ResultDataBucketURLV2: prodBucketV2, - BigQueryTableV2: prodBigQueryTableV2, + ResultDataBucketURLV2: prodBucketV2, + BigQueryTableV2: prodBigQueryTableV2, + RawResultDataBucketURL: prodRawBucket, + RawBigQueryTable: prodRawBigQueryTable, }, }, diff --git a/cron/controller/main.go b/cron/controller/main.go index dd43fa70889..ee543700e6a 100644 --- a/cron/controller/main.go +++ b/cron/controller/main.go @@ -34,7 +34,8 @@ import ( var headSHA = clients.HeadSHA func publishToRepoRequestTopic(iter data.Iterator, topicPublisher pubsub.Publisher, - shardSize int, datetime time.Time) (int32, error) { + shardSize int, datetime time.Time, +) (int32, error) { var shardNum int32 request := data.ScorecardBatchRequest{ JobTime: timestamppb.New(datetime), @@ -122,6 +123,11 @@ func main() { panic(err) } + rawBucket, err := config.GetRawResultDataBucketURL() + if err != nil { + panic(err) + } + shardNum, err := publishToRepoRequestTopic(reader, topicPublisher, shardSize, t) if err != nil { panic(err) @@ -154,4 +160,15 @@ func main() { if err != nil { panic(fmt.Errorf("error writing to BlobStore2: %w", err)) } + + // Raw data. 
+ *metadata.ShardLoc = rawBucket + "/" + data.GetBlobFilename("", t) + metadataJSON, err = protojson.Marshal(&metadata) + if err != nil { + panic(fmt.Errorf("error during protojson.Marshal raw: %w", err)) + } + err = data.WriteToBlobStore(ctx, rawBucket, data.GetShardMetadataFilename(t), metadataJSON) + if err != nil { + panic(fmt.Errorf("error writing to BlobStore raw: %w", err)) + } } diff --git a/cron/format/json_raw_results.go b/cron/format/json_raw_results.go new file mode 100644 index 00000000000..ed0d42cb9a3 --- /dev/null +++ b/cron/format/json_raw_results.go @@ -0,0 +1,308 @@ +// Copyright 2022 Security Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package format + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/ossf/scorecard/v4/checker" + sce "github.com/ossf/scorecard/v4/errors" + "github.com/ossf/scorecard/v4/pkg" +) + +// Flat JSON structure to hold raw results. +type jsonScorecardRawResult struct { + Date string `json:"date"` + Repo jsonRepoV2 `json:"repo"` + Scorecard jsonScorecardV2 `json:"scorecard"` + Metadata []string `json:"metadata"` + Results jsonRawResults `json:"results"` +} + +// TODO: separate each check extraction into its own file. 
+type jsonFile struct { + Path string `json:"path"` + Offset int `json:"offset,omitempty"` +} + +type jsonTool struct { + Name string `json:"name"` + URL string `json:"url"` + Desc string `json:"desc"` + ConfigFiles []jsonFile `json:"files"` + // TODO: Runs, Issues, Merge requests. +} + +type jsonBranchProtectionSettings struct { + RequiredApprovingReviewCount *int `json:"required-reviewer-count"` + AllowsDeletions *bool `json:"allows-deletions"` + AllowsForcePushes *bool `json:"allows-force-pushes"` + RequiresCodeOwnerReviews *bool `json:"requires-code-owner-review"` + RequiresLinearHistory *bool `json:"required-linear-history"` + DismissesStaleReviews *bool `json:"dismisses-stale-reviews"` + EnforcesAdmins *bool `json:"enforces-admin"` + RequiresStatusChecks *bool `json:"requires-status-checks"` + RequiresUpToDateBranchBeforeMerging *bool `json:"requires-updated-branches-to-merge"` + StatusCheckContexts []string `json:"status-checks-contexts"` +} + +type jsonBranchProtection struct { + Protection *jsonBranchProtectionSettings `json:"protection"` + Name string `json:"name"` +} + +type jsonReview struct { + Reviewer jsonUser `json:"reviewer"` + State string `json:"state"` +} + +type jsonUser struct { + Login string `json:"login"` +} + +//nolint:govet +type jsonMergeRequest struct { + Number int `json:"number"` + Labels []string `json:"labels"` + Reviews []jsonReview `json:"reviews"` + Author jsonUser `json:"author"` +} + +type jsonDefaultBranchCommit struct { + // ApprovedReviews *jsonApprovedReviews `json:"approved-reviews"` + Committer jsonUser `json:"committer"` + MergeRequest *jsonMergeRequest `json:"merge-request"` + CommitMessage string `json:"commit-message"` + SHA string `json:"sha"` + + // TODO: check runs, etc. +} + +type jsonRawResults struct { + DatabaseVulnerabilities []jsonDatabaseVulnerability `json:"database-vulnerabilities"` + // List of binaries found in the repo. 
+ Binaries []jsonFile `json:"binaries"` + // List of security policy files found in the repo. + // Note: we return one at most. + SecurityPolicies []jsonFile `json:"security-policies"` + // List of update tools. + // Note: we return one at most. + DependencyUpdateTools []jsonTool `json:"dependency-update-tools"` + // Branch protection settings for development and release branches. + BranchProtections []jsonBranchProtection `json:"branch-protections"` + // Commits. + DefaultBranchCommits []jsonDefaultBranchCommit `json:"default-branch-commits"` +} + +//nolint:unparam +func addCodeReviewRawResults(r *jsonScorecardRawResult, cr *checker.CodeReviewData) error { + r.Results.DefaultBranchCommits = []jsonDefaultBranchCommit{} + for _, commit := range cr.DefaultBranchCommits { + com := jsonDefaultBranchCommit{ + Committer: jsonUser{ + Login: commit.Committer.Login, + }, + CommitMessage: commit.CommitMessage, + SHA: commit.SHA, + } + + // Merge request field. + if commit.MergeRequest != nil { + mr := jsonMergeRequest{ + Number: commit.MergeRequest.Number, + Author: jsonUser{ + Login: commit.MergeRequest.Author.Login, + }, + } + + if len(commit.MergeRequest.Labels) > 0 { + mr.Labels = commit.MergeRequest.Labels + } + + for _, r := range commit.MergeRequest.Reviews { + mr.Reviews = append(mr.Reviews, jsonReview{ + State: r.State, + Reviewer: jsonUser{ + Login: r.Reviewer.Login, + }, + }) + } + + com.MergeRequest = &mr + } + + com.CommitMessage = commit.CommitMessage + + r.Results.DefaultBranchCommits = append(r.Results.DefaultBranchCommits, com) + } + return nil +} + +type jsonDatabaseVulnerability struct { + // For OSV: OSV-2020-484 + // For CVE: CVE-2022-23945 + ID string + // TODO: additional information +} + +//nolint:unparam +func addVulnerbilitiesRawResults(r *jsonScorecardRawResult, vd *checker.VulnerabilitiesData) error { + r.Results.DatabaseVulnerabilities = []jsonDatabaseVulnerability{} + for _, v := range vd.Vulnerabilities { + r.Results.DatabaseVulnerabilities = 
append(r.Results.DatabaseVulnerabilities, + jsonDatabaseVulnerability{ + ID: v.ID, + }) + } + return nil +} + +//nolint:unparam +func addBinaryArtifactRawResults(r *jsonScorecardRawResult, ba *checker.BinaryArtifactData) error { + r.Results.Binaries = []jsonFile{} + for _, v := range ba.Files { + r.Results.Binaries = append(r.Results.Binaries, jsonFile{ + Path: v.Path, + }) + } + return nil +} + +//nolint:unparam +func addSecurityPolicyRawResults(r *jsonScorecardRawResult, sp *checker.SecurityPolicyData) error { + r.Results.SecurityPolicies = []jsonFile{} + for _, v := range sp.Files { + r.Results.SecurityPolicies = append(r.Results.SecurityPolicies, jsonFile{ + Path: v.Path, + }) + } + return nil +} + +//nolint:unparam +func addDependencyUpdateToolRawResults(r *jsonScorecardRawResult, + dut *checker.DependencyUpdateToolData, +) error { + r.Results.DependencyUpdateTools = []jsonTool{} + for i := range dut.Tools { + t := dut.Tools[i] + offset := len(r.Results.DependencyUpdateTools) + r.Results.DependencyUpdateTools = append(r.Results.DependencyUpdateTools, jsonTool{ + Name: t.Name, + URL: t.URL, + Desc: t.Desc, + }) + for _, f := range t.ConfigFiles { + r.Results.DependencyUpdateTools[offset].ConfigFiles = append( + r.Results.DependencyUpdateTools[offset].ConfigFiles, + jsonFile{ + Path: f.Path, + }, + ) + } + } + return nil +} + +//nolint:unparam +func addBranchProtectionRawResults(r *jsonScorecardRawResult, bp *checker.BranchProtectionsData) error { + r.Results.BranchProtections = []jsonBranchProtection{} + for _, v := range bp.Branches { + var bp *jsonBranchProtectionSettings + if v.Protected != nil && *v.Protected { + bp = &jsonBranchProtectionSettings{ + AllowsDeletions: v.AllowsDeletions, + AllowsForcePushes: v.AllowsForcePushes, + RequiresCodeOwnerReviews: v.RequiresCodeOwnerReviews, + RequiresLinearHistory: v.RequiresLinearHistory, + DismissesStaleReviews: v.DismissesStaleReviews, + EnforcesAdmins: v.EnforcesAdmins, + RequiresStatusChecks: 
v.RequiresStatusChecks, + RequiresUpToDateBranchBeforeMerging: v.RequiresUpToDateBranchBeforeMerging, + RequiredApprovingReviewCount: v.RequiredApprovingReviewCount, + StatusCheckContexts: v.StatusCheckContexts, + } + } + r.Results.BranchProtections = append(r.Results.BranchProtections, jsonBranchProtection{ + Name: v.Name, + Protection: bp, + }) + } + return nil +} + +func fillJSONRawResults(r *jsonScorecardRawResult, raw *checker.RawResults) error { + // Vulnerabilities. + if err := addVulnerbilitiesRawResults(r, &raw.VulnerabilitiesResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + // Binary-Artifacts. + if err := addBinaryArtifactRawResults(r, &raw.BinaryArtifactResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + // Security-Policy. + if err := addSecurityPolicyRawResults(r, &raw.SecurityPolicyResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + // Dependency-Update-Tool. + if err := addDependencyUpdateToolRawResults(r, &raw.DependencyUpdateToolResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + // Branch-Protection. + if err := addBranchProtectionRawResults(r, &raw.BranchProtectionResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + // Code-Review. + if err := addCodeReviewRawResults(r, &raw.CodeReviewResults); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, err.Error()) + } + + return nil +} + +// AsRawJSON exports results as JSON for raw results. 
+func AsRawJSON(r *pkg.ScorecardResult, writer io.Writer) error { + encoder := json.NewEncoder(writer) + out := jsonScorecardRawResult{ + Repo: jsonRepoV2{ + Name: r.Repo.Name, + Commit: r.Repo.CommitSHA, + }, + Scorecard: jsonScorecardV2{ + Version: r.Scorecard.Version, + Commit: r.Scorecard.CommitSHA, + }, + Date: r.Date.Format("2006-01-02"), + Metadata: r.Metadata, + } + + // if err := out.fillJSONRawResults(r.Checks[0].RawResults); err != nil { + if err := fillJSONRawResults(&out, &r.RawResults); err != nil { + return err + } + + if err := encoder.Encode(out); err != nil { + return sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("encoder.Encode: %v", err)) + } + + return nil +} diff --git a/cron/k8s/controller.release.yaml b/cron/k8s/controller.release.yaml index f00cced6e80..7aeb1135e98 100644 --- a/cron/k8s/controller.release.yaml +++ b/cron/k8s/controller.release.yaml @@ -62,6 +62,9 @@ spec: # UPGRADEv2: to remove. - name: SCORECARD_DATA_BUCKET_URLV2 value: "gs://ossf-scorecard-data-releasetest2" + # Raw results. + - name: RAW_SCORECARD_DATA_BUCKET_URL + value: "gs://ossf-scorecard-rawdata-releasetest" - name: SCORECARD_SHARD_SIZE value: "5" resources: diff --git a/cron/k8s/transfer-raw.yaml b/cron/k8s/transfer-raw.yaml new file mode 100644 index 00000000000..faff1971b15 --- /dev/null +++ b/cron/k8s/transfer-raw.yaml @@ -0,0 +1,41 @@ +# Copyright 2021 Security Scorecard Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: scorecard-bq-raw-transfer +spec: + # At 02:00UTC on Monday and Thursday. + schedule: "0 2 * * 1,4" + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + spec: + containers: + - name: bq-rawdata-transfer + image: gcr.io/openssf/scorecard-bq-transfer:latest + imagePullPolicy: Always + resources: + limits: + memory: 1Gi + requests: + memory: 1Gi + env: + - name: RAW_SCORECARD_BIGQUERY_TABLE + value: "scorecard-rawdata" + - name: RAW_SCORECARD_DATA_BUCKET_URL + value: "gs://ossf-scorecard-rawdata" + restartPolicy: OnFailure diff --git a/cron/k8s/transfer.release-raw.yaml b/cron/k8s/transfer.release-raw.yaml new file mode 100644 index 00000000000..5a30062e272 --- /dev/null +++ b/cron/k8s/transfer.release-raw.yaml @@ -0,0 +1,43 @@ +# Copyright 2021 Security Scorecard Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: batch/v1 +kind: CronJob +metadata: + name: scorecard-bq-transfer-raw-releasetest +spec: + # Daily at 9am and 6pm PST. 
+ schedule: "0 1,16 * * *" + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: bq-transfer-rawdata-releasetest + image: gcr.io/openssf/scorecard-bq-transfer:latest + imagePullPolicy: Always + env: + - name: RAW_SCORECARD_DATA_BUCKET_URL + value: "gs://ossf-scorecard-rawdata-releasetest" + - name: RAW_SCORECARD_BIGQUERY_TABLE + value: "scorecard_raw-releasetest" + - name: SCORECARD_COMPLETION_THRESHOLD + value: "0.9" + resources: + limits: + memory: 1Gi + requests: + memory: 1Gi diff --git a/cron/k8s/worker.release.yaml b/cron/k8s/worker.release.yaml index 38c8492ac37..ca02fe8ab17 100644 --- a/cron/k8s/worker.release.yaml +++ b/cron/k8s/worker.release.yaml @@ -37,6 +37,8 @@ spec: # UPGRADEv2: to remove. - name: SCORECARD_DATA_BUCKET_URLV2 value: "gs://ossf-scorecard-data-releasetest2" + - name: RAW_SCORECARD_DATA_BUCKET_URL + value: "gs://ossf-scorecard-rawdata-releasetest" - name: SCORECARD_REQUEST_SUBSCRIPTION_URL value: "gcppubsub://projects/openssf/subscriptions/scorecard-batch-worker-releasetest" - name: SCORECARD_BLACKLISTED_CHECKS diff --git a/cron/worker/main.go b/cron/worker/main.go index b0eafd425db..a491da2be68 100644 --- a/cron/worker/main.go +++ b/cron/worker/main.go @@ -48,12 +48,13 @@ var ignoreRuntimeErrors = flag.Bool("ignoreRuntimeErrors", false, "if set to tru // nolint: gocognit func processRequest(ctx context.Context, batchRequest *data.ScorecardBatchRequest, - blacklistedChecks []string, bucketURL, bucketURL2 string, + blacklistedChecks []string, bucketURL, bucketURL2, rawBucketURL string, checkDocs docs.Doc, repoClient clients.RepoClient, ossFuzzRepoClient clients.RepoClient, ciiClient clients.CIIBestPracticesClient, vulnsClient clients.VulnerabilitiesClient, - logger *log.Logger) error { + logger *log.Logger, +) error { filename := data.GetBlobFilename( fmt.Sprintf("shard-%07d", batchRequest.GetShardNum()), batchRequest.GetJobTime().AsTime()) @@ -67,7 +68,13 @@ func 
processRequest(ctx context.Context, if err != nil { return fmt.Errorf("error during BlobExists: %w", err) } - if exists1 && exists2 { + + exists3, err := data.BlobExists(ctx, rawBucketURL, filename) + if err != nil { + return fmt.Errorf("error during BlobExists: %w", err) + } + + if exists1 && exists2 && exists3 { logger.Info(fmt.Sprintf("Already processed shard %s. Nothing to do.", filename)) // We have already processed this request, nothing to do. return nil @@ -75,6 +82,7 @@ func processRequest(ctx context.Context, var buffer bytes.Buffer var buffer2 bytes.Buffer + var rawBuffer bytes.Buffer // TODO: run Scorecard for each repo in a separate thread. for _, repoReq := range batchRequest.GetRepos() { logger.Info(fmt.Sprintf("Running Scorecard for repo: %s", *repoReq.Url)) @@ -99,7 +107,7 @@ func processRequest(ctx context.Context, for _, check := range blacklistedChecks { delete(checksToRun, check) } - result, err := pkg.RunScorecards(ctx, repo, commitSHA, false /*raw*/, checksToRun, + result, err := pkg.RunScorecards(ctx, repo, commitSHA, checksToRun, repoClient, ossFuzzRepoClient, ciiClient, vulnsClient) if errors.Is(err, sce.ErrRepoUnreachable) { // Not accessible repo - continue. @@ -129,6 +137,11 @@ func processRequest(ctx context.Context, if err := format.AsJSON2(&result, true /*showDetails*/, log.InfoLevel, checkDocs, &buffer2); err != nil { return fmt.Errorf("error during result.AsJSON2: %w", err) } + + // Raw result. + if err := format.AsRawJSON(&result, &rawBuffer); err != nil { + return fmt.Errorf("error during result.AsRawJSON: %w", err) + } } if err := data.WriteToBlobStore(ctx, bucketURL, filename, buffer.Bytes()); err != nil { return fmt.Errorf("error during WriteToBlobStore: %w", err) @@ -138,6 +151,11 @@ func processRequest(ctx context.Context, return fmt.Errorf("error during WriteToBlobStore2: %w", err) } + // Raw result. 
+ if err := data.WriteToBlobStore(ctx, rawBucketURL, filename, rawBuffer.Bytes()); err != nil { + return fmt.Errorf("error during WriteToBlobStore raw: %w", err) + } + logger.Info(fmt.Sprintf("Write to shard file successful: %s", filename)) return nil @@ -191,6 +209,11 @@ func main() { panic(err) } + rawBucketURL, err := config.GetRawResultDataBucketURL() + if err != nil { + panic(err) + } + blacklistedChecks, err := config.GetBlacklistedChecks() if err != nil { panic(err) @@ -236,7 +259,7 @@ func main() { break } if err := processRequest(ctx, req, blacklistedChecks, - bucketURL, bucketURL2, checkDocs, + bucketURL, bucketURL2, rawBucketURL, checkDocs, repoClient, ossFuzzRepoClient, ciiClient, vulnsClient, logger); err != nil { // TODO(log): Previously Warn. Consider logging an error here. logger.Info(fmt.Sprintf("error processing request: %v", err)) diff --git a/pkg/scorecard.go b/pkg/scorecard.go index cfda7297637..0cc42b5f87f 100644 --- a/pkg/scorecard.go +++ b/pkg/scorecard.go @@ -31,7 +31,8 @@ func runEnabledChecks(ctx context.Context, repo clients.Repo, raw *checker.RawResults, checksToRun checker.CheckNameToFnMap, repoClient clients.RepoClient, ossFuzzRepoClient clients.RepoClient, ciiClient clients.CIIBestPracticesClient, vulnsClient clients.VulnerabilitiesClient, - resultsCh chan checker.CheckResult) { + resultsCh chan checker.CheckResult, +) { request := checker.CheckRequest{ Ctx: ctx, RepoClient: repoClient, @@ -78,12 +79,12 @@ func getRepoCommitHash(r clients.RepoClient) (string, error) { func RunScorecards(ctx context.Context, repo clients.Repo, commitSHA string, - raw bool, checksToRun checker.CheckNameToFnMap, repoClient clients.RepoClient, ossFuzzRepoClient clients.RepoClient, ciiClient clients.CIIBestPracticesClient, - vulnsClient clients.VulnerabilitiesClient) (ScorecardResult, error) { + vulnsClient clients.VulnerabilitiesClient, +) (ScorecardResult, error) { if err := repoClient.InitRepo(repo, commitSHA); err != nil { // No need to call 
sce.WithMessage() since InitRepo will do that for us. //nolint:wrapcheck @@ -108,13 +109,8 @@ func RunScorecards(ctx context.Context, Date: time.Now(), } resultsCh := make(chan checker.CheckResult) - if raw { - go runEnabledChecks(ctx, repo, &ret.RawResults, checksToRun, repoClient, ossFuzzRepoClient, - ciiClient, vulnsClient, resultsCh) - } else { - go runEnabledChecks(ctx, repo, nil, checksToRun, repoClient, ossFuzzRepoClient, - ciiClient, vulnsClient, resultsCh) - } + go runEnabledChecks(ctx, repo, &ret.RawResults, checksToRun, repoClient, ossFuzzRepoClient, + ciiClient, vulnsClient, resultsCh) for result := range resultsCh { ret.Checks = append(ret.Checks, result)