Skip to content

Commit

Permalink
Store metadata in BigQuery (#1197)
Browse files Browse the repository at this point in the history
  • Loading branch information
azeemshaikh38 authored Oct 30, 2021
1 parent c751120 commit 69f9774
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 52 deletions.
37 changes: 21 additions & 16 deletions cron/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,13 @@ import (
"github.com/ossf/scorecard/v3/pkg"
)

func publishToRepoRequestTopic(ctx context.Context, iter data.Iterator, datetime time.Time) (int32, error) {
func publishToRepoRequestTopic(iter data.Iterator, topicPublisher pubsub.Publisher,
shardSize int, datetime time.Time) (int32, error) {
var shardNum int32
request := data.ScorecardBatchRequest{
JobTime: timestamppb.New(datetime),
ShardNum: &shardNum,
}
topic, err := config.GetRequestTopicURL()
if err != nil {
return shardNum, fmt.Errorf("error getting RequestTopicURL: %w", err)
}
topicPublisher, err := pubsub.CreatePublisher(ctx, topic)
if err != nil {
return shardNum, fmt.Errorf("error running CreatePublisher: %w", err)
}

shardSize, err := config.GetShardSize()
if err != nil {
return shardNum, fmt.Errorf("error getting ShardSize: %w", err)
}

// Create and send batch requests of repoURLs of size `ShardSize`:
// * Iterate through incoming repoURLs until `request` has len(Repos) of size `ShardSize`.
Expand All @@ -59,7 +47,10 @@ func publishToRepoRequestTopic(ctx context.Context, iter data.Iterator, datetime
if err != nil {
return shardNum, fmt.Errorf("error reading repoURL: %w", err)
}
request.Repos = append(request.GetRepos(), repoURL.Repo)
request.Repos = append(request.GetRepos(), &data.Repo{
Url: &repoURL.Repo,
Metadata: repoURL.Metadata.ToString(),
})
if len(request.GetRepos()) < shardSize {
continue
}
Expand Down Expand Up @@ -102,6 +93,20 @@ func main() {
panic(err)
}

topic, err := config.GetRequestTopicURL()
if err != nil {
panic(err)
}
topicPublisher, err := pubsub.CreatePublisher(ctx, topic)
if err != nil {
panic(err)
}

shardSize, err := config.GetShardSize()
if err != nil {
panic(err)
}

bucket, err := config.GetResultDataBucketURL()
if err != nil {
panic(err)
Expand All @@ -112,7 +117,7 @@ func main() {
panic(err)
}

shardNum, err := publishToRepoRequestTopic(ctx, reader, t)
shardNum, err := publishToRepoRequestTopic(reader, topicPublisher, shardSize, t)
if err != nil {
panic(err)
}
Expand Down
9 changes: 9 additions & 0 deletions cron/data/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ func (s *CSVStrings) UnmarshalCSV(input []byte) error {
return nil
}

// ToString converts CSVStrings -> []string.
//
// The returned slice is a fresh copy, so callers may modify it without
// affecting s. A nil or empty receiver yields a nil slice.
func (s CSVStrings) ToString() []string {
	// Appending onto a nil slice copies every element in one step and leaves
	// the result nil when there is nothing to copy.
	return append([]string(nil), s...)
}

// RepoFormat is used to read input repos.
type RepoFormat struct {
Repo string `csv:"repo"`
Expand Down
41 changes: 41 additions & 0 deletions cron/data/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,47 @@ import (
"github.com/google/go-cmp/cmp"
)

// TestToString checks CSVStrings.ToString against a table of inputs. The
// table expects the elements back unchanged for non-empty inputs and a nil
// result for both a nil and an empty (non-nil) input.
func TestToString(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name   string
		input  CSVStrings
		output []string
	}
	scenarios := []scenario{
		{name: "Basic", input: []string{"str1", "str2"}, output: []string{"str1", "str2"}},
		{name: "NilInput", input: nil, output: nil},
		{name: "EmptyString", input: []string{""}, output: []string{""}},
		{name: "EmptySlice", input: make([]string, 0), output: nil},
	}

	for idx := range scenarios {
		// Take a per-iteration copy so each parallel subtest sees its own case.
		sc := scenarios[idx]
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			got := sc.input.ToString()
			if cmp.Equal(sc.output, got) {
				return
			}
			t.Errorf("testcase failed: expected equal, got diff: %s", cmp.Diff(sc.output, got))
		})
	}
}

func TestUnmarshalCsv(t *testing.T) {
t.Parallel()
testcases := []struct {
Expand Down
135 changes: 106 additions & 29 deletions cron/data/request.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion cron/data/request.proto
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ import "google/protobuf/timestamp.proto";

option go_package = "github.com/ossf/scorecard/cron/data";

// Repo describes a single repository entry carried inside a
// ScorecardBatchRequest.
message Repo {
// Repository identifier. The cron controller populates this from the `Repo`
// value of each input row.
optional string url = 1;
// Free-form metadata strings attached to the repository. The cron controller
// populates this from the input row's metadata via CSVStrings.ToString().
repeated string metadata = 2;
}

// ScorecardBatchRequest is one batch (shard) of repositories published by the
// cron controller.
//
// NOTE(review): this listing appears to be a rendered diff with removed and
// added lines shown together. `repeated string repos = 1;` looks like the
// pre-change field that `repeated Repo repos = 4;` and `reserved 1;` replace;
// confirm against the actual request.proto before relying on this block.
message ScorecardBatchRequest {
repeated string repos = 1;
// Repositories in this batch, each with its optional metadata.
repeated Repo repos = 4;
// Shard number of this batch; the controller sets it from its shard counter.
optional int32 shard_num = 2;
// Job timestamp; the controller sets it from the datetime passed to
// publishToRepoRequestTopic.
optional google.protobuf.Timestamp job_time = 3;
// Field number 1 is reserved so that it cannot be reused by a future field.
reserved 1;
}
6 changes: 5 additions & 1 deletion cron/pubsub/publisher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,11 @@ func TestPublish(t *testing.T) {
topic: testcase.topic,
}
request := data.ScorecardBatchRequest{
Repos: []string{"repo1"},
Repos: []*data.Repo{
{
Url: &repo1,
},
},
}
if err := publisher.Publish(&request); err != nil {
t.Errorf("Failed to parse message: %v", err)
Expand Down
Loading

0 comments on commit 69f9774

Please sign in to comment.