Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a metadata_merge that merges YAML from security check & download_… #593

Merged
merged 7 commits into from
Jun 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 2 additions & 14 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ go_register_toolchains()
# is being imported from a *.bzl file.
# gazelle:repo bazel_gazelle

load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")
load("//repositories:go_repositories.bzl", bazel_toolchains_go_deps = "go_deps")

gazelle_dependencies()
bazel_toolchains_go_deps()

container_pull(
name = "official_jessie",
Expand Down Expand Up @@ -591,15 +591,3 @@ pip_import(
load("@pip_deps//:requirements.bzl", "pip_install")

pip_install()

go_repository(
name = "in_gopkg_yaml_v2",
commit = "51d6538a90f86fe93ac480b35f37b2be17fef232", # v2.2.2
importpath = "gopkg.in/yaml.v2",
)

go_repository(
name = "com_github_pkg_errors",
commit = "05ac58a23b8798a296fa64f7d9c1559904db4b98", # v0.8.1
importpath = "github.com/pkg/errors",
)
46 changes: 46 additions & 0 deletions repositories/go_repositories.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2016 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Once recursive workspace is implemented in Bazel, this file should cease
# to exist.
"""
Provides functions to pull all Go external package dependencies of this
repository.
"""

load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")

def go_deps():
    """Pull in external Go packages needed by Go binaries in this repo.

    Pull in all dependencies needed to build the Go binaries in this
    repository. This function assumes the repositories imported by the macro
    'repositories' in //repositories:repositories.bzl have been imported
    already.

    gazelle_dependencies() is invoked first. Each Go repository below is then
    declared only if no rule with the same name exists at the time this macro
    starts running.
    """
    gazelle_dependencies()

    # Names of the rules that already exist; captured once, before any of the
    # go_repository calls below.
    excludes = native.existing_rules().keys()
    if "in_gopkg_yaml_v2" not in excludes:
        go_repository(
            name = "in_gopkg_yaml_v2",
            commit = "51d6538a90f86fe93ac480b35f37b2be17fef232",  # v2.2.2
            importpath = "gopkg.in/yaml.v2",
        )

    if "com_github_pkg_errors" not in excludes:
        go_repository(
            name = "com_github_pkg_errors",
            commit = "05ac58a23b8798a296fa64f7d9c1559904db4b98",  # v0.8.1
            importpath = "github.com/pkg/errors",
        )
88 changes: 88 additions & 0 deletions rules/container/metadata_merge.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
metadata_merge accepts a list of YAML metadata generated by the download_pkgs
& packages_metadata rules. metadata_merge merges the input list into a single
YAML file. The tags are added in order & de-duplicated. The de-duplication
removes later occurrences of the same tag. The packages are merged in order
including any duplicates.

Example for the following input YAML files:
File 1 (generated by security_check rule):
tags:
- foo

File 2 (generated by download_pkgs rule):
packages:
- name: foo
version: 1

File 3 (generated by security_check rule):
tags:
- bar

File 4 (generated by download_pkgs rule)
packages:
- name: baz
version: 2

The merged YAML will be as follows:
tags:
- foo
- bar
packages:
- name: foo
version: 1
- name: baz
version: 2
"""

def _impl(ctx):
    """Implementation of the metadata_merge rule.

    Registers one action that runs the merger binary over every file supplied
    through the 'srcs' attribute and writes the merged result to the rule's
    'yaml' output.

    Args:
        ctx: The rule context.
    """
    yaml_files = ctx.files.srcs
    if not yaml_files:
        # The input attribute is named 'srcs'; name it correctly so the error
        # points the user at the attribute they actually need to set.
        fail("Attribute srcs to {} did not specify any YAML files.".format(ctx.label))

    # One "-yamlFile <path>" pair per input, in the order the files were
    # listed, followed by the single "-outFile <path>" pair.
    args = []
    for yaml_file in yaml_files:
        args.extend(["-yamlFile", yaml_file.path])
    args.extend(["-outFile", ctx.outputs.yaml.path])

    ctx.actions.run(
        inputs = yaml_files,
        outputs = [ctx.outputs.yaml],
        executable = ctx.executable._merger,
        arguments = args,
        mnemonic = "MetadataYAMLMerge",
    )

# metadata_merge merges the YAML files given in 'srcs' into a single
# "<name>.yaml" output by running the Go merger binary.
metadata_merge = rule(
    attrs = {
        "srcs": attr.label_list(
            mandatory = True,
            allow_files = [".yaml"],
            doc = "YAML file targets to merge.",
        ),
        # Private attribute: the tool that performs the merge.
        "_merger": attr.label(
            default = "@bazel_toolchains//src/go/cmd/metadata_merge",
            cfg = "host",
            executable = True,
            doc = "The go binary that merges a given list of YAML files to " +
                  "produce a single output YAML.",
        ),
    },
    doc = "Merges a list of metadata YAML files into a single YAML file.",
    outputs = {
        "yaml": "%{name}.yaml",
    },
    implementation = _impl,
)
18 changes: 18 additions & 0 deletions src/go/cmd/metadata_merge/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

# Library holding the sources of the metadata_merge command. It is private to
# this package and is only consumed through the binary below.
go_library(
    name = "go_default_library",
    srcs = ["metadata_merge.go"],
    importpath = "github.com/bazelbuild/bazel-toolchains/src/go/cmd/metadata_merge",
    visibility = ["//visibility:private"],
    deps = [
        "//src/go/pkg/metadata:go_default_library",
        "@in_gopkg_yaml_v2//:go_default_library",
    ],
)

# The metadata_merge executable, built by embedding the library above.
# Publicly visible so targets outside this package can depend on it.
go_binary(
    name = "metadata_merge",
    embed = [":go_default_library"],
    visibility = ["//visibility:public"],
)
115 changes: 115 additions & 0 deletions src/go/cmd/metadata_merge/metadata_merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package main

import (
"flag"
"fmt"
"gopkg.in/yaml.v2"
"io/ioutil"
"log"
"os"

"github.com/bazelbuild/bazel-toolchains/src/go/pkg/metadata"
)

var (
outFile = flag.String("outFile", "", "Output merged YAML file to generate.")
)

// strArgList is a repeatable command line flag: every occurrence of the flag
// contributes one more entry to the list. It provides the String and Set
// methods needed to register it with flag.Var.
type strArgList struct {
	// Args holds the values given for the flag, in the order they appeared.
	Args []string
}

// String returns the collected values in fmt's default slice formatting,
// e.g. "[a b]".
func (l *strArgList) String() string {
	return fmt.Sprint(l.Args)
}

// Set records the value of one occurrence of the flag by appending it to the
// list of values seen so far. It never fails.
func (l *strArgList) Set(value string) error {
	l.Args = append(l.Args, value)
	return nil
}

// metadataYAML stores the contents of one or more YAML files with the
// following top level keys:
//  1. "tags" (list of strings).
//  2. "packages" (list of YAML objects with keys "name" & "version" which are
//     strings).
//
// It is both the type each input file is decoded into and the accumulator
// that the merged result is built up in.
type metadataYAML struct {
	// Tags is the list of tags read from YAML files with a top level "tags"
	// key.
	Tags []string `yaml:"tags"`
	// Packages is the list of software package entries read from YAML files
	// with a top level "packages" key.
	Packages []metadata.PackageMetadata `yaml:"packages"`

	// tagsLookup maintains a map of tags in the "Tags" field, keyed by tag.
	// merge consults it to skip tags that were already added. The field is
	// unexported and carries no yaml tag.
	tagsLookup map[string]bool
}

// merge merges the contents of the metadataYAML 'from' into the metadataYAML
// 'm'. This does the following:
//  1. Add every tag that appears in 'from' into 'm' if it doesn't already
//     exist in 'm'. The relative order of first occurrences is preserved.
//  2. Add every package that appears in 'from' into 'm'. If the list of
//     packages in 'from' has duplicates with the list of packages in 'm', the
//     list of packages in 'm' will contain these duplicates after the merge.
//
// A nil 'from' is treated as empty and leaves 'm' untouched. 'm' may be a
// zero-value metadataYAML: its tag lookup is built on first use.
//
// merge currently always returns nil.
func (m *metadataYAML) merge(from *metadataYAML) error {
	if from == nil {
		return nil
	}
	if m.tagsLookup == nil {
		// Writing to a nil map panics, so build the lookup lazily. Seed it
		// from the tags 'm' already holds so that they are still recognised
		// as duplicates.
		m.tagsLookup = make(map[string]bool, len(m.Tags)+len(from.Tags))
		for _, t := range m.Tags {
			m.tagsLookup[t] = true
		}
	}
	for _, t := range from.Tags {
		if _, ok := m.tagsLookup[t]; ok {
			// This tag has been added already.
			continue
		}
		m.tagsLookup[t] = true
		m.Tags = append(m.Tags, t)
	}
	// Packages are concatenated as-is; duplicates are intentionally kept.
	m.Packages = append(m.Packages, from.Packages...)
	return nil
}

// main reads every YAML file named by a -yamlFile flag, merges them in the
// order given and writes the merged YAML to the path named by -outFile. Any
// failure (missing flags, unreadable or unparsable input, write error) is
// logged and terminates the program via log.Fatalf.
func main() {
	var inputs strArgList
	flag.Var(&inputs, "yamlFile", "Path to an input YAML file to process. Can be specified multiple times to process more than one file.")
	flag.Parse()

	// Echo the effective command line before validating it.
	log.Println("Running the YAML Metadata merger.")
	for i := range inputs.Args {
		log.Println("-yamlFile", inputs.Args[i])
	}
	log.Println("-outFile", *outFile)

	switch {
	case len(inputs.Args) == 0:
		log.Fatalf("No input YAML files provided. Use the -yamlFile flag to provide at least 1 YAML file.")
	case *outFile == "":
		log.Fatalf("-outFile was not specified.")
	}

	// merged accumulates the contents of all inputs.
	merged := metadataYAML{tagsLookup: map[string]bool{}}
	for _, path := range inputs.Args {
		log.Println("Loading metadata from", path)
		data, err := ioutil.ReadFile(path)
		if err != nil {
			log.Fatalf("Unable to read data from %s: %v", path, err)
		}
		var parsed metadataYAML
		if err = yaml.UnmarshalStrict(data, &parsed); err != nil {
			log.Fatalf("Unable to parse data read from %s as metadata YAML: %v", path, err)
		}
		if err = merged.merge(&parsed); err != nil {
			log.Fatalf("Unable to merge metadata read from %s into a single merged YAML: %v", path, err)
		}
	}
	log.Printf("Merged YAML has %d tags and %d packages.", len(merged.Tags), len(merged.Packages))

	out, err := yaml.Marshal(&merged)
	if err != nil {
		log.Fatalf("Unable to generate a merged YAML blob for the output merged YAML file: %v", err)
	}
	err = ioutil.WriteFile(*outFile, out, os.FileMode(0644))
	if err != nil {
		log.Fatalf("Unable to write %d bytes of content to output YAML file %s: %v", len(out), *outFile, err)
	}
	log.Printf("Successfully generated output %s that merged %d YAML files.", *outFile, len(inputs.Args))
}
1 change: 1 addition & 0 deletions src/go/cmd/packages_metadata/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
importpath = "github.com/bazelbuild/bazel-toolchains/src/go/cmd/packages_metadata",
visibility = ["//visibility:private"],
deps = [
"//src/go/pkg/metadata:go_default_library",
"@com_github_pkg_errors//:go_default_library",
"@in_gopkg_yaml_v2//:go_default_library",
],
Expand Down
26 changes: 6 additions & 20 deletions src/go/cmd/packages_metadata/packages_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,21 @@ import (
"os"
"sort"
"strings"

"github.com/bazelbuild/bazel-toolchains/src/go/pkg/metadata"
)

var (
inputCSV = flag.String("inputCSV", "", "Path to the metadata csv file generated by the download_pkgs rule in GoogleContainerTools/base_images_docker")
outputYAML = flag.String("outputYAML", "", "Path to the output YAML file to generate.")
)

// packageMetada is the YAML entry for a single software package.
type packageMetadata struct {
// Name is the name of the software package.
Name string `yaml:"name"`
// Version is the version string of the software package.
Version string `yaml:"version"`
}

// packageMetadata is the collection of software package metadata read from
// the input CSV file to be serialized into a YAML file.
type packagesMetadata struct {
// Packages is the list of software package entries read from the input
// CSV file.
Packages []packageMetadata `yaml:"packages"`
}

// newPackageMetadataFromCSV reads the package metadata from the given
// CSV file with columns "Name" & "Version" and returns the corresponding
// packagesMetadata object. The package entries in the returned packagesMetadata
// PackagesMetadata object. The package entries in the returned PackagesMetadata
// are sorted by their package names.
func newPackagesMetadataFromCSV(csvFile string) (*packagesMetadata, error) {
result := new(packagesMetadata)
func newPackagesMetadataFromCSV(csvFile string) (*metadata.PackagesMetadata, error) {
result := new(metadata.PackagesMetadata)
f, err := os.Open(csvFile)
if err != nil {
return nil, errors.Wrapf(err, "unable to open %s for reading", csvFile)
Expand All @@ -74,7 +60,7 @@ func newPackagesMetadataFromCSV(csvFile string) (*packagesMetadata, error) {
if err != nil {
return nil, errors.Wrapf(err, "error parsing record in CSV file %s", csvFile)
}
result.Packages = append(result.Packages, packageMetadata{
result.Packages = append(result.Packages, metadata.PackageMetadata{
Name: record[0],
Version: record[1],
})
Expand Down
8 changes: 8 additions & 0 deletions src/go/pkg/metadata/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

# Library for the metadata package (packages.go). It is publicly visible so
# that other packages can depend on it.
go_library(
    name = "go_default_library",
    srcs = ["packages.go"],
    importpath = "github.com/bazelbuild/bazel-toolchains/src/go/pkg/metadata",
    visibility = ["//visibility:public"],
)
19 changes: 19 additions & 0 deletions src/go/pkg/metadata/packages.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Package metadata provides functionality to store metadata about debian
// packages installed in a docker image layer.
package metadata

// PackageMetadata is the YAML entry for a single software package. It is
// serialized as a mapping with the keys "name" and "version".
type PackageMetadata struct {
	// Name is the name of the software package.
	Name string `yaml:"name"`
	// Version is the version string of the software package.
	Version string `yaml:"version"`
}

// PackagesMetadata is the collection of software package metadata read from
// the input CSV file to be serialized into a YAML file. The collection is
// stored under the top level "packages" key of the YAML document.
type PackagesMetadata struct {
	// Packages is the list of software package entries read from the input
	// CSV file.
	Packages []PackageMetadata `yaml:"packages"`
}
Loading