From 153ac47d430a127f2ceec0df7c79b115ff45121f Mon Sep 17 00:00:00 2001 From: Stan Rosenberg Date: Fri, 12 May 2023 18:53:40 -0400 Subject: [PATCH] roachprod: stage arm64 and FIPS-enabled binaries Add `--arch` to override binary's architecture and refactor. As of this change, `roachprod stage` is able to stage both amd64 and arm64 on linux and darwin, as well as FIPS-enabled binary built for amd64. In conjunction with the previous change [1], roachprod now uses arm64-based AMI for graviton2/graviton3 machines. Below is an example of how to create a VM with graviton3, ``` roachprod create -n1 --clouds aws --aws-machine-type m7g.2xlarge --local-ssd=false $CRL_USERNAME-test roachprod stage --arch arm64 $CRL_USERNAME-test release v23.1.0-rc.2 roachprod start $CRL_USERNAME-test ``` [1] https://github.com/cockroachdb/cockroach/pull/103236 Epic: none Release note: None --- pkg/cmd/roachprod/flags.go | 10 +- pkg/cmd/roachprod/main.go | 36 +++++- pkg/cmd/roachtest/cluster.go | 3 +- pkg/roachprod/install/staging.go | 79 ++++++++++--- pkg/roachprod/install/staging_test.go | 161 +++++++++++++++++++++++++- pkg/roachprod/roachprod.go | 30 ++++- pkg/roachprod/vm/aws/aws.go | 12 +- 7 files changed, 302 insertions(+), 29 deletions(-) diff --git a/pkg/cmd/roachprod/flags.go b/pkg/cmd/roachprod/flags.go index b867fcdac15f..e5852c624542 100644 --- a/pkg/cmd/roachprod/flags.go +++ b/pkg/cmd/roachprod/flags.go @@ -62,6 +62,7 @@ var ( createVMOpts = vm.DefaultCreateOpts() startOpts = roachprod.DefaultStartOpts() stageOS string + stageArch string stageDir string logsDir string logsFilter string @@ -106,6 +107,8 @@ func initFlags() { vm.AllProviderNames())) createCmd.Flags().BoolVar(&createVMOpts.GeoDistributed, "geo", false, "Create geo-distributed cluster") + createCmd.Flags().BoolVar(&createVMOpts.EnableFIPS, + "fips", false, "Enable FIPS mode (uses custom AMI)") // N.B. We set "usage=roachprod" as the default, custom label for billing tracking. 
createCmd.Flags().StringToStringVar(&createVMOpts.CustomLabels, "label", map[string]string{"usage": "roachprod"}, @@ -217,9 +220,14 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS putCmd.Flags().BoolVar(&useTreeDist, "treedist", useTreeDist, "use treedist copy algorithm") stageCmd.Flags().StringVar(&stageOS, "os", "", "operating system override for staged binaries") - stageCmd.Flags().StringVar(&stageDir, "dir", "", "destination for staged binaries") + stageCmd.Flags().StringVar(&stageArch, "arch", "", + "architecture override for staged binaries [amd64, arm64, fips]; N.B. fips implies amd64 with openssl") + stageCmd.Flags().StringVar(&stageDir, "dir", "", "destination for staged binaries") + // N.B. stageURLCmd just prints the URL that stageCmd would use. stageURLCmd.Flags().StringVar(&stageOS, "os", "", "operating system override for staged binaries") + stageURLCmd.Flags().StringVar(&stageArch, "arch", "", + "architecture override for staged binaries [amd64, arm64, fips]; N.B. fips implies amd64 with openssl") logsCmd.Flags().StringVar(&logsFilter, "filter", "", "re to filter log messages") diff --git a/pkg/cmd/roachprod/main.go b/pkg/cmd/roachprod/main.go index 3e1895a59729..565ca56e68a3 100644 --- a/pkg/cmd/roachprod/main.go +++ b/pkg/cmd/roachprod/main.go @@ -60,7 +60,8 @@ The above commands will create a "local" 3 node cluster, start a cockroach cluster on these nodes, run a sql command on the 2nd node, stop, wipe and destroy the cluster. `, - Version: "details:\n" + build.GetInfo().Long(), + Version: "details:\n" + build.GetInfo().Long(), + PersistentPreRun: validateAndConfigure, } // Provide `cobra.Command` functions with a standard return code handler. 
@@ -779,7 +780,7 @@ Currently available application options are: if len(args) == 2 { versionArg = args[1] } - urls, err := roachprod.StageURL(config.Logger, args[0], versionArg, stageOS) + urls, err := roachprod.StageURL(config.Logger, args[0], versionArg, stageOS, stageArch) if err != nil { return err } @@ -818,7 +819,7 @@ Some examples of usage: if len(args) == 3 { versionArg = args[2] } - return roachprod.Stage(context.Background(), config.Logger, args[0], stageOS, stageDir, args[1], versionArg) + return roachprod.Stage(context.Background(), config.Logger, args[0], stageOS, stageArch, stageDir, args[1], versionArg) }), } @@ -1141,6 +1142,35 @@ var fixLongRunningAWSHostnamesCmd = &cobra.Command{ }), } +// Before executing any command, validate and canonicalize args. +func validateAndConfigure(cmd *cobra.Command, args []string) { + // Skip validation for commands that are self-sufficient. + switch cmd.Name() { + case "help", "version", "list": + return + } + + printErrAndExit := func(err error) { + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + os.Exit(1) + } + } + + // Validate architecture flag, if set. + if archOpt := cmd.Flags().Lookup("arch"); archOpt != nil && archOpt.Changed { + arch := strings.ToLower(archOpt.Value.String()) + + if arch != "amd64" && arch != "arm64" && arch != "fips" { + printErrAndExit(fmt.Errorf("unsupported architecture %q", arch)) + } + if arch != archOpt.Value.String() { + // Set the canonical value. 
+ _ = cmd.Flags().Set("arch", arch) + } + } +} + func main() { _ = roachprod.InitProviders() providerOptsContainer = vm.CreateProviderOptionsContainer() diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index eee1af6db9c2..bf4dfdd98674 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -1769,7 +1769,8 @@ func (c *clusterImpl) Stage( } c.status("staging binary") defer c.status("") - return errors.Wrap(roachprod.Stage(ctx, l, c.MakeNodes(opts...), "" /* stageOS */, dir, application, versionOrSHA), "cluster.Stage") + return errors.Wrap(roachprod.Stage(ctx, l, c.MakeNodes(opts...), + "" /* stageOS */, "" /* stageArch */, dir, application, versionOrSHA), "cluster.Stage") } // Get gets files from remote hosts. diff --git a/pkg/roachprod/install/staging.go b/pkg/roachprod/install/staging.go index 2b558cc5637a..09c31a6dd2fa 100644 --- a/pkg/roachprod/install/staging.go +++ b/pkg/roachprod/install/staging.go @@ -43,21 +43,49 @@ type archInfo struct { ReleaseArchiveExtension string } +// N.B. DebugArchitecture must correspond to 'SuffixFromPlatform' followed by stripping the literal (os) version, +// in 'MakeCRDBBinaryNonReleaseFile' (see pkg/release/build.go and pkg/release/release.go). +// +// ReleaseArchitecture must correspond to 'SuffixFromPlatform' followed by stripping "gnu-" in 'makeArchiveKeys' +// (see pkg/release/upload.go). +// +// TODO(srosenberg): refactor to use the above, directly from pkg/release/ which is the source of truth.
var ( - linuxArchInfo = archInfo{ + linux_x86_64_ArchInfo = archInfo{ DebugArchitecture: "linux-gnu-amd64", ReleaseArchitecture: "linux-amd64", LibraryExtension: ".so", ExecutableExtension: "", ReleaseArchiveExtension: "tgz", } - darwinArchInfo = archInfo{ + linux_x86_64_fips_ArchInfo = archInfo{ + DebugArchitecture: "linux-gnu-amd64-fips", + ReleaseArchitecture: "linux-amd64-fips", + LibraryExtension: ".so", + ExecutableExtension: "", + ReleaseArchiveExtension: "tgz", + } + linux_arm64_ArchInfo = archInfo{ + DebugArchitecture: "linux-gnu-arm64", + ReleaseArchitecture: "linux-arm64", + LibraryExtension: ".so", + ExecutableExtension: "", + ReleaseArchiveExtension: "tgz", + } + darwin_x86_64_ArchInfo = archInfo{ DebugArchitecture: "darwin-amd64", ReleaseArchitecture: "darwin-10.9-amd64", LibraryExtension: ".dylib", ExecutableExtension: "", ReleaseArchiveExtension: "tgz", } + darwin_arm64_ArchInfo = archInfo{ + DebugArchitecture: "darwin-arm64.unsigned", + ReleaseArchitecture: "darwin-11.0-arm64", + LibraryExtension: ".dylib", + ExecutableExtension: "", + ReleaseArchiveExtension: "tgz", + } windowsArchInfo = archInfo{ DebugArchitecture: "windows-amd64", ReleaseArchitecture: "windows-6.2-amd64", @@ -69,18 +97,36 @@ var ( crdbLibraries = []string{"libgeos", "libgeos_c"} ) -// ArchInfoForOS returns an ArchInfo for the given OS if the OS is -// currently supported. -func archInfoForOS(os string) (archInfo, error) { +// ArchInfoForOS returns an ArchInfo for the given OS and Architecture if currently supported. 
+func ArchInfoForOS(os string, arch string) (archInfo, error) { + if arch != "" && arch != "amd64" && arch != "arm64" && arch != "fips" { + return archInfo{}, errors.Errorf("unsupported architecture %q", arch) + } + switch os { case "linux": - return linuxArchInfo, nil + if arch == "arm64" { + return linux_arm64_ArchInfo, nil + } + if arch == "fips" { + return linux_x86_64_fips_ArchInfo, nil + } + return linux_x86_64_ArchInfo, nil case "darwin": - return darwinArchInfo, nil + if arch == "arm64" { + return darwin_arm64_ArchInfo, nil + } + if arch == "fips" { + return archInfo{}, errors.Errorf("%q is not supported on %q", arch, os) + } + return darwin_x86_64_ArchInfo, nil case "windows": + if arch == "fips" || arch == "arm64" { + return archInfo{}, errors.Errorf("%q is not supported on %q", arch, os) + } return windowsArchInfo, nil default: - return archInfo{}, errors.Errorf("no release architecture information for %q", os) + return archInfo{}, errors.Errorf("unsupported OS %q", os) } } @@ -130,16 +176,17 @@ func StageApplication( applicationName string, version string, os string, + arch string, destDir string, ) error { - archInfo, err := archInfoForOS(os) + archInfo, err := ArchInfoForOS(os, arch) if err != nil { return err } switch applicationName { case "cockroach": - err := StageRemoteBinary( + err := stageRemoteBinary( ctx, l, c, applicationName, "cockroach/cockroach", version, archInfo.DebugArchitecture, destDir, ) if err != nil { @@ -164,7 +211,8 @@ func StageApplication( } return nil case "workload": - err := StageRemoteBinary( + // N.B. workload binary is only available for linux amd64: https://github.com/cockroachdb/cockroach/issues/103563 + err := stageRemoteBinary( ctx, l, c, applicationName, "cockroach/workload", version, "" /* arch */, destDir, ) return err @@ -177,8 +225,10 @@ func StageApplication( // URLsForApplication returns a slice of URLs that should be // downloaded for the given application. 
-func URLsForApplication(application string, version string, os string) ([]*url.URL, error) { - archInfo, err := archInfoForOS(os) +func URLsForApplication( + application string, version string, os string, arch string, +) ([]*url.URL, error) { + archInfo, err := ArchInfoForOS(os, arch) if err != nil { return nil, err } @@ -205,6 +255,7 @@ func URLsForApplication(application string, version string, os string) ([]*url.U } return urls, nil case "workload": + // N.B. workload binary is only available for linux amd64: https://github.com/cockroachdb/cockroach/issues/103563 u, err := getEdgeURL("cockroach/workload", version, "" /* arch */, "" /* extension */) if err != nil { return nil, err @@ -225,7 +276,7 @@ func URLsForApplication(application string, version string, os string) ([]*url.U // application path to each specified by the cluster to the specified directory. // If no SHA is specified, the latest build of the binary is used instead. // Returns the SHA of the resolved binary. -func StageRemoteBinary( +func stageRemoteBinary( ctx context.Context, l *logger.Logger, c *SyncedCluster, diff --git a/pkg/roachprod/install/staging_test.go b/pkg/roachprod/install/staging_test.go index f66e25fc7fda..3455df5fff79 100644 --- a/pkg/roachprod/install/staging_test.go +++ b/pkg/roachprod/install/staging_test.go @@ -21,6 +21,7 @@ func TestURLsForApplication(t *testing.T) { application string version string os string + arch string } tests := []struct { name string @@ -41,6 +42,20 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.linux-gnu-amd64.563ea3967c98c67d47ede30d895c82315e4b1a77.so", }, }, + { + name: "cockroach linux arm64 sha", + args: args{ + application: "cockroach", + version: "563ea3967c98c67d47ede30d895c82315e4b1a77", + os: "linux", + arch: "arm64", + }, + want: []string{ + 
"https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/cockroach.linux-gnu-arm64.563ea3967c98c67d47ede30d895c82315e4b1a77", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos.linux-gnu-arm64.563ea3967c98c67d47ede30d895c82315e4b1a77.so", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.linux-gnu-arm64.563ea3967c98c67d47ede30d895c82315e4b1a77.so", + }, + }, { name: "cockroach darwin sha", args: args{ @@ -54,6 +69,20 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.darwin-amd64.563ea3967c98c67d47ede30d895c82315e4b1a77.dylib", }, }, + { + name: "cockroach darwin arm64 sha", + args: args{ + application: "cockroach", + version: "563ea3967c98c67d47ede30d895c82315e4b1a77", + os: "darwin", + arch: "arm64", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/cockroach.darwin-arm64.unsigned.563ea3967c98c67d47ede30d895c82315e4b1a77", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos.darwin-arm64.unsigned.563ea3967c98c67d47ede30d895c82315e4b1a77.dylib", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.darwin-arm64.unsigned.563ea3967c98c67d47ede30d895c82315e4b1a77.dylib", + }, + }, { name: "cockroach windows sha", args: args{ @@ -91,6 +120,34 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.linux-gnu-amd64.so.LATEST", }, }, + { + name: "cockroach linux FIPS latest", + args: args{ + application: "cockroach", + version: "", + os: "linux", + arch: "fips", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/cockroach.linux-gnu-amd64-fips.LATEST", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos.linux-gnu-amd64-fips.so.LATEST", + 
"https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.linux-gnu-amd64-fips.so.LATEST", + }, + }, + { + name: "cockroach linux arm64 latest", + args: args{ + application: "cockroach", + version: "", + os: "linux", + arch: "arm64", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/cockroach.linux-gnu-arm64.LATEST", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos.linux-gnu-arm64.so.LATEST", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.linux-gnu-arm64.so.LATEST", + }, + }, { name: "cockroach darwin latest", args: args{ @@ -104,6 +161,20 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.darwin-amd64.dylib.LATEST", }, }, + { + name: "cockroach darwin arm64 latest", + args: args{ + application: "cockroach", + version: "", + os: "darwin", + arch: "arm64", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/cockroach.darwin-arm64.unsigned.LATEST", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos.darwin-arm64.unsigned.dylib.LATEST", + "https://storage.googleapis.com/cockroach-edge-artifacts-prod/cockroach/lib/libgeos_c.darwin-arm64.unsigned.dylib.LATEST", + }, + }, { name: "cockroach windows latest", args: args{ @@ -128,6 +199,30 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.linux-amd64.tgz", }, }, + { + name: "release linux FIPS", + args: args{ + application: "release", + version: "v22.1.11", + os: "linux", + arch: "fips", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.linux-amd64-fips.tgz", + }, + }, + { + name: "release linux arm64", + args: args{ + application: "release", + version: "v22.1.11", + os: 
"linux", + arch: "arm64", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.linux-arm64.tgz", + }, + }, { name: "release darwin", args: args{ @@ -139,6 +234,18 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.darwin-10.9-amd64.tgz", }, }, + { + name: "release darwin arm64", + args: args{ + application: "release", + version: "v22.1.11", + os: "darwin", + arch: "arm64", + }, + want: []string{ + "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.darwin-11.0-arm64.tgz", + }, + }, { name: "release windows", args: args{ @@ -150,6 +257,58 @@ func TestURLsForApplication(t *testing.T) { "https://storage.googleapis.com/cockroach-release-artifacts-prod/cockroach-v22.1.11.windows-6.2-amd64.zip", }, }, + { + name: "unsupported arch 'arm63'", + args: args{ + application: "release", + version: "v22.1.11", + os: "darwin", + arch: "arm63", + }, + wantErr: true, + want: nil, + }, + { + name: "unsupported arch 'x86'", + args: args{ + application: "release", + version: "v22.1.11", + os: "linux", + arch: "x86", + }, + wantErr: true, + want: nil, + }, + { + name: "windows unsupported on arm64", + args: args{ + application: "cockroach", + os: "windows", + arch: "arm64", + }, + wantErr: true, + want: nil, + }, + { + name: "windows unsupported with FIPS", + args: args{ + application: "cockroach", + os: "windows", + arch: "fips", + }, + wantErr: true, + want: nil, + }, + { + name: "darwin unsupported with FIPS", + args: args{ + application: "cockroach", + os: "darwin", + arch: "fips", + }, + wantErr: true, + want: nil, + }, { name: "something else", args: args{ @@ -163,7 +322,7 @@ func TestURLsForApplication(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := URLsForApplication(tt.args.application, tt.args.version, tt.args.os) + got, err := 
URLsForApplication(tt.args.application, tt.args.version, tt.args.os, tt.args.arch) if (err != nil) != tt.wantErr { t.Errorf("URLsForApplication() error = %v, wantErr %v", err, tt.wantErr) return diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 872a4af94ccc..202dd03149bc 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -504,7 +504,7 @@ func Stage( ctx context.Context, l *logger.Logger, clusterName string, - stageOS, stageDir, applicationName, version string, + stageOS, stageArch, stageDir, applicationName, version string, ) error { if err := LoadClusters(); err != nil { return err @@ -515,10 +515,22 @@ func Stage( } os := "linux" + arch := "amd64" + + if c.IsLocal() { + os = runtime.GOOS + arch = runtime.GOARCH + } if stageOS != "" { os = stageOS - } else if c.IsLocal() { - os = runtime.GOOS + } + if stageArch != "" { + arch = stageArch + } + // N.B. it's technically possible to stage a binary for a different OS/arch; e.g., emulated amd64 on mac silicon. + // However, we don't perform any other validation, hence a warning message is appropriate. + if c.IsLocal() && (os != runtime.GOOS || arch != runtime.GOARCH) { + l.Printf("WARN: locally staging %s/%s binaries on %s/%s", os, arch, runtime.GOOS, runtime.GOARCH) } dir := "." @@ -526,7 +538,7 @@ func Stage( dir = stageDir } - return install.StageApplication(ctx, l, c, applicationName, version, os, dir) + return install.StageApplication(ctx, l, c, applicationName, version, os, arch, dir) } // Reset resets all VMs in a cluster. 
@@ -1390,12 +1402,18 @@ func Logs(l *logger.Logger, clusterName, dest, username string, logsOpts LogsOpt } // StageURL TODO -func StageURL(l *logger.Logger, applicationName, version, stageOS string) ([]*url.URL, error) { +func StageURL( + l *logger.Logger, applicationName, version, stageOS string, stageArch string, +) ([]*url.URL, error) { os := runtime.GOOS if stageOS != "" { os = stageOS } - urls, err := install.URLsForApplication(applicationName, version, os) + arch := runtime.GOARCH + if stageArch != "" { + arch = stageArch + } + urls, err := install.URLsForApplication(applicationName, version, os, arch) if err != nil { return nil, err } diff --git a/pkg/roachprod/vm/aws/aws.go b/pkg/roachprod/vm/aws/aws.go index ea7cf60bc636..e11e2f398317 100644 --- a/pkg/roachprod/vm/aws/aws.go +++ b/pkg/roachprod/vm/aws/aws.go @@ -950,7 +950,7 @@ func (p *Provider) runInstance( } else { machineType = providerOpts.MachineType } - + machineType = strings.ToLower(machineType) cpuOptions := providerOpts.CPUOptions // We avoid the need to make a second call to set the tags by jamming @@ -1019,10 +1019,16 @@ func (p *Provider) runInstance( } return *fl } - imageID := withFlagOverride(az.region.AMI_X86_64, &providerOpts.ImageAMI) - if opts.EnableFIPS { + useArmAMI := strings.Index(machineType, "6g.") == 1 || strings.Index(machineType, "7g.") == 1 + //TODO(srosenberg): remove this once we have a better way to detect ARM64 machines + if useArmAMI { + imageID = withFlagOverride(az.region.AMI_ARM64, &providerOpts.ImageAMI) + l.Printf("Using ARM64 AMI: %s for machine type: %s", imageID, machineType) + } + if !useArmAMI && opts.EnableFIPS { imageID = withFlagOverride(az.region.AMI_FIPS, &providerOpts.ImageAMI) + l.Printf("Using FIPS-enabled AMI: %s for machine type: %s", imageID, machineType) } args := []string{ "ec2", "run-instances",