From 89b3be749c01513ea7b9cb9adf44555f7c420439 Mon Sep 17 00:00:00 2001 From: Stan Rosenberg Date: Fri, 23 Jun 2023 16:52:06 -0400 Subject: [PATCH] roachprod: add `--gce-use-spot` Previously, `--gce-preemptible` was available. This change adds an option to create a GCE spot instance, whose lifetime can exceed 24h; otherwise, it's essentially equivalent to a GCE preemptible. VM metadata and billing estimator are updated to handle both preemptible and spot instances. Epic: none Release note: None --- pkg/cmd/roachprod/main.go | 11 +++++++++-- pkg/roachprod/vm/gce/gcloud.go | 23 ++++++++++++++++++++++- pkg/roachprod/vm/vm.go | 14 ++++++++------ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/pkg/cmd/roachprod/main.go b/pkg/cmd/roachprod/main.go index 37f666c72ae1..6199ed843ff7 100644 --- a/pkg/cmd/roachprod/main.go +++ b/pkg/cmd/roachprod/main.go @@ -326,6 +326,13 @@ hosts file. } } timeRemaining := c.LifetimeRemaining().Round(time.Second) + formatTTL := func(ttl time.Duration) string { + if c.VMs[0].Preemptible { + return color.HiMagentaString(ttl.String()) + } else { + return color.HiBlueString(ttl.String()) + } + } cost := c.CostPerHour totalCostPerHour += cost alive := timeutil.Since(c.CreatedAt).Round(time.Minute) @@ -336,14 +343,14 @@ hosts file. 
color.HiGreenString(p.Sprintf("$%.2f", cost)), colorByCostBucket(costSinceCreation)(p.Sprintf("$%.2f", costSinceCreation)), color.HiWhiteString(alive.String()), - color.HiBlueString(timeRemaining.String()), + formatTTL(timeRemaining), colorByCostBucket(costRemaining)(p.Sprintf("$%.2f", costRemaining))) } else { fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t", color.HiGreenString(""), color.HiGreenString(""), color.HiWhiteString(alive.String()), - color.HiBlueString(timeRemaining.String()), + formatTTL(timeRemaining), color.HiGreenString("")) } } else { diff --git a/pkg/roachprod/vm/gce/gcloud.go b/pkg/roachprod/vm/gce/gcloud.go index 5ea7074e15c6..61f59c3f2780 100644 --- a/pkg/roachprod/vm/gce/gcloud.go +++ b/pkg/roachprod/vm/gce/gcloud.go @@ -118,6 +118,13 @@ type jsonVM struct { NatIP string } } + Scheduling struct { + AutomaticRestart bool + Preemptible bool + OnHostMaintenance string + InstanceTerminationAction string + ProvisioningModel string + } MachineType string SelfLink string Zone string @@ -155,6 +162,10 @@ func (jsonVM *jsonVM) toVM( vpc = lastComponent(jsonVM.NetworkInterfaces[0].Network) } } + if jsonVM.Scheduling.OnHostMaintenance == "" { + // N.B. 
'onHostMaintenance' is always non-empty, hence its absence implies a parsing error + vmErrors = append(vmErrors, vm.ErrBadScheduling) + } machineType := lastComponent(jsonVM.MachineType) zone := lastComponent(jsonVM.Zone) @@ -216,6 +227,7 @@ func (jsonVM *jsonVM) toVM( Errors: vmErrors, DNS: fmt.Sprintf("%s.%s.%s", jsonVM.Name, zone, project), Lifetime: lifetime, + Preemptible: jsonVM.Scheduling.Preemptible, Labels: jsonVM.Labels, PrivateIP: privateIP, Provider: ProviderName, @@ -253,6 +265,7 @@ func DefaultProviderOpts() *ProviderOpts { TerminateOnMigration: false, useSharedUser: true, preemptible: false, + useSpot: false, } } @@ -283,6 +296,8 @@ type ProviderOpts struct { useSharedUser bool // use preemptible instances preemptible bool + // use spot instances (i.e., latest version of preemptibles which can run > 24 hours) + useSpot bool } // Provider is the GCE implementation of the vm.Provider interface. @@ -809,7 +824,10 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) { "will be repeated N times. 
If > 1 zone specified, nodes will be geo-distributed\n"+ "regardless of geo (default [%s])", strings.Join(defaultZones, ","))) - flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false, "use preemptible GCE instances") + flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false, + "use preemptible GCE instances (lifetime cannot exceed 24h)") + flags.BoolVar(&o.useSpot, ProviderName+"-use-spot", false, + "use spot GCE instances (like preemptible but lifetime can exceed 24h)") flags.BoolVar(&o.TerminateOnMigration, ProviderName+"-terminateOnMigration", false, "use 'TERMINATE' maintenance policy (for GCE live migrations)") } @@ -956,6 +974,8 @@ func (p *Provider) Create( // Preemptible instances require the following arguments set explicitly args = append(args, "--maintenance-policy", "TERMINATE") args = append(args, "--no-restart-on-failure") + } else if providerOpts.useSpot { + args = append(args, "--provisioning-model", "SPOT") } else { if providerOpts.TerminateOnMigration { args = append(args, "--maintenance-policy", "TERMINATE") @@ -1472,6 +1492,7 @@ func populateCostPerHour(l *logger.Logger, vms vm.List) error { }, }, }, + Preemptible: vm.Preemptible, MachineType: &cloudbilling.MachineType{ PredefinedMachineType: &cloudbilling.PredefinedMachineType{ MachineType: machineType, diff --git a/pkg/roachprod/vm/vm.go b/pkg/roachprod/vm/vm.go index d9637244acd0..f42aa3a5737b 100644 --- a/pkg/roachprod/vm/vm.go +++ b/pkg/roachprod/vm/vm.go @@ -73,9 +73,10 @@ type VM struct { CreatedAt time.Time `json:"created_at"` // If non-empty, indicates that some or all of the data in the VM instance // is not present or otherwise invalid. 
- Errors []error `json:"errors"` - Lifetime time.Duration `json:"lifetime"` - Labels map[string]string `json:"labels"` + Errors []error `json:"errors"` + Lifetime time.Duration `json:"lifetime"` + Preemptible bool `json:"preemptible"` + Labels map[string]string `json:"labels"` // The provider-internal DNS name for the VM instance DNS string `json:"dns"` // The name of the cloud provider that hosts the VM instance @@ -129,9 +130,10 @@ func Name(cluster string, idx int) string { // Error values for VM.Error var ( - ErrBadNetwork = errors.New("could not determine network information") - ErrInvalidName = errors.New("invalid VM name") - ErrNoExpiration = errors.New("could not determine expiration") + ErrBadNetwork = errors.New("could not determine network information") + ErrBadScheduling = errors.New("could not determine scheduling information") + ErrInvalidName = errors.New("invalid VM name") + ErrNoExpiration = errors.New("could not determine expiration") ) var regionRE = regexp.MustCompile(`(.*[^-])-?[a-z]$`)