Skip to content

Commit

Permalink
Merge #105470
Browse files Browse the repository at this point in the history
105470: roachprod: add `--gce-use-spot` r=renatolabs a=srosenberg

Previously, `--gce-preemptible` was available.
This change adds an option to create a GCE spot
instance, whose lifetime can exceed 24h; otherwise, it's essentially equivalent to a GCE preemptible instance.
VM metadata and billing estimator are updated to
handle both preemptible and spot instances.

Epic: none

Release note: None

Co-authored-by: Stan Rosenberg <[email protected]>
  • Loading branch information
craig[bot] and srosenberg committed Jun 29, 2023
2 parents a8a5e7b + 89b3be7 commit 0736712
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 9 deletions.
11 changes: 9 additions & 2 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,13 @@ hosts file.
}
}
timeRemaining := c.LifetimeRemaining().Round(time.Second)
// formatTTL renders the remaining lifetime (TTL) of the cluster as a colored
// string: magenta when the cluster's first VM is flagged Preemptible, blue
// otherwise.
formatTTL := func(ttl time.Duration) string {
	// Only the first VM is consulted. The length check keeps a cluster with an
	// empty VM list from panicking; such a cluster falls back to the default
	// (blue) color.
	if len(c.VMs) > 0 && c.VMs[0].Preemptible {
		return color.HiMagentaString(ttl.String())
	}
	return color.HiBlueString(ttl.String())
}
cost := c.CostPerHour
totalCostPerHour += cost
alive := timeutil.Since(c.CreatedAt).Round(time.Minute)
Expand All @@ -336,14 +343,14 @@ hosts file.
color.HiGreenString(p.Sprintf("$%.2f", cost)),
colorByCostBucket(costSinceCreation)(p.Sprintf("$%.2f", costSinceCreation)),
color.HiWhiteString(alive.String()),
color.HiBlueString(timeRemaining.String()),
formatTTL(timeRemaining),
colorByCostBucket(costRemaining)(p.Sprintf("$%.2f", costRemaining)))
} else {
fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t",
color.HiGreenString(""),
color.HiGreenString(""),
color.HiWhiteString(alive.String()),
color.HiBlueString(timeRemaining.String()),
formatTTL(timeRemaining),
color.HiGreenString(""))
}
} else {
Expand Down
23 changes: 22 additions & 1 deletion pkg/roachprod/vm/gce/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ type jsonVM struct {
NatIP string
}
}
Scheduling struct {
AutomaticRestart bool
Preemptible bool
OnHostMaintenance string
InstanceTerminationAction string
ProvisioningModel string
}
MachineType string
SelfLink string
Zone string
Expand Down Expand Up @@ -155,6 +162,10 @@ func (jsonVM *jsonVM) toVM(
vpc = lastComponent(jsonVM.NetworkInterfaces[0].Network)
}
}
if jsonVM.Scheduling.OnHostMaintenance == "" {
// N.B. 'onHostMaintenance' is always non-empty, hence its absence implies a parsing error
vmErrors = append(vmErrors, vm.ErrBadScheduling)
}

machineType := lastComponent(jsonVM.MachineType)
zone := lastComponent(jsonVM.Zone)
Expand Down Expand Up @@ -216,6 +227,7 @@ func (jsonVM *jsonVM) toVM(
Errors: vmErrors,
DNS: fmt.Sprintf("%s.%s.%s", jsonVM.Name, zone, project),
Lifetime: lifetime,
Preemptible: jsonVM.Scheduling.Preemptible,
Labels: jsonVM.Labels,
PrivateIP: privateIP,
Provider: ProviderName,
Expand Down Expand Up @@ -253,6 +265,7 @@ func DefaultProviderOpts() *ProviderOpts {
TerminateOnMigration: false,
useSharedUser: true,
preemptible: false,
useSpot: false,
}
}

Expand Down Expand Up @@ -283,6 +296,8 @@ type ProviderOpts struct {
useSharedUser bool
// use preemptible instances
preemptible bool
// use spot instances (i.e., latest version of preemptibles which can run > 24 hours)
useSpot bool
}

// Provider is the GCE implementation of the vm.Provider interface.
Expand Down Expand Up @@ -809,7 +824,10 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
"will be repeated N times. If > 1 zone specified, nodes will be geo-distributed\n"+
"regardless of geo (default [%s])",
strings.Join(defaultZones, ",")))
flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false, "use preemptible GCE instances")
flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false,
"use preemptible GCE instances (lifetime cannot exceed 24h)")
flags.BoolVar(&o.useSpot, ProviderName+"-use-spot", false,
"use spot GCE instances (like preemptible but lifetime can exceed 24h)")
flags.BoolVar(&o.TerminateOnMigration, ProviderName+"-terminateOnMigration", false,
"use 'TERMINATE' maintenance policy (for GCE live migrations)")
}
Expand Down Expand Up @@ -956,6 +974,8 @@ func (p *Provider) Create(
// Preemptible instances require the following arguments set explicitly
args = append(args, "--maintenance-policy", "TERMINATE")
args = append(args, "--no-restart-on-failure")
} else if providerOpts.useSpot {
args = append(args, "--provisioning-model", "SPOT")
} else {
if providerOpts.TerminateOnMigration {
args = append(args, "--maintenance-policy", "TERMINATE")
Expand Down Expand Up @@ -1472,6 +1492,7 @@ func populateCostPerHour(l *logger.Logger, vms vm.List) error {
},
},
},
Preemptible: vm.Preemptible,
MachineType: &cloudbilling.MachineType{
PredefinedMachineType: &cloudbilling.PredefinedMachineType{
MachineType: machineType,
Expand Down
14 changes: 8 additions & 6 deletions pkg/roachprod/vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ type VM struct {
CreatedAt time.Time `json:"created_at"`
// If non-empty, indicates that some or all of the data in the VM instance
// is not present or otherwise invalid.
Errors []error `json:"errors"`
Lifetime time.Duration `json:"lifetime"`
Labels map[string]string `json:"labels"`
Errors []error `json:"errors"`
Lifetime time.Duration `json:"lifetime"`
Preemptible bool `json:"preemptible"`
Labels map[string]string `json:"labels"`
// The provider-internal DNS name for the VM instance
DNS string `json:"dns"`
// The name of the cloud provider that hosts the VM instance
Expand Down Expand Up @@ -129,9 +130,10 @@ func Name(cluster string, idx int) string {

// Error values for VM.Error
var (
ErrBadNetwork = errors.New("could not determine network information")
ErrInvalidName = errors.New("invalid VM name")
ErrNoExpiration = errors.New("could not determine expiration")
ErrBadNetwork = errors.New("could not determine network information")
ErrBadScheduling = errors.New("could not determine scheduling information")
ErrInvalidName = errors.New("invalid VM name")
ErrNoExpiration = errors.New("could not determine expiration")
)

var regionRE = regexp.MustCompile(`(.*[^-])-?[a-z]$`)
Expand Down

0 comments on commit 0736712

Please sign in to comment.