Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
126323: sql: declare sessiondatapb.QoSLevel variables in one place r=xinhaoz a=xinhaoz

When we set the QualityOfService in the session data for an internal executor, we must provide a pointer to a
sessiondatapb.QoSLevel. Since the QoS levels are defined as constants, we can't take their address. This commit does a little cleanup by defining the variables versions of these consts in a singular place.

Epic: none
Release note: None

126425: roachtest: default to us-central1 when using ARM64 r=HerkoLategan,srosenberg a=renatolabs

Previously, the GCE implementation would choose a random zone in the `us-east1` region. However, T2A VMs are not available in those zones. As a result, the test runner would always fall back to n2 instances in those cases. In other words, unless the test went out of its way to specify a custom GCE zone that happened to support T2A instances, we would never test ARM64 builds on GCE.

In this commit, we change the default set of zones to take the desired architecture into account.

Epic: none

Release note: None

Co-authored-by: Xin Hao Zhang <[email protected]>
Co-authored-by: Renato Costa <[email protected]>
  • Loading branch information
3 people committed Jul 2, 2024
3 parents c6dc40a + 1683a05 + b369663 commit 417aaff
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 17 deletions.
8 changes: 5 additions & 3 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -3070,9 +3070,11 @@ func archForTest(ctx context.Context, l *logger.Logger, testSpec registry.TestSp
arch = vm.ArchAMD64
}
if roachtestflags.Cloud == spec.GCE && arch == vm.ArchARM64 {
// N.B. T2A support is rather limited, both in terms of supported regions and no local SSDs. Thus, we must
// fall back to AMD64 in those cases. See #122035.
if !gce.IsSupportedT2AZone(strings.Split(testSpec.Cluster.GCE.Zones, ",")) {
// N.B. T2A support is rather limited, both in terms of supported
// regions and no local SSDs. Thus, we must fall back to AMD64 in
// those cases. See #122035.
if testSpec.Cluster.GCE.Zones != "" &&
!gce.IsSupportedT2AZone(strings.Split(testSpec.Cluster.GCE.Zones, ",")) {
l.PrintfCtx(ctx, "%q specified one or more GCE regions unsupported by T2A, falling back to AMD64; see #122035", testSpec.Name)
return vm.ArchAMD64
}
Expand Down
19 changes: 12 additions & 7 deletions pkg/roachprod/vm/gce/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -959,11 +959,16 @@ type ProjectsVal struct {
// https://cloud.google.com/compute/docs/regions-zones#available
//
// Note that the default zone (the first zone returned by this
// function) is always in the us-east1 region, but we randomize the
// specific zone. This is to avoid "zone exhausted" errors in one
// particular zone, especially during nightly roachtest runs.
func defaultZones() []string {
// function) is always in the us-east1 region (or us-central1 for
// ARM64 builds), but we randomize the specific zone. This is to avoid
// "zone exhausted" errors in one particular zone, especially during
// nightly roachtest runs.
func defaultZones(arch string) []string {
zones := []string{"us-east1-b", "us-east1-c", "us-east1-d"}
if vm.ParseArch(arch) == vm.ArchARM64 {
// T2A instances are only available in us-central1 in NA.
zones = []string{"us-central1-a", "us-central1-b", "us-central1-f"}
}
rand.Shuffle(len(zones), func(i, j int) { zones[i], zones[j] = zones[j], zones[i] })

return []string{
Expand Down Expand Up @@ -1058,7 +1063,7 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
fmt.Sprintf("Zones for cluster. If zones are formatted as AZ:N where N is an integer, the zone\n"+
"will be repeated N times. If > 1 zone specified, nodes will be geo-distributed\n"+
"regardless of geo (default [%s])",
strings.Join(defaultZones(), ",")))
strings.Join(defaultZones(string(vm.ArchAMD64)), ",")))
flags.BoolVar(&o.preemptible, ProviderName+"-preemptible", false,
"use preemptible GCE instances (lifetime cannot exceed 24h)")
flags.BoolVar(&o.UseSpot, ProviderName+"-use-spot", false,
Expand Down Expand Up @@ -1263,9 +1268,9 @@ func computeZones(opts vm.CreateOpts, providerOpts *ProviderOpts) ([]string, err
}
if len(zones) == 0 {
if opts.GeoDistributed {
zones = defaultZones()
zones = defaultZones(opts.Arch)
} else {
zones = []string{defaultZones()[0]}
zones = []string{defaultZones(opts.Arch)[0]}
}
}
if providerOpts.useArmAMI() {
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/sessiondatapb/local_only_session_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,9 @@ const (
// use pointers to constants, we define these variables to use in
// those cases.
var (
UserLowQoS = UserLow
SystemLowQoS = SystemLow
TTLLowQoS = TTLLow
UserLowQoS = UserLow
)

var qosLevelsDict = map[QoSLevel]string{
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/ttl/ttljob/ttljob_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,12 +235,11 @@ func (m *rowLevelTTLMetrics) fetchStatistics(
// User a super low quality of service (lower than TTL low), as we don't
// really care if statistics gets left behind and prefer the TTL job to
// have priority.
qosLevel := sessiondatapb.SystemLow
datums, err := execCfg.InternalDB.Executor().QueryRowEx(
ctx,
c.opName,
nil,
getInternalExecutorOverride(qosLevel),
getInternalExecutorOverride(sessiondatapb.SystemLowQoS),
c.query,
c.args...,
)
Expand Down
6 changes: 2 additions & 4 deletions pkg/sql/ttl/ttljob/ttljob_query_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,6 @@ func (b *SelectQueryBuilder) buildQuery() string {
)
}

var qosLevel = sessiondatapb.TTLLow

func getInternalExecutorOverride(
qosLevel sessiondatapb.QoSLevel,
) sessiondata.InternalExecutorOverride {
Expand Down Expand Up @@ -158,7 +156,7 @@ func (b *SelectQueryBuilder) Run(
ctx,
b.selectOpName,
nil, /* txn */
getInternalExecutorOverride(qosLevel),
getInternalExecutorOverride(sessiondatapb.TTLLowQoS),
query,
b.cachedArgs...,
)
Expand Down Expand Up @@ -261,7 +259,7 @@ func (b *DeleteQueryBuilder) Run(
ctx,
b.deleteOpName,
txn.KV(),
getInternalExecutorOverride(qosLevel),
getInternalExecutorOverride(sessiondatapb.TTLLowQoS),
query,
deleteArgs...,
)
Expand Down

0 comments on commit 417aaff

Please sign in to comment.