Skip to content

Commit

Permalink
Merge #64131 #64707
Browse files Browse the repository at this point in the history
64131: storage: report all encountered intents in sst export error r=sumeerbhola,nvanbenschoten a=aliher1911

Previously, pebbleExportToSst was stopping upon encountering first
intent.

This was causing backups to be very slow if lots of intents build up.
To be able to proceed with export, intent needs to be resolved and
export retried. The result of this behaviour is that export would run
as many times as there were intents in the table before succeeding.

To address this, all intents from the range are collected and reported
in WriteIntentError. They could be resolved efficiently as batch
similar to how GC operates.

Fixes #59704

Release note (bug fix): Backup no longer resolves intents one by one.
This eliminates running a high pri query to cleanup intents to unblock
backup in case of intent buildup.

64707: sql: Add telemetry for zone configuration override r=otan a=ajstorm

Add telemetry for cases where the zone configuration of a multi-region
database or table is overridden by the user.

Release note: None

Closes #64268.

Co-authored-by: Oleg Afanasyev <[email protected]>
Co-authored-by: Adam Storm <[email protected]>
  • Loading branch information
3 people committed May 6, 2021
3 parents 2ad9dab + b05f1b3 + 9f6f1e4 commit 28c24ed
Show file tree
Hide file tree
Showing 10 changed files with 563 additions and 118 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/backupccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ go_test(
srcs = [
"backup_cloud_test.go",
"backup_destination_test.go",
"backup_intents_test.go",
"backup_test.go",
"bench_test.go",
"create_scheduled_backup_test.go",
Expand Down
85 changes: 85 additions & 0 deletions pkg/ccl/backupccl/backup_intents_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2021 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package backupccl_test

import (
"context"
"fmt"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/stretchr/testify/require"
)

func TestCleanupIntentsDuringBackupPerformanceRegression(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
defer utilccl.TestingEnableEnterprise()()

skip.UnderRace(t, "measures backup times not to regress, can't work under race")

// Time to create backup in presence of intents differs roughly 10x so some
// arbitrary number is picked which is 2x higher than current backup time on
// current (laptop) hardware.
const backupTimeout = time.Second * 10

const totalRowCount = 10000
const perTransactionRowCount = 10

// Interceptor catches requests that cleanup transactions of size 1000 which are
// test data transactions. All other transaction commits pass though.
interceptor := func(ctx context.Context, req roachpb.BatchRequest) *roachpb.Error {
endTxn := req.Requests[0].GetEndTxn()
if endTxn != nil && !endTxn.Commit && len(endTxn.LockSpans) == perTransactionRowCount {
// If this is a rollback of one the test's SQL transactions, allow the
// EndTxn to proceed and mark the transaction record as ABORTED, but strip
// the request of its lock spans so that no intents are recorded into the
// transaction record or eagerly resolved. This is a bit of a hack, but it
// mimics the behavior of an abandoned transaction which is aborted by a
// pusher after expiring due to an absence of heartbeats.
endTxn.LockSpans = nil
}
return nil
}
serverKnobs := kvserver.StoreTestingKnobs{TestingRequestFilter: interceptor}

s, sqlDb, _ := serverutils.StartServer(t,
base.TestServerArgs{Knobs: base.TestingKnobs{Store: &serverKnobs}})
defer s.Stopper().Stop(context.Background())

_, err := sqlDb.Exec("create table foo(v int not null)")
require.NoError(t, err)

for i := 0; i < totalRowCount; i += perTransactionRowCount {
tx, err := sqlDb.Begin()
require.NoError(t, err)
for j := 0; j < perTransactionRowCount; j += 1 {
statement := fmt.Sprintf("insert into foo (v) values (%d)", i+j)
_, err = tx.Exec(statement)
require.NoError(t, err)
}
require.NoError(t, tx.Rollback())
}

start := timeutil.Now()
_, err = sqlDb.Exec("backup table foo to 'userfile:///test.foo'")
stop := timeutil.Now()
require.NoError(t, err, "Failed to run backup")
t.Logf("Backup took %s", stop.Sub(start))
require.WithinDuration(t, stop, start, backupTimeout, "Time to make backup")
}
69 changes: 69 additions & 0 deletions pkg/ccl/telemetryccl/testdata/telemetry/multiregion
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,72 @@ SELECT * FROM t8 WHERE a = 1
----
sql.plan.opt.locality-optimized-search

exec
USE survive_region;
CREATE TABLE t9 (a INT PRIMARY KEY) LOCALITY REGIONAL BY ROW
----

feature-allowlist
sql.multiregion.zone_configuration.override.*
----

feature-counters
SET override_multi_region_zone_config = true;
ALTER TABLE t9 CONFIGURE ZONE USING num_replicas=10;
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.user 1

# Note that this case illustrates that once the session variable is set, we'll
# count all instances where a zone configuration is modified, even if that
# modification didn't strictly require overriding.
feature-counters
SET override_multi_region_zone_config = true;
ALTER TABLE t9 CONFIGURE ZONE USING gc.ttlseconds=10;
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.user 1

feature-counters
ALTER TABLE t9 CONFIGURE ZONE USING gc.ttlseconds=5
----

feature-counters
SET override_multi_region_zone_config = true;
ALTER TABLE t9 SET LOCALITY GLOBAL;
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.system.table 1

# Ensure that no counters are set in the case where we're not overriding
feature-counters
ALTER TABLE t9 SET LOCALITY REGIONAL BY TABLE IN PRIMARY REGION;
----

# Note that this case illustrates that once the session variable is set, we'll
# count all instances where a table's zone configuration is modified, even if
# that modification didn't strictly require overriding.
feature-counters
SET override_multi_region_zone_config = true;
ALTER TABLE t9 SET LOCALITY GLOBAL;
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.system.table 1

feature-counters
SET override_multi_region_zone_config = true;
ALTER DATABASE d CONFIGURE ZONE USING num_replicas=10;
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.user 1

feature-counters
ALTER DATABASE d CONFIGURE ZONE USING gc.ttlseconds=5;
----

feature-counters
SET override_multi_region_zone_config = true;
ALTER DATABASE d ADD REGION "us-east-1";
SET override_multi_region_zone_config = false
----
sql.multiregion.zone_configuration.override.system.database 1
24 changes: 19 additions & 5 deletions pkg/sql/region_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/config/zonepb"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/dbdesc"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
"github.com/cockroachdb/errors"
"github.com/gogo/protobuf/proto"
)
Expand Down Expand Up @@ -1088,10 +1090,19 @@ func blockDiscardOfZoneConfigForMultiRegionObject(
// The change is permitted iff it is not modifying a protected multi-region
// field of the zone configs (as defined by zonepb.MultiRegionZoneConfigFields).
func (p *planner) CheckZoneConfigChangePermittedForMultiRegion(
ctx context.Context, zs tree.ZoneSpecifier, options tree.KVOptions, force bool,
ctx context.Context, zs tree.ZoneSpecifier, options tree.KVOptions,
) error {
// If the user has specified the FORCE option, the world is their oyster.
if force {
// If the user has specified that they're overriding, then the world is
// their oyster.
if p.SessionData().OverrideMultiRegionZoneConfigEnabled {
// Note that we increment the telemetry counter unconditionally here.
// It's possible that this will lead to over-counting as the user may
// have left the override on and is now updating a zone configuration
// that is not protected by the multi-region abstractions. To get finer
// grained counting however, would be more difficult to code, and may
// not even prove to be that valuable, so we have decided to live with
// the potential for over-counting.
telemetry.Inc(sqltelemetry.OverrideMultiRegionZoneConfigurationUser)
return nil
}

Expand Down Expand Up @@ -1130,8 +1141,9 @@ func (p *planner) CheckZoneConfigChangePermittedForMultiRegion(

hint := "to override this error, SET override_multi_region_zone_config = true and reissue the command"

// The request is to discard the zone configuration. Error in all discard
// cases.
// The request is to discard the zone configuration. Error in cases where
// the zone configuration being discarded was created by the multi-region
// abstractions.
if options == nil {
needToError := false
// Determine if this zone config that we're trying to discard is
Expand Down Expand Up @@ -1411,6 +1423,7 @@ func (p *planner) validateZoneConfigForMultiRegionDatabaseWasNotModifiedByUser(
) error {
// If the user is overriding, our work here is done.
if p.SessionData().OverrideMultiRegionZoneConfigEnabled {
telemetry.Inc(sqltelemetry.OverrideMultiRegionDatabaseZoneConfigurationSystem)
return nil
}
currentZoneConfig, err := getZoneConfigRaw(ctx, p.txn, p.ExecCfg().Codec, dbDesc.GetID())
Expand Down Expand Up @@ -1484,6 +1497,7 @@ func (p *planner) validateZoneConfigForMultiRegionTableWasNotModifiedByUser(
// If the user is overriding, or this is not a multi-region table our work here
// is done.
if p.SessionData().OverrideMultiRegionZoneConfigEnabled || desc.GetLocalityConfig() == nil {
telemetry.Inc(sqltelemetry.OverrideMultiRegionTableZoneConfigurationSystem)
return nil
}
currentZoneConfig, err := getZoneConfigRaw(ctx, p.txn, p.ExecCfg().Codec, desc.GetID())
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/set_zone_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ func (p *planner) SetZoneConfig(ctx context.Context, n *tree.SetZoneConfig) (pla
ctx,
n.ZoneSpecifier,
n.Options,
p.SessionData().OverrideMultiRegionZoneConfigEnabled,
); err != nil {
return nil, err
}
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/sqltelemetry/multiregion.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,26 @@ var (
ImportIntoMultiRegionDatabaseCounter = telemetry.GetCounterOnce(
"sql.multiregion.import",
)

// OverrideMultiRegionZoneConfigurationUser is to be incremented when a
// multi-region zone configuration is overridden by the user.
OverrideMultiRegionZoneConfigurationUser = telemetry.GetCounterOnce(
"sql.multiregion.zone_configuration.override.user",
)

// OverrideMultiRegionDatabaseZoneConfigurationSystem is to be incremented
// when a multi-region database zone configuration is overridden by the
// system.
OverrideMultiRegionDatabaseZoneConfigurationSystem = telemetry.GetCounterOnce(
"sql.multiregion.zone_configuration.override.system.database",
)

// OverrideMultiRegionTableZoneConfigurationSystem is to be incremented when
// a multi-region table/index/partition zone configuration is overridden by
// the system.
OverrideMultiRegionTableZoneConfigurationSystem = telemetry.GetCounterOnce(
"sql.multiregion.zone_configuration.override.system.table",
)
)

// CreateDatabaseSurvivalGoalCounter is to be incremented when the survival goal
Expand Down
Loading

0 comments on commit 28c24ed

Please sign in to comment.