Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
132129: roachtest: add slow disk perturbation test r=kvoli a=andrewbaptist

This change adds a new set of perturbation tests, perturbation/*/slowDisk, which test slow disks. We have seen support cases where slow disks can cause cluster-level availability outages.

Epic: none

Release note: None

132166: rac2,kvserver: do not quiesce if send tokens held r=sumeerbhola a=pav-kv

This PR prevents range quiescence if RACv2 holds any send tokens for this range. Quiescence would prevent `MsgApp` pings, which ensure that the leader reliably learns about the follower store admitting log entries and cause it to release tokens accordingly. We do not want to end up holding tokens permanently.

Resolves #129581

132202: sql/schemachanger: clean up SequenceOwner elements during restore r=fqazi a=fqazi

Previously, when restoring a backup taken in the middle of a DROP COLUMN, where a column had a sequence owner assigned, it was possible for the backup to be unrestorable. This would happen because the sequence reference would have been dropped in the plan, but the sequence owner element was still within the state. To address this, this change updates the rewrite logic to clean up any SequenceOwner elements which have the referenced sequence already removed.

Fixes: #130778

Release note (bug fix): Addressed a rare bug that could prevent backups taken during a DROP COLUMN operation with a sequence owner from restoring with the error: "rewriting descriptor ids: missing rewrite for <id> in SequenceOwner..."

Co-authored-by: Andrew Baptist <[email protected]>
Co-authored-by: Pavel Kalinnikov <[email protected]>
Co-authored-by: Faizan Qazi <[email protected]>
  • Loading branch information
4 people committed Oct 10, 2024
4 parents 69b1a27 + a1f9759 + 2af87f8 + e08e89b commit a93f2b2
Show file tree
Hide file tree
Showing 27 changed files with 1,958 additions and 342 deletions.
28 changes: 28 additions & 0 deletions pkg/ccl/schemachangerccl/backup_base_generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 29 additions & 4 deletions pkg/cmd/roachtest/roachtestutil/disk_stall.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,24 @@ type DiskStaller interface {
Setup(ctx context.Context)
Cleanup(ctx context.Context)
Stall(ctx context.Context, nodes option.NodeListOption)
Slow(ctx context.Context, nodes option.NodeListOption, bytesPerSecond int)
Unstall(ctx context.Context, nodes option.NodeListOption)
DataDir() string
LogDir() string
}

// NoopDiskStaller is a DiskStaller whose Setup, Cleanup, Stall, Slow and
// Unstall methods do nothing. DataDir and LogDir return fixed strings.
type NoopDiskStaller struct{}

// Compile-time check that NoopDiskStaller satisfies DiskStaller.
var _ DiskStaller = NoopDiskStaller{}

// Setup does nothing.
func (NoopDiskStaller) Setup(ctx context.Context) {}

// Cleanup does nothing.
func (NoopDiskStaller) Cleanup(ctx context.Context) {}

// Stall does nothing.
func (NoopDiskStaller) Stall(_ context.Context, _ option.NodeListOption) {}

// Slow does nothing.
func (NoopDiskStaller) Slow(_ context.Context, _ option.NodeListOption, _ int) {}

// Unstall does nothing.
func (NoopDiskStaller) Unstall(_ context.Context, _ option.NodeListOption) {}

// DataDir returns the literal "{store-dir}".
func (NoopDiskStaller) DataDir() string {
	return "{store-dir}"
}

// LogDir returns the literal "logs".
func (NoopDiskStaller) LogDir() string {
	return "logs"
}

type Fataler interface {
Fatal(args ...interface{})
Fatalf(format string, args ...interface{})
Expand Down Expand Up @@ -68,15 +81,20 @@ func (s *cgroupDiskStaller) Setup(ctx context.Context) {
// Cleanup is a no-op for cgroupDiskStaller.
func (s *cgroupDiskStaller) Cleanup(ctx context.Context) {}

// Stall approximates a disk stall on the given nodes by delegating to Slow
// with a very low throughput limit of 4 bytes per second.
func (s *cgroupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOption) {
	// NB: it is not understood why, but requesting bytesPerSecond={0,1}
	// results in Invalid argument from the io.max cgroupv2 API, hence 4.
	const stallBytesPerSecond = 4
	s.Slow(ctx, nodes, stallBytesPerSecond)
}

func (s *cgroupDiskStaller) Slow(
ctx context.Context, nodes option.NodeListOption, bytesPerSecond int,
) {
// Shuffle the order of read and write stall initiation.
rand.Shuffle(len(s.readOrWrite), func(i, j int) {
s.readOrWrite[i], s.readOrWrite[j] = s.readOrWrite[j], s.readOrWrite[i]
})
for _, rw := range s.readOrWrite {
// NB: I don't understand why, but attempting to set a
// bytesPerSecond={0,1} results in Invalid argument from the io.max
// cgroupv2 API.
if err := s.setThroughput(ctx, nodes, rw, throughput{limited: true, bytesPerSecond: 4}); err != nil {
if err := s.setThroughput(ctx, nodes, rw, throughput{limited: true, bytesPerSecond: bytesPerSecond}); err != nil {
s.f.Fatal(err)
}
}
Expand Down Expand Up @@ -225,6 +243,13 @@ func (s *dmsetupDiskStaller) Stall(ctx context.Context, nodes option.NodeListOpt
s.c.Run(ctx, option.WithNodes(nodes), `sudo dmsetup suspend --noflush --nolockfs data1`)
}

// Slow is not implemented for dmsetupDiskStaller: it ignores all of its
// arguments and reports a fatal error through s.f.Fatal.
func (s *dmsetupDiskStaller) Slow(
ctx context.Context, nodes option.NodeListOption, bytesPerSecond int,
) {
// TODO(baptist): Consider https://github.com/kawamuray/ddi.
s.f.Fatal("Slow is not supported for dmsetupDiskStaller")
}

// Unstall runs `sudo dmsetup resume data1` on the given nodes, resuming the
// data1 device that Stall suspends.
func (s *dmsetupDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOption) {
s.c.Run(ctx, option.WithNodes(nodes), `sudo dmsetup resume data1`)
}
Expand Down
Loading

0 comments on commit a93f2b2

Please sign in to comment.