Skip to content

Commit

Permalink
roachprod: add schedule-backups flag to roachprod start
Browse files Browse the repository at this point in the history
This patch adds a flag to roachprod start which creates a cluster backup
schedule to run incrementals every 15 minutes and fulls every hour by default,
starting when roachprod start completes. The user can also specify their own
scheduled backup recurrence via the 'schedule-backup-args' flag.

For local roachprod clusters, the backups are stored in nodelocal, while others
store their backups in google cloud storage, at
`gs://cockroachdb-backup-testing`.

This flag currently defaults to false, but in the future, this setting should
become default true, to better emulate customer clusters. Performance-sensitive
roachtests may continue to keep this flag set to false.

Informs #86045

Release note: None

address renato comments
  • Loading branch information
msbutler committed Oct 13, 2022
1 parent 2a1abc8 commit 2cb04c0
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 8 deletions.
7 changes: 7 additions & 0 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,13 @@ func initFlags() {
"skip-init", startOpts.SkipInit, "skip initializing the cluster")
startCmd.Flags().IntVar(&startOpts.StoreCount,
"store-count", startOpts.StoreCount, "number of stores to start each node with")
startCmd.Flags().BoolVar(&startOpts.ScheduleBackups,
"schedule-backups", startOpts.ScheduleBackups,
"create a cluster backup schedule once the cluster has started (by default, "+
"full backup hourly and incremental every 15 minutes)")
startCmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, "schedule-backup-args", "",
`Recurrence and scheduled backup options specification.
Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS first_run = 'now'"`)

startTenantCmd.Flags().StringVarP(&hostCluster,
"host-cluster", "H", "", "host cluster")
Expand Down
49 changes: 47 additions & 2 deletions pkg/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/config"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/roachprod/ssh"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -87,6 +88,10 @@ type StartOpts struct {
Sequential bool
ExtraArgs []string

// ScheduleBackups starts a backup schedule once the cluster starts
ScheduleBackups bool
ScheduleBackupArgs string

// systemd limits on resources.
NumFilesLimit int64

Expand Down Expand Up @@ -152,7 +157,7 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
}

l.Printf("%s: starting nodes", c.Name)
return c.Parallel(l, "", len(nodes), parallelism, func(nodeIdx int) ([]byte, error) {
if err := c.Parallel(l, "", len(nodes), parallelism, func(nodeIdx int) ([]byte, error) {
node := nodes[nodeIdx]

// NB: if cockroach started successfully, we ignore the output as it is
Expand Down Expand Up @@ -193,7 +198,13 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
return nil, errors.Wrap(err, "failed to set cluster settings")
}
return nil, nil
})
}); err != nil {
return err
}
if startOpts.ScheduleBackups {
return c.createFixedBackupSchedule(ctx, l, startOpts.ScheduleBackupArgs)
}
return nil
}

// NodeDir returns the data directory for the given node and store.
Expand Down Expand Up @@ -744,6 +755,40 @@ func (c *SyncedCluster) shouldAdvertisePublicIP() bool {
return false
}

// createFixedBackupSchedule issues a CREATE SCHEDULE ... FOR BACKUP statement
// against the cluster via c.SQL.
//
// When scheduledBackupArgs is empty, the schedule takes an incremental backup
// every 15 minutes and a full backup hourly, with the first run set to 'now'.
// A non-empty scheduledBackupArgs (the 'schedule-backup-args' flag of
// `roachprod start`) replaces that recurrence clause verbatim.
//
// Local clusters write to 'nodelocal://1'; every other cluster writes to
// 'gs://cockroachdb-backup-testing'. The collection path is suffixed with the
// cluster name and the current time in Unix nanoseconds.
func (c *SyncedCluster) createFixedBackupSchedule(
	ctx context.Context, l *logger.Logger, scheduledBackupArgs string,
) error {
	// Pick the external storage root based on where the cluster runs.
	var storageRoot string
	if c.IsLocal() {
		storageRoot = `nodelocal://1`
	} else {
		storageRoot = `gs://cockroachdb-backup-testing`
	}
	l.Printf("%s: creating backup schedule", c.Name)

	nowNanos := timeutil.Now().UnixNano()
	destination := fmt.Sprintf(`%s/roachprod-scheduled-backups/%s/%v`,
		storageRoot, c.Name, nowNanos)

	// A caller-supplied clause wins; otherwise fall back to the default of a
	// full backup every hour and an incremental every 15 minutes.
	recurrence := scheduledBackupArgs
	if recurrence == "" {
		recurrence = `RECURRING '*/15 * * * *'
FULL BACKUP '@hourly'
WITH SCHEDULE OPTIONS first_run = 'now'`
	}

	// The leading "-e" is part of the single argument handed to c.SQL.
	stmt := fmt.Sprintf(`-e
CREATE SCHEDULE IF NOT EXISTS test_only_backup FOR BACKUP INTO '%s' %s`,
		destination, recurrence)
	return c.SQL(ctx, l, []string{stmt})
}

// getEnvVars returns all COCKROACH_* environment variables, in the form
// "key=value".
func getEnvVars() []string {
Expand Down
14 changes: 8 additions & 6 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,12 +635,14 @@ func Extend(l *logger.Logger, clusterName string, lifetime time.Duration) error
// DefaultStartOpts returns a StartOpts populated with default values.
// Backup scheduling is off by default: ScheduleBackups is false and
// ScheduleBackupArgs is the empty string.
func DefaultStartOpts() install.StartOpts {
return install.StartOpts{
// NOTE(review): this is a rendered diff hunk without +/- markers. The six
// entries immediately below appear to be the pre-change (removed) side; the
// eight re-aligned entries after them appear to be the post-change side.
Sequential: true,
EncryptedStores: false,
NumFilesLimit: config.DefaultNumFilesLimit,
SkipInit: false,
StoreCount: 1,
TenantID: 2,
// Post-change side: the same six defaults plus the two backup-schedule fields.
Sequential: true,
EncryptedStores: false,
NumFilesLimit: config.DefaultNumFilesLimit,
SkipInit: false,
StoreCount: 1,
TenantID: 2,
// No backup schedule is created unless the caller opts in.
ScheduleBackups: false,
// Empty means no custom schedule clause is supplied.
ScheduleBackupArgs: "",
}
}

Expand Down

0 comments on commit 2cb04c0

Please sign in to comment.