Skip to content

Commit

Permalink
Merge #88115
Browse files Browse the repository at this point in the history
88115: roachprod: add `schedule-backups` flag to `roachprod start` r=srosenberg a=msbutler

This patch adds a flag to roachprod start which creates a cluster backup schedule to run incrementals every 15 minutes and fulls every hour, starting when roachprod start completes.

For local roachprod clusters, the backups are stored in nodelocal, while all other clusters store their backups in Google Cloud Storage.

This flag currently defaults to false, but in the future it should default to true, to better emulate
a customer cluster. Performance-sensitive roachtests may keep this setting switched off.

Informs #86045

Release note: None

Co-authored-by: Michael Butler <[email protected]>
  • Loading branch information
craig[bot] and msbutler committed Oct 14, 2022
2 parents b445bc7 + 2cb04c0 commit f7340ee
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 8 deletions.
7 changes: 7 additions & 0 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,13 @@ func initFlags() {
"skip-init", startOpts.SkipInit, "skip initializing the cluster")
startCmd.Flags().IntVar(&startOpts.StoreCount,
"store-count", startOpts.StoreCount, "number of stores to start each node with")
startCmd.Flags().BoolVar(&startOpts.ScheduleBackups,
"schedule-backups", startOpts.ScheduleBackups,
"create a cluster backup schedule once the cluster has started (by default, "+
"full backup hourly and incremental every 15 minutes)")
startCmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, "schedule-backup-args", "",
`Recurrence and scheduled backup options specification.
Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS first_run = 'now'"`)

startTenantCmd.Flags().StringVarP(&hostCluster,
"host-cluster", "H", "", "host cluster")
Expand Down
49 changes: 47 additions & 2 deletions pkg/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/config"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/roachprod/ssh"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -87,6 +88,10 @@ type StartOpts struct {
Sequential bool
ExtraArgs []string

// ScheduleBackups, if set, creates a cluster backup schedule once the cluster has started.
ScheduleBackups bool
ScheduleBackupArgs string

// systemd limits on resources.
NumFilesLimit int64

Expand Down Expand Up @@ -152,7 +157,7 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
}

l.Printf("%s: starting nodes", c.Name)
return c.Parallel(l, "", len(nodes), parallelism, func(nodeIdx int) ([]byte, error) {
if err := c.Parallel(l, "", len(nodes), parallelism, func(nodeIdx int) ([]byte, error) {
node := nodes[nodeIdx]

// NB: if cockroach started successfully, we ignore the output as it is
Expand Down Expand Up @@ -193,7 +198,13 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
return nil, errors.Wrap(err, "failed to set cluster settings")
}
return nil, nil
})
}); err != nil {
return err
}
if startOpts.ScheduleBackups {
return c.createFixedBackupSchedule(ctx, l, startOpts.ScheduleBackupArgs)
}
return nil
}

// NodeDir returns the data directory for the given node and store.
Expand Down Expand Up @@ -744,6 +755,40 @@ func (c *SyncedCluster) shouldAdvertisePublicIP() bool {
return false
}

// createFixedBackupSchedule issues a CREATE SCHEDULE statement for a cluster
// backup schedule named test_only_backup. Unless overridden, the schedule
// takes an incremental backup every 15 minutes and a full backup every hour,
// with first_run = 'now'. A non-empty scheduledBackupArgs (populated from the
// 'schedule-backup-args' flag of `roachprod start`) replaces that default
// specification verbatim. Local clusters store their backups under
// 'nodelocal://1'; all other clusters store them under
// 'gs://cockroachdb-backup-testing'.
func (c *SyncedCluster) createFixedBackupSchedule(
	ctx context.Context, l *logger.Logger, scheduledBackupArgs string,
) error {
	// Choose the external storage root based on whether the cluster is local.
	var storageRoot string
	if c.IsLocal() {
		storageRoot = `nodelocal://1`
	} else {
		storageRoot = `gs://cockroachdb-backup-testing`
	}
	l.Printf("%s: creating backup schedule", c.Name)

	// The collection path embeds the cluster name and the current time in
	// nanoseconds.
	destination := fmt.Sprintf(`%s/roachprod-scheduled-backups/%s/%v`,
		storageRoot, c.Name, timeutil.Now().UnixNano())

	// A caller-supplied specification takes precedence; otherwise fall back to
	// a full backup every hour and an incremental every 15 minutes.
	recurrence := scheduledBackupArgs
	if recurrence == "" {
		recurrence = `RECURRING '*/15 * * * *'
FULL BACKUP '@hourly'
WITH SCHEDULE OPTIONS first_run = 'now'`
	}

	// The leading "-e" makes the whole statement a single argument to c.SQL.
	stmt := fmt.Sprintf(`-e
CREATE SCHEDULE IF NOT EXISTS test_only_backup FOR BACKUP INTO '%s' %s`,
		destination, recurrence)
	return c.SQL(ctx, l, []string{stmt})
}

// getEnvVars returns all COCKROACH_* environment variables, in the form
// "key=value".
func getEnvVars() []string {
Expand Down
14 changes: 8 additions & 6 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,12 +635,14 @@ func Extend(l *logger.Logger, clusterName string, lifetime time.Duration) error
// DefaultStartOpts returns a StartOpts populated with default values.
// Backup scheduling is off by default: ScheduleBackups is false and
// ScheduleBackupArgs is empty.
func DefaultStartOpts() install.StartOpts {
return install.StartOpts{
Sequential: true,
EncryptedStores: false,
NumFilesLimit: config.DefaultNumFilesLimit,
SkipInit: false,
StoreCount: 1,
TenantID: 2,
// NOTE(review): the six entries above are repeated verbatim below; this
// looks like the removed and added sides of the diff hunk shown together.
// Confirm which set is live before treating this listing as compilable Go.
Sequential: true,
EncryptedStores: false,
NumFilesLimit: config.DefaultNumFilesLimit,
SkipInit: false,
StoreCount: 1,
TenantID: 2,
ScheduleBackups: false,
ScheduleBackupArgs: "",
}
}

Expand Down

0 comments on commit f7340ee

Please sign in to comment.