Change garbage-collection-period-seconds flag to garbage-collection-period

Signed-off-by: Swapnil Mhamane <[email protected]>
Swapnil Mhamane committed Sep 11, 2019
1 parent a22922b commit 341715d
Showing 22 changed files with 287 additions and 296 deletions.
2 changes: 1 addition & 1 deletion chart/etcd-backup-restore/templates/etcd-statefulset.yaml
@@ -110,7 +110,7 @@ spec:
- --defragmentation-schedule={{ .Values.backup.defragmentationSchedule }}
{{- end }}
- --etcd-connection-timeout={{ .Values.backup.etcdConnectionTimeout }}
- --delta-snapshot-period-seconds={{ int $.Values.backup.deltaSnapshotPeriodSeconds }}
- --delta-snapshot-period={{ int $.Values.backup.deltaSnapshotPeriod }}
- --delta-snapshot-memory-limit={{ int $.Values.backup.deltaSnapshotMemoryLimit }}
{{- if and .Values.etcdAuth.username .Values.etcdAuth.password }}
- --etcd-username={{ .Values.etcdAuth.username }}
6 changes: 3 additions & 3 deletions chart/etcd-backup-restore/values.yaml
@@ -40,8 +40,8 @@ backup:
# schedule is cron standard schedule to take full snapshots.
schedule: "0 */1 * * *"

# deltaSnapshotPeriodSeconds is Period in seconds after which delta snapshot will be persisted. If this value is set to be lesser than 1, delta snapshotting will be disabled.
deltaSnapshotPeriodSeconds: 60
# deltaSnapshotPeriod is Period after which delta snapshot will be persisted. If this value is set to be lesser than 1 second, delta snapshotting will be disabled.
deltaSnapshotPeriod: "60s"
# deltaSnapshotMemoryLimit is memory limit in bytes after which delta snapshots will be taken out of schedule.
deltaSnapshotMemoryLimit: 104857600 #100MB

@@ -53,7 +53,7 @@ backup:
# maxBackups is the maximum number of backups to keep (may change in future). This is interpreted in case of garbageCollectionPolicy set to LimitBased.
maxBackups: 7

etcdConnectionTimeout: 300
etcdConnectionTimeout: "30s"
# etcdQuotaBytes used to Raise alarms when backend DB size exceeds the given quota bytes
etcdQuotaBytes: 8589934592 #8GB

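The values.yaml comment above describes the new behaviour: `deltaSnapshotPeriod` is now a duration string rather than a plain number of seconds, and anything below one second disables delta snapshotting. Below is a minimal, self-contained Go sketch of that parsing rule; the helper name `parseDeltaSnapshotPeriod` is hypothetical and not part of this change.

```go
package main

import (
	"fmt"
	"time"
)

// parseDeltaSnapshotPeriod is a hypothetical helper illustrating how a
// duration string such as "60s" can be parsed and validated. Values below
// one second are treated as "delta snapshotting disabled", mirroring the
// behaviour described in the values.yaml comment above.
func parseDeltaSnapshotPeriod(value string) (time.Duration, bool, error) {
	d, err := time.ParseDuration(value)
	if err != nil {
		return 0, false, fmt.Errorf("invalid delta snapshot period %q: %w", value, err)
	}
	enabled := d >= time.Second
	return d, enabled, nil
}

func main() {
	for _, v := range []string{"60s", "500ms", "2m"} {
		d, enabled, err := parseDeltaSnapshotPeriod(v)
		if err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Printf("%s -> %v (delta snapshots enabled: %v)\n", v, d, enabled)
	}
}
```

Running it prints, for example, `500ms -> 500ms (delta snapshots enabled: false)`.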
13 changes: 7 additions & 6 deletions cmd/server.go
@@ -44,6 +44,7 @@ func NewServerCommand(ctx context.Context) *cobra.Command {
Short: "start the http server with backup scheduler.",
Long: `Server will keep listening for http request to deliver its functionality through http endpoints.`,
Run: func(cmd *cobra.Command, args []string) {

printVersionInfo()
var (
snapstoreConfig *snapstore.Config
@@ -111,10 +112,10 @@ func NewServerCommand(ctx context.Context) *cobra.Command {
fullSnapshotSchedule,
ss,
maxBackups,
deltaSnapshotIntervalSeconds,
deltaSnapshotMemoryLimit,
time.Duration(etcdConnectionTimeout),
time.Duration(garbageCollectionPeriodSeconds),
deltaSnapshotInterval,
etcdConnectionTimeout,
garbageCollectionPeriod,
garbageCollectionPolicy,
tlsConfig)
if err != nil {
@@ -139,7 +140,7 @@ func NewServerCommand(ctx context.Context) *cobra.Command {
logger.Fatalf("failed to parse defragmentation schedule: %v", err)
return
}
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, time.Duration(etcdConnectionTimeout)*time.Second, ssr.TriggerFullSnapshot, logrus.NewEntry(logger))
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, etcdConnectionTimeout, ssr.TriggerFullSnapshot, logrus.NewEntry(logger))

runEtcdProbeLoopWithSnapshotter(tlsConfig, handler, ssr, ssrStopCh, ctx.Done(), ackCh)
return
@@ -156,7 +157,7 @@ func NewServerCommand(ctx context.Context) *cobra.Command {
logger.Fatalf("failed to parse defragmentation schedule: %v", err)
return
}
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, time.Duration(etcdConnectionTimeout)*time.Second, nil, logrus.NewEntry(logger))
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, etcdConnectionTimeout, nil, logrus.NewEntry(logger))

runEtcdProbeLoopWithoutSnapshotter(tlsConfig, handler, ctx.Done(), ackCh)
},
@@ -327,7 +328,7 @@ func ProbeEtcd(tlsConfig *etcdutil.TLSConfig) error {
}
}

ctx, cancel := context.WithTimeout(context.TODO(), time.Duration(etcdConnectionTimeout)*time.Second)
ctx, cancel := context.WithTimeout(context.TODO(), etcdConnectionTimeout)
defer cancel()
if _, err := client.Get(ctx, "foo"); err != nil {
logger.Errorf("Failed to connect to client: %v", err)
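The call sites above show the main effect of the type change: values such as `etcdConnectionTimeout` are now `time.Duration` and can be passed straight to `context.WithTimeout` and `DefragDataPeriodically`, instead of being wrapped in `time.Duration(x)*time.Second`. A standalone before/after sketch (not the actual server code):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	// Before this change the timeout was an int holding seconds and had to be
	// converted at every call site:
	etcdConnectionTimeoutSeconds := 30
	ctxOld, cancelOld := context.WithTimeout(context.TODO(),
		time.Duration(etcdConnectionTimeoutSeconds)*time.Second)
	defer cancelOld()

	// With the flag typed as time.Duration the value is passed directly:
	etcdConnectionTimeout := 30 * time.Second
	ctxNew, cancelNew := context.WithTimeout(context.TODO(), etcdConnectionTimeout)
	defer cancelNew()

	fmt.Println(ctxOld.Err(), ctxNew.Err())
}
```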
14 changes: 7 additions & 7 deletions cmd/snapshot.go
@@ -61,10 +61,10 @@ storing snapshots on various cloud storage providers as well as local disk locat
fullSnapshotSchedule,
ss,
maxBackups,
deltaSnapshotIntervalSeconds,
deltaSnapshotMemoryLimit,
time.Duration(etcdConnectionTimeout),
time.Duration(garbageCollectionPeriodSeconds),
deltaSnapshotInterval,
etcdConnectionTimeout,
garbageCollectionPeriod,
garbageCollectionPolicy,
tlsConfig)
if err != nil {
@@ -79,7 +79,7 @@ storing snapshots on various cloud storage providers as well as local disk locat
logger.Fatalf("failed to parse defragmentation schedule: %v", err)
return
}
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, time.Duration(etcdConnectionTimeout)*time.Second, ssr.TriggerFullSnapshot, logrus.NewEntry(logger))
go etcdutil.DefragDataPeriodically(ctx, tlsConfig, defragSchedule, etcdConnectionTimeout, ssr.TriggerFullSnapshot, logrus.NewEntry(logger))

go ssr.RunGarbageCollector(ctx.Done())
if err := ssr.Run(ctx.Done(), true); err != nil {
@@ -98,11 +98,11 @@ storing snapshots on various cloud storage providers as well as local disk locat
func initializeSnapshotterFlags(cmd *cobra.Command) {
cmd.Flags().StringSliceVarP(&etcdEndpoints, "endpoints", "e", []string{"127.0.0.1:2379"}, "comma separated list of etcd endpoints")
cmd.Flags().StringVarP(&fullSnapshotSchedule, "schedule", "s", "* */1 * * *", "schedule for snapshots")
cmd.Flags().IntVarP(&deltaSnapshotIntervalSeconds, "delta-snapshot-period-seconds", "i", snapshotter.DefaultDeltaSnapshotIntervalSeconds, "Period in seconds after which delta snapshot will be persisted. If this value is set to be lesser than 1, delta snapshotting will be disabled.")
cmd.Flags().DurationVarP(&deltaSnapshotInterval, "delta-snapshot-period", "i", snapshotter.DefaultDeltaSnapshotInterval, "Period after which delta snapshot will be persisted. If this value is set to be lesser than 1 seconds, delta snapshotting will be disabled.")
cmd.Flags().IntVar(&deltaSnapshotMemoryLimit, "delta-snapshot-memory-limit", snapshotter.DefaultDeltaSnapMemoryLimit, "memory limit after which delta snapshots will be taken")
cmd.Flags().IntVarP(&maxBackups, "max-backups", "m", snapshotter.DefaultMaxBackups, "maximum number of previous backups to keep")
cmd.Flags().IntVar(&etcdConnectionTimeout, "etcd-connection-timeout", 30, "etcd client connection timeout")
cmd.Flags().IntVar(&garbageCollectionPeriodSeconds, "garbage-collection-period-seconds", 60, "Period in seconds for garbage collecting old backups")
cmd.Flags().DurationVar(&etcdConnectionTimeout, "etcd-connection-timeout", 30*time.Second, "etcd client connection timeout")
cmd.Flags().DurationVar(&garbageCollectionPeriod, "garbage-collection-period", 60*time.Second, "Period for garbage collecting old backups")
cmd.Flags().StringVar(&garbageCollectionPolicy, "garbage-collection-policy", snapshotter.GarbageCollectionPolicyExponential, "Policy for garbage collecting old backups")
cmd.Flags().BoolVar(&insecureTransport, "insecure-transport", true, "disable transport security for client connections")
cmd.Flags().BoolVar(&insecureSkipVerify, "insecure-skip-tls-verify", false, "skip server certificate verification")
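The flag definitions above switch from `IntVar`/`IntVarP` to `DurationVar`/`DurationVarP`, so users now pass values like `30s` or `1m` instead of bare second counts. The sketch below uses the standard library `flag` package for illustration; the repository itself registers these flags through cobra/pflag, which accepts the same duration syntax.

```go
package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Standard library flag package used here only for illustration; the
	// repository binds these flags via cobra/pflag with the same semantics.
	var (
		etcdConnectionTimeout   time.Duration
		garbageCollectionPeriod time.Duration
	)
	flag.DurationVar(&etcdConnectionTimeout, "etcd-connection-timeout", 30*time.Second, "etcd client connection timeout")
	flag.DurationVar(&garbageCollectionPeriod, "garbage-collection-period", 60*time.Second, "period for garbage collecting old backups")
	flag.Parse()

	fmt.Printf("etcd-connection-timeout=%v garbage-collection-period=%v\n",
		etcdConnectionTimeout, garbageCollectionPeriod)
}
```

Invoking the sketch with `--etcd-connection-timeout=45s --garbage-collection-period=2m` would print the parsed durations.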
34 changes: 18 additions & 16 deletions cmd/types.go
@@ -15,6 +15,8 @@
package cmd

import (
"time"

"github.com/sirupsen/logrus"
)

@@ -29,22 +31,22 @@ var (
logger = logrus.New()
version bool
//snapshotter flags
fullSnapshotSchedule string
etcdEndpoints []string
etcdUsername string
etcdPassword string
deltaSnapshotIntervalSeconds int
deltaSnapshotMemoryLimit int
maxBackups int
etcdConnectionTimeout int
garbageCollectionPeriodSeconds int
garbageCollectionPolicy string
insecureTransport bool
insecureSkipVerify bool
certFile string
keyFile string
caFile string
defragmentationSchedule string
fullSnapshotSchedule string
etcdEndpoints []string
etcdUsername string
etcdPassword string
deltaSnapshotMemoryLimit int
deltaSnapshotInterval time.Duration
etcdConnectionTimeout time.Duration
garbageCollectionPeriod time.Duration
garbageCollectionPolicy string
maxBackups int
insecureTransport bool
insecureSkipVerify bool
certFile string
keyFile string
caFile string
defragmentationSchedule string

//server flags
port int
6 changes: 3 additions & 3 deletions doc/usage/getting_started.md
@@ -22,7 +22,7 @@ The procedure to provide credentials to access the cloud provider object store v

### Taking scheduled snapshot

Sub-command `snapshot` takes scheduled backups, or `snapshots` of a running `etcd` cluster, which are pushed to one of the storage providers specified above (please note that `etcd` should already be running). One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period-seconds` flag. The default for the same is 10 seconds.
Sub-command `snapshot` takes scheduled backups, or `snapshots` of a running `etcd` cluster, which are pushed to one of the storage providers specified above (please note that `etcd` should already be running). One can apply standard cron format scheduling for regular backup of etcd. The cron schedule is used to take full backups. The delta snapshots are taken at regular intervals in the period in between full snapshots as indicated by the `delta-snapshot-period` flag. The default for the same is 20 seconds.

etcd-backup-restore has two garbage collection policies to clean up existing backups from the cloud bucket. The flag `garbage-collection-policy` is used to indicate the desired garbage collection policy.

Expand All @@ -37,7 +37,7 @@ $ ./bin/etcdbrctl snapshot \
--etcd-endpoints http://localhost:2379 \
--schedule "*/1 * * * *" \
--store-container="etcd-backup" \
--delta-snapshot-period-seconds=10 \
--delta-snapshot-period=10s \
--max-backups=10 \
--garbage-collection-policy='LimitBased'

@@ -89,7 +89,7 @@ $ ./bin/etcdbrctl snapshot \
--endpoints http://localhost:2379 \
--schedule "*/1 * * * *" \
--store-container="etcd-backup" \
--delta-snapshot-period-seconds=10 \
--delta-snapshot-period=10s \
--garbage-collection-policy='Exponential'

INFO[0000] etcd-backup-restore Version: 0.7.0-dev
2 changes: 1 addition & 1 deletion doc/usage/metrics.md
@@ -30,7 +30,7 @@ Abnormally high snapshot duration (`etcdbr_snapshot_duration_seconds`) indicates

`etcdbr_snapshot_gc_total` gives the total number of snapshots garbage collected since bootstrap. You can use this in coordination with `etcdbr_snapshot_duration_seconds_count` to get number of snapshots in object store.

`etcdbr_snapshot_required` indicates whether a new snapshot is required to be taken. Acts as a boolean flag where zero value implies 'false' and non-zero values imply 'true'. :warning: This metric does not work as expected for the case where delta snapshots are disabled (by setting the etcdbrctl flag `delta-snapshot-period-seconds` to 0).
`etcdbr_snapshot_required` indicates whether a new snapshot is required to be taken. Acts as a boolean flag where zero value implies 'false' and non-zero values imply 'true'. :warning: This metric does not work as expected for the case where delta snapshots are disabled (by setting the etcdbrctl flag `delta-snapshot-period` to 0).

### Defragmentation

12 changes: 7 additions & 5 deletions pkg/etcdutil/defrag_test.go
@@ -28,19 +28,21 @@ import (
var _ = Describe("Defrag", func() {
var (
tlsConfig *TLSConfig
endpoints = []string{"http://localhost:2379"}
etcdConnectionTimeout = time.Duration(30 * time.Second)
etcdConnectionTimeout = 30 * time.Second
keyPrefix = "/defrag/key-"
valuePrefix = "val"
etcdUsername string
etcdPassword string
)
tlsConfig = NewTLSConfig("", "", "", true, true, endpoints, etcdUsername, etcdPassword)
BeforeEach(func() {
tlsConfig = NewTLSConfig("", "", "", true, true, endpoints, etcdUsername, etcdPassword)
})
Context("Defragmentation", func() {
BeforeEach(func() {
now := time.Now().Unix()
client, err := GetTLSClientForEtcd(tlsConfig)
defer client.Close()
logger.Infof("TLSConfig %v, Endpoint %v", tlsConfig, endpoints)
Expect(err).ShouldNot(HaveOccurred())
for index := 0; index <= 1000; index++ {
ctx, cancel := context.WithTimeout(testCtx, etcdConnectionTimeout)
@@ -78,7 +80,7 @@ var _ = Describe("Defrag", func() {
})

It("should keep size of DB same in case of timeout", func() {
etcdConnectionTimeout = time.Duration(time.Second)
etcdConnectionTimeout = time.Second
client, err := GetTLSClientForEtcd(tlsConfig)
Expect(err).ShouldNot(HaveOccurred())
defer client.Close()
@@ -88,7 +90,7 @@ var _ = Describe("Defrag", func() {
Expect(err).ShouldNot(HaveOccurred())
oldRevision := oldStatus.Header.GetRevision()

defragmentorJob := NewDefragmentorJob(testCtx, tlsConfig, time.Duration(time.Microsecond), logger, nil)
defragmentorJob := NewDefragmentorJob(testCtx, tlsConfig, time.Microsecond, logger, nil)
defragmentorJob.Run()

ctx, cancel = context.WithTimeout(testCtx, etcdDialTimeout)
14 changes: 8 additions & 6 deletions pkg/etcdutil/etcdutil_suite_test.go
@@ -31,15 +31,15 @@ import (
const (
outputDir = "../../test/output"
etcdDir = outputDir + "/default.etcd"
etcdEndpoint = "http://localhost:2379"
etcdDialTimeout = time.Second * 30
)

var (
testCtx = context.Background()
logger = logrus.New().WithField("suite", "etcdutil")
etcd *embed.Etcd
err error
testCtx = context.Background()
logger = logrus.New().WithField("suite", "etcdutil")
etcd *embed.Etcd
endpoints []string
err error
)

func TestEtcdutil(t *testing.T) {
@@ -48,17 +48,19 @@ func TestEtcdutil(t *testing.T) {
}

var _ = SynchronizedBeforeSuite(func() []byte {
logger.Logger.Out = GinkgoWriter
err = os.RemoveAll(outputDir)
Expect(err).ShouldNot(HaveOccurred())

etcd, err = utils.StartEmbeddedEtcd(testCtx, etcdDir, logger)
Expect(err).ShouldNot(HaveOccurred())
endpoints = []string{etcd.Clients[0].Addr().String()}
logger.Infof("endpoints: %s", endpoints)
var data []byte
return data
}, func(data []byte) {})

var _ = SynchronizedAfterSuite(func() {}, func() {
etcd.Server.Stop()
etcd.Close()
os.RemoveAll(outputDir)
})
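The updated suite derives the client endpoint from the embedded etcd instance instead of hard-coding `http://localhost:2379`. A rough sketch of that pattern, assuming the `go.etcd.io/etcd/embed` package used by etcd v3.3/v3.4 (newer releases move it to `go.etcd.io/etcd/server/v3/embed`):

```go
package main

import (
	"fmt"
	"log"

	"go.etcd.io/etcd/embed" // import path assumed for etcd v3.3/v3.4
)

func main() {
	cfg := embed.NewConfig()
	cfg.Dir = "default.etcd"

	e, err := embed.StartEtcd(cfg)
	if err != nil {
		log.Fatal(err)
	}
	defer e.Close()
	<-e.Server.ReadyNotify()

	// Derive the client endpoint from the embedded server rather than
	// hard-coding it, as the updated test suite does.
	endpoint := e.Clients[0].Addr().String()
	fmt.Println("embedded etcd listening on", endpoint)
}
```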
4 changes: 3 additions & 1 deletion pkg/initializer/validator/datavalidator_test.go
@@ -39,6 +39,7 @@ var _ = Describe("Running Datavalidator", func() {
Logger: logger.Logger,
}
})

Context("with missing data directory", func() {
It("should return DataDirStatus as DataDirectoryNotExist, and non-nil error", func() {
tempDir := fmt.Sprintf("%s.%s", restoreDataDir, "temp")
@@ -189,8 +190,9 @@ var _ = Describe("Running Datavalidator", func() {
}()

// start etcd
etcd, err = utils.StartEmbeddedEtcd(testCtx, restoreDataDir, logger)
etcd, err := utils.StartEmbeddedEtcd(testCtx, restoreDataDir, logger)
Expect(err).ShouldNot(HaveOccurred())
endpoints := []string{etcd.Clients[0].Addr().String()}
// populate etcd but with lesser data than previous populate call, so that the new db has a lower revision
resp := &utils.EtcdDataPopulationResponse{}
utils.PopulateEtcd(testCtx, logger, endpoints, 0, int(keyTo/2), resp)