Support identical cron schedule (#87)
* Retry on lock being unavailable

* Refactor locking to return plain error

* Collect LockedTime in stats

* Add test case

* Add documentation for LOCK_TIMEOUT

* Log in case lock needs to be awaited

* Release resources created for awaiting lock
m90 authored Mar 25, 2022
1 parent 9bc8db0 commit da8c63f
Showing 13 changed files with 90 additions and 23 deletions.
13 changes: 12 additions & 1 deletion README.md
@@ -335,6 +335,16 @@ You can populate below template according to your requirements and use it as you

# DOCKER_HOST="tcp://docker_socket_proxy:2375"

########### LOCK_TIMEOUT

# When the same container runs overlapping cron schedules, subsequent
# invocations will wait for previous runs to finish before starting.
# By default, waiting times out and the run fails if the lock cannot be
# acquired within 60 minutes. If you need to adjust this timeout, set
# `LOCK_TIMEOUT` to a duration value as per https://pkg.go.dev/time#ParseDuration.

# LOCK_TIMEOUT="60m"

########### EMAIL NOTIFICATIONS

# ************************************************************************
@@ -681,7 +691,8 @@ volumes:
A separate cronjob will be created for each config file.
If a configuration value is set both in the global environment as well as in the config file, the config file will take precedence.
The `backup` command expects to run on an exclusive lock, so it is your responsibility to make sure the invocations do not overlap.
The `backup` command expects to run on an exclusive lock, so if you provide the same or overlapping schedules in your cron expressions, the runs will still be executed serially, one after the other.
The exact order of schedules that use the same cron expression is not specified.
If you need overlapping schedules to run concurrently, create a dedicated container for each schedule instead.
When changing the configuration, you currently need to manually restart the container for the changes to take effect.

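The `LOCK_TIMEOUT` setting documented above accepts any value that Go's `time.ParseDuration` understands. A minimal standalone sketch of which strings parse (the values shown are arbitrary examples, not project defaults):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Valid values combine a number and a unit, e.g. "60m", "1h30m", "45s".
	for _, v := range []string{"60m", "1h30m", "45s"} {
		d, err := time.ParseDuration(v)
		if err != nil {
			fmt.Println("invalid duration:", v)
			continue
		}
		fmt.Printf("%q parses to %s\n", v, d)
	}

	// A bare number without a unit is rejected.
	if _, err := time.ParseDuration("60"); err != nil {
		fmt.Println("rejected:", err)
	}
}
```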
1 change: 1 addition & 0 deletions cmd/backup/config.go
@@ -41,4 +41,5 @@ type Config struct {
	WebdavPassword    string        `split_words:"true"`
	ExecLabel         string        `split_words:"true"`
	ExecForwardOutput bool          `split_words:"true"`
	LockTimeout       time.Duration `split_words:"true" default:"60m"`
}
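The `split_words` and `default` struct tags follow the conventions of `github.com/kelseyhightower/envconfig`. Assuming that is the library populating `Config`, the sketch below shows how `LOCK_TIMEOUT` would reach `LockTimeout`, with the struct trimmed to the new field for illustration:

```go
package main

import (
	"fmt"
	"log"
	"os"
	"time"

	"github.com/kelseyhightower/envconfig"
)

// config mirrors only the field added in this commit.
type config struct {
	LockTimeout time.Duration `split_words:"true" default:"60m"`
}

func main() {
	// With LOCK_TIMEOUT unset in the environment, the default of 60m applies.
	var c config
	if err := envconfig.Process("", &c); err != nil {
		log.Fatal(err)
	}
	fmt.Println(c.LockTimeout) // 1h0m0s

	// An explicit value overrides the default.
	os.Setenv("LOCK_TIMEOUT", "90m")
	var c2 config
	if err := envconfig.Process("", &c2); err != nil {
		log.Fatal(err)
	}
	fmt.Println(c2.LockTimeout) // 1h30m0s
}
```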
58 changes: 58 additions & 0 deletions cmd/backup/lock.go
@@ -0,0 +1,58 @@
// Copyright 2022 - Offen Authors <[email protected]>
// SPDX-License-Identifier: MPL-2.0

package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/gofrs/flock"
)

// lock opens a lockfile at the given location, keeping it locked until the
// caller invokes the returned release func. In case the lock is currently blocked
// by another execution, it will repeatedly retry until the lock is available
// or the given timeout is exceeded.
func (s *script) lock(lockfile string) (func() error, error) {
	start := time.Now()
	defer func() {
		s.stats.LockedTime = time.Now().Sub(start)
	}()

	retry := time.NewTicker(5 * time.Second)
	defer retry.Stop()
	deadline := time.NewTimer(s.c.LockTimeout)
	defer deadline.Stop()

	fileLock := flock.New(lockfile)

	for {
		acquired, err := fileLock.TryLock()
		if err != nil {
			return noop, fmt.Errorf("lock: error trying lock: %w", err)
		}
		if acquired {
			if s.encounteredLock {
				s.logger.Info("Acquired exclusive lock on subsequent attempt, ready to continue.")
			}
			return fileLock.Unlock, nil
		}

		if !s.encounteredLock {
			s.logger.Infof(
				"Exclusive lock was not available on first attempt. Will retry until it becomes available or the timeout of %s is exceeded.",
				s.c.LockTimeout,
			)
			s.encounteredLock = true
		}

		select {
		case <-retry.C:
			continue
		case <-deadline.C:
			return noop, errors.New("lock: timed out waiting for lockfile to become available")
		}
	}
}
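The retry loop in `lock` is needed because `flock.TryLock` reports a lock held by another holder as `acquired == false` rather than as an error. A self-contained sketch of that behaviour, using a hypothetical lock path rather than the container's real one:

```go
package main

import (
	"fmt"
	"log"

	"github.com/gofrs/flock"
)

func main() {
	const path = "/tmp/example-dockervolumebackup.lock" // hypothetical path

	first := flock.New(path)
	ok, err := first.TryLock()
	if err != nil || !ok {
		log.Fatalf("could not acquire initial lock: ok=%v err=%v", ok, err)
	}

	// While `first` holds the lock, a second attempt is not an error —
	// it simply reports that the lock was not acquired.
	second := flock.New(path)
	acquired, err := second.TryLock()
	fmt.Println(acquired, err)

	// Once the holder releases, the next attempt succeeds, which is what
	// the retry ticker in lock() waits for.
	if err := first.Unlock(); err != nil {
		log.Fatal(err)
	}
	acquired, _ = second.TryLock()
	fmt.Println(acquired)
	second.Unlock()
}
```

This is also why the deadline timer matters: without it, a run scheduled behind a stuck previous run would wait indefinitely.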
7 changes: 4 additions & 3 deletions cmd/backup/main.go
@@ -8,14 +8,15 @@ import (
)

func main() {
	unlock := lock("/var/lock/dockervolumebackup.lock")
	defer unlock()

	s, err := newScript()
	if err != nil {
		panic(err)
	}

	unlock, err := s.lock("/var/lock/dockervolumebackup.lock")
	defer unlock()
	s.must(err)

	defer func() {
		if pArg := recover(); pArg != nil {
			if err, ok := pArg.(error); ok {
2 changes: 2 additions & 0 deletions cmd/backup/script.go
@@ -47,6 +47,8 @@ type script struct {
	file  string
	stats *Stats

	encounteredLock bool

	c *Config
}

1 change: 1 addition & 0 deletions cmd/backup/stats.go
@@ -42,6 +42,7 @@ type Stats struct {
	StartTime  time.Time
	EndTime    time.Time
	TookTime   time.Duration
	LockedTime time.Duration
	LogOutput  *bytes.Buffer
	Containers ContainersStats
	BackupFile BackupFileStats
17 changes: 0 additions & 17 deletions cmd/backup/util.go
@@ -10,27 +10,10 @@ import (
"io"
"os"
"strings"

"github.com/gofrs/flock"
)

var noop = func() error { return nil }

// lock opens a lockfile at the given location, keeping it locked until the
// caller invokes the returned release func. When invoked while the file is
// still locked the function panics.
func lock(lockfile string) func() error {
fileLock := flock.New(lockfile)
acquired, err := fileLock.TryLock()
if err != nil {
panic(err)
}
if !acquired {
panic("unable to acquire file lock")
}
return fileLock.Unlock
}

// copy creates a copy of the file located at `dst` at `src`.
func copyFile(src, dst string) error {
in, err := os.Open(src)
1 change: 1 addition & 0 deletions docs/NOTIFICATION-TEMPLATES.md
@@ -13,6 +13,7 @@ Here is a list of all data passed to the template:
* `StartTime`: time when the script started execution
* `EndTime`: time when the backup has completed successfully (after pruning)
* `TookTime`: amount of time it took for the backup to run. (equal to `EndTime - StartTime`)
* `LockedTime`: amount of time it took for the backup to acquire the exclusive lock
* `LogOutput`: full log of the application
* `Containers`: object containing stats about the docker containers
* `All`: total number of containers
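`TookTime` and `LockedTime` are `time.Duration` values, so they render in Go's duration notation (e.g. `1m35s`) when interpolated into a template. A standalone `text/template` sketch that illustrates only this rendering; it is not the project's actual notification wiring, and the field names are taken from the list above:

```go
package main

import (
	"os"
	"text/template"
	"time"
)

// stats carries only the two duration fields relevant here.
type stats struct {
	TookTime   time.Duration
	LockedTime time.Duration
}

func main() {
	tmpl := template.Must(template.New("notification").Parse(
		"Backup took {{ .TookTime }}, of which {{ .LockedTime }} was spent waiting for the lock.\n",
	))
	// Example values only.
	s := stats{TookTime: 95 * time.Second, LockedTime: 30 * time.Second}
	if err := tmpl.Execute(os.Stdout, s); err != nil {
		panic(err)
	}
	// Output: Backup took 1m35s, of which 30s was spent waiting for the lock.
}
```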
File renamed without changes.
2 changes: 2 additions & 0 deletions test/confd/02backup.env
@@ -0,0 +1,2 @@
BACKUP_FILENAME="other.tar.gz"
BACKUP_CRON_EXPRESSION="*/1 * * * *"
File renamed without changes.
5 changes: 3 additions & 2 deletions test/confd/docker-compose.yml
@@ -7,8 +7,9 @@ services:
    volumes:
      - ./local:/archive
      - app_data:/backup/app_data:ro
      - ./backup.env:/etc/dockervolumebackup/conf.d/00backup.env
      - ./never.env:/etc/dockervolumebackup/conf.d/10never.env
      - ./01backup.env:/etc/dockervolumebackup/conf.d/01backup.env
      - ./02backup.env:/etc/dockervolumebackup/conf.d/02backup.env
      - ./03never.env:/etc/dockervolumebackup/conf.d/03never.env
      - /var/run/docker.sock:/var/run/docker.sock

  offen:
offen:
6 changes: 6 additions & 0 deletions test/confd/run.sh
@@ -19,6 +19,12 @@ if [ ! -f ./local/conf.tar.gz ]; then
fi
echo "[TEST:PASS] Config from file was used."

if [ ! -f ./local/other.tar.gz ]; then
  echo "[TEST:FAIL] Run on same schedule did not succeed."
  exit 1
fi
echo "[TEST:PASS] Run on same schedule succeeded."

if [ -f ./local/never.tar.gz ]; then
  echo "[TEST:FAIL] Unexpected file was found."
  exit 1
