Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dkg: sync privkeylock before exiting #2257

Merged
merged 9 commits into from
Jun 1, 2023
35 changes: 24 additions & 11 deletions app/privkeylock/privkeylock.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"encoding/json"
"os"
"sync"
"time"

"github.com/obolnetwork/charon/app/errors"
Expand All @@ -21,20 +22,20 @@ var (
)

// New returns a new private key locking service for the lock file at path.
//
// It first inspects any existing lock file and returns an error if the file cannot be
// read (for a reason other than not existing), if its content cannot be decoded as JSON
// metadata, or if its timestamp is no older than staleDuration. Otherwise it writes the
// lock file with command and the current time and returns the Service.
//
// The returned Service has its wait group counter already incremented by one; that
// count is released when Run returns, which is what Done waits for.
// NOTE(review): if Run is never started on the returned Service, Done will block
// forever — confirm every caller starts Run.
func New(path, command string) (Service, error) {
func New(path, command string) (*Service, error) {
content, err := os.ReadFile(path)
if errors.Is(err, os.ErrNotExist) { //nolint:revive // Empty block is fine.
// No existing lock file, so there is nothing to validate; the writeFile call below writes it.
} else if err != nil {
return Service{}, errors.Wrap(err, "cannot read private key lock file", z.Str("path", path))
return nil, errors.Wrap(err, "cannot read private key lock file", z.Str("path", path))
} else {
var meta metadata
if err := json.Unmarshal(content, &meta); err != nil {
return Service{}, errors.Wrap(err, "cannot decode private key lock file content", z.Str("path", path))
return nil, errors.Wrap(err, "cannot decode private key lock file content", z.Str("path", path))
}

// A timestamp within staleDuration means the lock was refreshed recently, so it is treated as held.
if time.Since(meta.Timestamp) <= staleDuration {
return Service{}, errors.New(
return nil, errors.New(
"existing private key lock file found, another charon instance may be running on your machine",
z.Str("path", path),
z.Str("command", meta.Command),
Expand All @@ -43,45 +44,57 @@ func New(path, command string) (Service, error) {
}

// Take the lock immediately by writing the file with the current time.
if err := writeFile(path, command, time.Now()); err != nil {
return Service{}, err
return nil, err
}

return Service{
s := &Service{
command: command,
path: path,
updatePeriod: updatePeriod,
}, nil
}

// Account for the single expected Run call; Run's deferred wg.Done releases it.
s.wg.Add(1)

return s, nil
}

// Service is a private key locking service.
// It holds a sync.WaitGroup, so it is handled through a pointer (*Service) rather than copied.
type Service struct {
// command is the command string passed to writeFile whenever the lock file is written.
command string
// path is the location of the lock file.
path string
// updatePeriod is the interval of the ticker that drives lock file rewrites in Run.
updatePeriod time.Duration
// wg is incremented once in New and released when Run returns; Done waits on it.
wg sync.WaitGroup
}

// Run runs the service, rewriting the lock file every updatePeriod until ctx is cancelled.
//
// On context cancellation it removes the lock file and returns nil, or a wrapped error
// if the removal fails. If a periodic rewrite fails, that error is returned immediately
// and the lock file is not removed. In every case the deferred wg.Done releases any
// caller blocked in Done.
// NOTE(review): the wait group counter is incremented only once in New, so Run appears
// intended to be called exactly once per Service — confirm with callers.
func (h Service) Run(ctx context.Context) error {
tick := time.NewTicker(h.updatePeriod)
func (s *Service) Run(ctx context.Context) error {
tick := time.NewTicker(s.updatePeriod)
defer tick.Stop()

// Signal completion to Done regardless of which return path is taken.
defer s.wg.Done()

for {
select {
case <-ctx.Done():
// Shutdown requested: release the lock by deleting the file.
if err := os.Remove(h.path); err != nil {
if err := os.Remove(s.path); err != nil {
return errors.Wrap(err, "deleting private key lock file failed")
}

return nil
case <-tick.C:
// Overwrite lockfile with new metadata
if err := writeFile(h.path, h.command, time.Now()); err != nil {
if err := writeFile(s.path, s.command, time.Now()); err != nil {
return err
}
}
}
}

// Done blocks until Run has returned.
//
// Run returns after removing the private key lock file on context cancellation, but it
// also returns early (without removing the file) if rewriting the lock file fails, so
// Done returning does not by itself guarantee that the file was deleted.
// NOTE(review): the counter is incremented in New and released only by Run, so Done
// blocks forever if Run was never started.
func (s *Service) Done() {
s.wg.Wait()
}

// metadata is the metadata stored in the lock file.
type metadata struct {
Command string `json:"command"`
Expand Down
10 changes: 9 additions & 1 deletion dkg/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,23 @@ func (c Config) HasTestConfig() bool {
//nolint:maintidx // Refactor into smaller steps.
func Run(ctx context.Context, conf Config) (err error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()

ctx = log.WithTopic(ctx, "dkg")

lockSvc, err := privkeylock.New(p2p.KeyPath(conf.DataDir)+".lock", "charon dkg")
if err != nil {
// cancel manually here because we'll defer cancel() and lockSvc.Done() later
gsora marked this conversation as resolved.
Show resolved Hide resolved
cancel()
return err
}

// make sure to always wait for lockSvc to be done.
gsora marked this conversation as resolved.
Show resolved Hide resolved
defer func() {
// explicitly cancel the context and wait until the privkey lock is deleted
gsora marked this conversation as resolved.
Show resolved Hide resolved
cancel()
lockSvc.Done()
}()

go func(ctx context.Context) {
if err := lockSvc.Run(ctx); err != nil {
log.Error(ctx, "Error locking private key file", err)
Expand Down
8 changes: 8 additions & 0 deletions dkg/dkg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ func testDKG(t *testing.T, def cluster.Definition, dir string, p2pKeys []*k1.Pri
testutil.SkipIfBindErr(t, err)
require.NoError(t, err)

// check that the privkey lock file has been deleted in all nodes at the end of dkg
for i := 0; i < len(def.Operators); i++ {
lockPath := path.Join(dir, fmt.Sprintf("node%d", i), "charon-enr-private-key.lock")

_, openErr := os.Open(lockPath)
require.ErrorIs(t, openErr, os.ErrNotExist)
}

if keymanager {
// Wait until all keystores are received by the keymanager server
expectedReceives := len(def.Operators)
Expand Down