From 5a3e98942a6c46b27f32563f4b5dd6dbe109aff9 Mon Sep 17 00:00:00 2001 From: Shaun Crampton Date: Wed, 28 Jun 2017 10:53:48 +0100 Subject: [PATCH] Switch to global iptables lock file. --- config/config_params.go | 4 +-- intdataplane/int_dataplane.go | 13 +++++++-- iptables/lock.go | 34 +++++++++++++++++------- iptables/table.go | 50 +++-------------------------------- iptables/table_test.go | 14 ---------- 5 files changed, 41 insertions(+), 74 deletions(-) diff --git a/config/config_params.go b/config/config_params.go index fa237cf94a..dae3adf3e2 100644 --- a/config/config_params.go +++ b/config/config_params.go @@ -111,8 +111,8 @@ type Config struct { IptablesRefreshInterval time.Duration `config:"seconds;10"` IptablesPostWriteCheckIntervalSecs time.Duration `config:"seconds;1"` - IptablesLockFilePath string `config:"file;"` - IptablesLockTimeout time.Duration `config:"seconds;2"` + IptablesLockFilePath string `config:"file;/run/xtables.lock"` + IptablesLockTimeout time.Duration `config:"seconds;10"` MetadataAddr string `config:"hostname;127.0.0.1;die-on-fail"` MetadataPort int `config:"int(0,65535);8775;die-on-fail"` diff --git a/intdataplane/int_dataplane.go b/intdataplane/int_dataplane.go index 180fd0f328..7eacf819e7 100644 --- a/intdataplane/int_dataplane.go +++ b/intdataplane/int_dataplane.go @@ -203,8 +203,6 @@ func NewIntDataplaneDriver(config Config) *InternalDataplane { InsertMode: config.IptablesInsertMode, RefreshInterval: config.IptablesRefreshInterval, PostWriteInterval: config.IptablesPostWriteCheckInterval, - LockFilePath: config.IptablesLockFilePath, - LockTimeout: config.IptablesLockTimeout, } // However, the NAT tables need an extra cleanup regex. @@ -737,6 +735,13 @@ func (d *InternalDataplane) apply() { var reschedDelayMutex sync.Mutex var reschedDelay time.Duration var iptablesWG sync.WaitGroup + // Grab the iptables lock so that we don't conflict with other processes that are also + // trying to update iptables. We're about to do parallel updates to multiple iptables + // tables but that's OK because each table is separate in the kernel. + iptLock, err := iptables.GrabIptablesLocks(d.config.IptablesLockFilePath, d.config.IptablesLockTimeout) + if err != nil { + log.WithError(err).Panic("Failed to get iptables lock") + } for _, t := range d.allIptablesTables { iptablesWG.Add(1) go func(t *iptables.Table) { @@ -751,6 +756,10 @@ func (d *InternalDataplane) apply() { }(t) } iptablesWG.Wait() + err = iptLock.Close() + if err != nil { + log.WithError(err).Panic("Failed to close iptables lock") + } // Now clean up any left-over IP sets. for _, ipSets := range d.ipSets { diff --git a/iptables/lock.go b/iptables/lock.go index eb56f0ae9a..7d2f97afab 100644 --- a/iptables/lock.go +++ b/iptables/lock.go @@ -23,8 +23,10 @@ import ( "os" "time" + "github.com/Sirupsen/logrus" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sys/unix" + "io" ) var ( @@ -32,33 +34,43 @@ var ( Name: "felix_iptables_lock_acquire_secs", Help: "Time in seconds that it took to acquire the iptables lock(s).", }) - countLocktimeouts = prometheus.NewCounter(prometheus.CounterOpts{ + countLockTimeouts = prometheus.NewCounter(prometheus.CounterOpts{ Name: "felix_iptables_lock_timeouts", Help: "Number of lock timeouts while waiting for hte iptables lock(s).", }) + countLockRetries = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "felix_iptables_lock_retries", + Help: "Number of times the iptables lock was held by someone else and we had to retry.", + }) ) func init() { prometheus.MustRegister( summaryLockAcquisitionTime, - countLocktimeouts, + countLockTimeouts, + countLockRetries, ) } -const DefaultLockFilePath16x = "/run/xtables.lock" - type locker struct { lock16 *os.File lock14 *net.UnixListener } -func (l *locker) Close() { +func (l *locker) Close() error { if l.lock16 != nil { - l.lock16.Close() + err := l.lock16.Close() + if err != nil { + logrus.WithError(err).Error("Error while closing iptables lock file") + } } if l.lock14 != nil { - l.lock14.Close() + err := l.lock14.Close() + if err != nil { + logrus.WithError(err).Error("Error while closing iptables lock socket") + } } + return nil } var ( @@ -66,7 +78,7 @@ var ( Err16LockTimeout = errors.New("Timed out waiting for iptables 1.6 lock") ) -func grabIptablesLocks(lockFilePath string, timeout time.Duration) (Closable, error) { +func GrabIptablesLocks(lockFilePath string, timeout time.Duration) (io.Closer, error) { var err error var success bool @@ -94,10 +106,11 @@ func grabIptablesLocks(lockFilePath string, timeout time.Duration) (Closable, er break } if time.Since(startTime) > timeout { - countLocktimeouts.Inc() + countLockTimeouts.Inc() return nil, Err16LockTimeout } time.Sleep(51 * time.Millisecond) + countLockRetries.Inc() } startTime14 := time.Now() @@ -107,10 +120,11 @@ func grabIptablesLocks(lockFilePath string, timeout time.Duration) (Closable, er break } if time.Since(startTime14) > timeout { - countLocktimeouts.Inc() + countLockTimeouts.Inc() return nil, Err14LockTimeout } time.Sleep(51 * time.Millisecond) + countLockRetries.Inc() } summaryLockAcquisitionTime.Observe(time.Since(startTime).Seconds()) diff --git a/iptables/table.go b/iptables/table.go index 6333a3d774..8e7e791564 100644 --- a/iptables/table.go +++ b/iptables/table.go @@ -177,7 +177,6 @@ func init() { // Table doesn't do any internal synchronization, its methods should only be called from one // thread. To avoid conflicts in the dataplane itself, there should only be one instance of // Table for each iptable table in an application. - type Table struct { Name string IPVersion uint8 @@ -238,19 +237,8 @@ type Table struct { // Shims for time.XXX functions: timeSleep func(d time.Duration) timeNow func() time.Time - - grabIptablesLocks GrabLocksFunc - - lockFilePath string - lockTimeout time.Duration -} - -type Closable interface { - Close() } -type GrabLocksFunc func(path string, timeout time.Duration) (Closable, error) - type TableOptions struct { HistoricChainPrefixes []string ExtraCleanupRegexPattern string @@ -258,19 +246,12 @@ type TableOptions struct { RefreshInterval time.Duration PostWriteInterval time.Duration - // LockFilePath is the location of the iptables lockfile, used by iptables v1.6+. - // Defaults to the default path to the file. - LockFilePath string - LockTimeout time.Duration - // NewCmdOverride for tests, if non-nil, factory to use instead of the real exec.Command() NewCmdOverride cmdFactory // SleepOverride for tests, if non-nil, replacement for time.Sleep() SleepOverride func(d time.Duration) // NowOverride for tests, if non-nil, replacement for time.Now() NowOverride func() time.Time - // GrabLocksOverride for tests, if non-nil, replacement for grabIptablesLocks - GrabLocksOverride GrabLocksFunc } func NewTable( @@ -337,18 +318,6 @@ func NewTable( if options.NowOverride != nil { now = options.NowOverride } - lockFilePath := DefaultLockFilePath16x - if options.LockFilePath != "" { - lockFilePath = options.LockFilePath - } - lockTimeout := 2 * time.Second - if options.LockTimeout > 0 { - lockTimeout = options.LockTimeout - } - grabIptablesLocks := grabIptablesLocks - if options.GrabLocksOverride != nil { - grabIptablesLocks = options.GrabLocksOverride - } table := &Table{ Name: name, @@ -378,12 +347,9 @@ func NewTable( refreshInterval: options.RefreshInterval, - newCmd: newCmd, - timeSleep: sleep, - timeNow: now, - grabIptablesLocks: grabIptablesLocks, - lockFilePath: lockFilePath, - lockTimeout: lockTimeout, + newCmd: newCmd, + timeSleep: sleep, + timeNow: now, gaugeNumChains: gaugeNumChains.WithLabelValues(fmt.Sprintf("%d", ipVersion), name), gaugeNumRules: gaugeNumRules.WithLabelValues(fmt.Sprintf("%d", ipVersion), name), @@ -993,15 +959,7 @@ func (t *Table) applyUpdates() error { cmd.SetStdout(&outputBuf) cmd.SetStderr(&errBuf) countNumRestoreCalls.Inc() - l, err := t.grabIptablesLocks(t.lockFilePath, t.lockTimeout) - if err != nil { - t.logCxt.WithError(err).Warn("Failed to acquire iptables lock.") - t.inSyncWithDataPlane = false - countNumRestoreErrors.Inc() - return err - } - err = cmd.Run() - l.Close() + err := cmd.Run() if err != nil { t.logCxt.WithFields(log.Fields{ "output": outputBuf.String(), diff --git a/iptables/table_test.go b/iptables/table_test.go index a79a5edb97..060dcc9911 100644 --- a/iptables/table_test.go +++ b/iptables/table_test.go @@ -45,7 +45,6 @@ var _ = Describe("Table with an empty dataplane", func() { NewCmdOverride: dataplane.newCmd, SleepOverride: dataplane.sleep, NowOverride: dataplane.now, - GrabLocksOverride: noopGrabLocks, }, ) }) @@ -418,7 +417,6 @@ func describePostUpdateCheckTests(enableRefresh bool) { NewCmdOverride: dataplane.newCmd, SleepOverride: dataplane.sleep, NowOverride: dataplane.now, - GrabLocksOverride: noopGrabLocks, } if enableRefresh { options.RefreshInterval = 30 * time.Second @@ -621,7 +619,6 @@ func describeDirtyDataplaneTests(appendMode bool) { NewCmdOverride: dataplane.newCmd, SleepOverride: dataplane.sleep, InsertMode: insertMode, - GrabLocksOverride: noopGrabLocks, }, ) }) @@ -990,7 +987,6 @@ var _ = Describe("Table with inserts and a non-Calico chain", func() { NewCmdOverride: dataplane.newCmd, SleepOverride: dataplane.sleep, NowOverride: dataplane.now, - GrabLocksOverride: noopGrabLocks, }, ) table.SetRuleInsertions("FORWARD", []Rule{ @@ -1025,13 +1021,3 @@ var _ = Describe("Table with inserts and a non-Calico chain", func() { }) }) }) - -type noopCloser struct{} - -func (n noopCloser) Close() { -} - -func noopGrabLocks(path string, timeout time.Duration) (Closable, error) { - var c noopCloser - return c, nil -}