From f9c9d91b99ffdec94a363396f3c5d44addb04e6f Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Fri, 24 May 2019 23:24:19 +0200 Subject: [PATCH 1/4] [dbode] Use setcap to be able to set rlimit for docker image --- docker/m3dbnode/Dockerfile | 13 ++++++ src/dbnode/server/limits.go | 79 ++++++++++++++++++++++++++++++++++++- src/dbnode/server/server.go | 6 +++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/docker/m3dbnode/Dockerfile b/docker/m3dbnode/Dockerfile index d19910a0c7..4e168dd4ec 100644 --- a/docker/m3dbnode/Dockerfile +++ b/docker/m3dbnode/Dockerfile @@ -26,5 +26,18 @@ COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd.yml /etc/m3dbnode/m3dbnode.yml COPY --from=builder /go/src/github.com/m3db/m3/scripts/m3dbnode_bootstrapped.sh /bin/ +# Use setcap and run as specific user +RUN apk add libcap && \ + mkdir -p /home/m3dbnode-user && \ + addgroup -S m3dbnode-group && \ + adduser -u 1000 -S -h /home/m3dbnode-user -G m3dbnode-group m3dbnode-user && \ + chown m3dbnode-user:m3dbnode-group /bin/m3dbnode && \ + setcap cap_ipc_lock=+ep /bin/m3dbnode && \ + setcap cap_sys_resource=+ep /bin/m3dbnode && \ + mkdir -p /var/lib && \ + chown -R m3dbnode-user:m3dbnode-group /var/lib + +USER m3dbnode-user + ENTRYPOINT [ "/bin/m3dbnode" ] CMD [ "-f", "/etc/m3dbnode/m3dbnode.yml" ] diff --git a/src/dbnode/server/limits.go b/src/dbnode/server/limits.go index 7881aac382..a9e46e3f63 100644 --- a/src/dbnode/server/limits.go +++ b/src/dbnode/server/limits.go @@ -21,10 +21,15 @@ package server import ( + "bufio" "fmt" + "os/exec" + "strconv" + "strings" + "syscall" - xos "github.com/m3db/m3/src/x/os" xerror "github.com/m3db/m3/src/x/errors" + xos "github.com/m3db/m3/src/x/os" ) const ( @@ -78,3 +83,75 @@ func validateProcessLimits() error { return multiErr.FinalError() } + +func raiseRlimitToNROpen() error { + cmd := exec.Command("sysctl", "-a") + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_stdout_err=%v", err) + } + + defer stdout.Close() + + if err := cmd.Start(); err != nil { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_start_err=%v", err) + } + + var ( + scanner = bufio.NewScanner(stdout) + limit uint64 + ) + for scanner.Scan() { + line := scanner.Text() + if !strings.Contains(line, "nr_open") { + continue + } + equalsIdx := strings.LastIndex(line, "=") + if equalsIdx < 0 { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_parse_stdout_err=%v", err) + } + value := strings.TrimSpace(line[equalsIdx+1:]) + n, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_eval_stdout_err=%v", err) + } + + limit = uint64(n) + break + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_read_stdout_err=%v", err) + } + + if err := cmd.Wait(); err != nil { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_exec_err=%v", err) + } + + var limits syscall.Rlimit + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { + return fmt.Errorf( + "unable to raise nofile limits: rlimit_get_err=%v", err) + } + + if limits.Max >= limit && limits.Cur >= limit { + // Limit already set correctly + return nil + } + + limits.Max = limit + limits.Cur = limit + + if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { + return fmt.Errorf( + "unable to raise nofile limits: rlimit_set_err=%v", err) + } + + return nil +} diff --git a/src/dbnode/server/server.go b/src/dbnode/server/server.go index 4b9a50093b..27335c8bc7 100644 --- a/src/dbnode/server/server.go +++ b/src/dbnode/server/server.go @@ -150,6 +150,12 @@ func Run(runOpts RunOptions) { } defer logger.Sync() + // Raise soft fd limit to hard limit + if err := raiseRlimitToNROpen(); err != nil { + logger.Warn("unable to raise rlimit", zap.Error(err)) + } + + // Parse file and directory modes newFileMode, err := cfg.Filesystem.ParseNewFileMode() if err != nil { logger.Fatal("could not parse new file mode", zap.Error(err)) From d813447dee137b12293dad97947bf296f6f08a7d Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Sun, 26 May 2019 15:49:51 +0200 Subject: [PATCH 2/4] Add cap_sys_resource with setcap --- docker/m3dbnode/Dockerfile | 10 +--------- src/dbnode/server/limits.go | 5 +++++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/docker/m3dbnode/Dockerfile b/docker/m3dbnode/Dockerfile index 4e168dd4ec..a6250598c7 100644 --- a/docker/m3dbnode/Dockerfile +++ b/docker/m3dbnode/Dockerfile @@ -28,16 +28,8 @@ COPY --from=builder /go/src/github.com/m3db/m3/scripts/m3dbnode_bootstrapped.sh # Use setcap and run as specific user RUN apk add libcap && \ - mkdir -p /home/m3dbnode-user && \ - addgroup -S m3dbnode-group && \ - adduser -u 1000 -S -h /home/m3dbnode-user -G m3dbnode-group m3dbnode-user && \ - chown m3dbnode-user:m3dbnode-group /bin/m3dbnode && \ setcap cap_ipc_lock=+ep /bin/m3dbnode && \ - setcap cap_sys_resource=+ep /bin/m3dbnode && \ - mkdir -p /var/lib && \ - chown -R m3dbnode-user:m3dbnode-group /var/lib - -USER m3dbnode-user + setcap cap_sys_resource=+ep /bin/m3dbnode ENTRYPOINT [ "/bin/m3dbnode" ] CMD [ "-f", "/etc/m3dbnode/m3dbnode.yml" ] diff --git a/src/dbnode/server/limits.go b/src/dbnode/server/limits.go index a9e46e3f63..7301a0a49b 100644 --- a/src/dbnode/server/limits.go +++ b/src/dbnode/server/limits.go @@ -124,6 +124,11 @@ func raiseRlimitToNROpen() error { break } + if limit == 0 { + return fmt.Errorf( + "unable to raise nofile limits: sysctl_limit_err=limit not parsed") + } + if err := scanner.Err(); err != nil { return fmt.Errorf( "unable to raise nofile limits: sysctl_read_stdout_err=%v", err) From 05df35e5eaac2b5140bd1665c3d3070fed079a83 Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Sun, 26 May 2019 16:36:09 +0200 Subject: [PATCH 3/4] Refactor and move method limit process to src/x/os --- docker/m3dbnode/Dockerfile | 1 - src/dbnode/server/limits.go | 82 ------------------------------------- src/dbnode/server/server.go | 12 +++++- src/x/os/limits.go | 9 ++++ src/x/os/limits_linux.go | 45 ++++++++++++++++++++ src/x/os/limits_other.go | 13 +++++- 6 files changed, 75 insertions(+), 87 deletions(-) diff --git a/docker/m3dbnode/Dockerfile b/docker/m3dbnode/Dockerfile index a6250598c7..d9ace30cf9 100644 --- a/docker/m3dbnode/Dockerfile +++ b/docker/m3dbnode/Dockerfile @@ -28,7 +28,6 @@ COPY --from=builder /go/src/github.com/m3db/m3/scripts/m3dbnode_bootstrapped.sh # Use setcap and run as specific user RUN apk add libcap && \ - setcap cap_ipc_lock=+ep /bin/m3dbnode && \ setcap cap_sys_resource=+ep /bin/m3dbnode ENTRYPOINT [ "/bin/m3dbnode" ] diff --git a/src/dbnode/server/limits.go b/src/dbnode/server/limits.go index 7301a0a49b..693c3e90eb 100644 --- a/src/dbnode/server/limits.go +++ b/src/dbnode/server/limits.go @@ -21,12 +21,7 @@ package server import ( - "bufio" "fmt" - "os/exec" - "strconv" - "strings" - "syscall" xerror "github.com/m3db/m3/src/x/errors" xos "github.com/m3db/m3/src/x/os" @@ -83,80 +78,3 @@ func validateProcessLimits() error { return multiErr.FinalError() } - -func raiseRlimitToNROpen() error { - cmd := exec.Command("sysctl", "-a") - stdout, err := cmd.StdoutPipe() - if err != nil { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_stdout_err=%v", err) - } - - defer stdout.Close() - - if err := cmd.Start(); err != nil { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_start_err=%v", err) - } - - var ( - scanner = bufio.NewScanner(stdout) - limit uint64 - ) - for scanner.Scan() { - line := scanner.Text() - if !strings.Contains(line, "nr_open") { - continue - } - equalsIdx := strings.LastIndex(line, "=") - if equalsIdx < 0 { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_parse_stdout_err=%v", err) - } - value := strings.TrimSpace(line[equalsIdx+1:]) - n, err := strconv.Atoi(value) - if err != nil { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_eval_stdout_err=%v", err) - } - - limit = uint64(n) - break - } - - if limit == 0 { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_limit_err=limit not parsed") - } - - if err := scanner.Err(); err != nil { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_read_stdout_err=%v", err) - } - - if err := cmd.Wait(); err != nil { - return fmt.Errorf( - "unable to raise nofile limits: sysctl_exec_err=%v", err) - } - - var limits syscall.Rlimit - if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { - return fmt.Errorf( - "unable to raise nofile limits: rlimit_get_err=%v", err) - } - - if limits.Max >= limit && limits.Cur >= limit { - // Limit already set correctly - return nil - } - - limits.Max = limit - limits.Cur = limit - - if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { - return fmt.Errorf( - "unable to raise nofile limits: rlimit_set_err=%v", err) - } - - return nil -} diff --git a/src/dbnode/server/server.go b/src/dbnode/server/server.go index 27335c8bc7..4ef2b5db88 100644 --- a/src/dbnode/server/server.go +++ b/src/dbnode/server/server.go @@ -75,6 +75,7 @@ import ( "github.com/m3db/m3/src/x/instrument" "github.com/m3db/m3/src/x/lockfile" "github.com/m3db/m3/src/x/mmap" + xos "github.com/m3db/m3/src/x/os" "github.com/m3db/m3/src/x/pool" "github.com/m3db/m3/src/x/serialize" xsync "github.com/m3db/m3/src/x/sync" @@ -150,9 +151,16 @@ func Run(runOpts RunOptions) { } defer logger.Sync() - // Raise soft fd limit to hard limit - if err := raiseRlimitToNROpen(); err != nil { + // Raise fd limits to nr_open system limit + result, err := xos.RaiseProcessNoFileToNROpen() + if err != nil { logger.Warn("unable to raise rlimit", zap.Error(err)) + } else { + logger.Info("raised rlimit no file fds limit", + zap.Bool("required", result.RaiseRequired), + zap.Uint64("sysNROpenValue", result.NROpenValue), + zap.Uint64("noFileMaxValue", result.NoFileMaxValue), + zap.Uint64("noFileCurrValue", result.NoFileCurrValue)) } // Parse file and directory modes diff --git a/src/x/os/limits.go b/src/x/os/limits.go index adef12cdf0..b9642765b5 100644 --- a/src/x/os/limits.go +++ b/src/x/os/limits.go @@ -27,3 +27,12 @@ type ProcessLimits struct { VMMaxMapCount int64 // corresponds to /proc/sys/vm/max_map_count VMSwappiness int64 // corresponds to /proc/sys/vm/swappiness } + +// RaiseProcessNoFileToNROpenResult captures the result of trying to +// raise the process num files open limit to the nr_open system value. +type RaiseProcessNoFileToNROpenResult struct { + RaiseRequired bool + NROpenValue uint64 + NoFileMaxValue uint64 + NoFileCurrValue uint64 +} diff --git a/src/x/os/limits_linux.go b/src/x/os/limits_linux.go index 42c1085786..d4bdd07e6b 100644 --- a/src/x/os/limits_linux.go +++ b/src/x/os/limits_linux.go @@ -32,6 +32,7 @@ const ( sysctlDir = "/proc/sys/" vmMaxMapCountKey = "vm.max_map_count" vmSwappinessKey = "vm.swappiness" + fsNROpenKey = "fs.nr_open" ) // CanGetProcessLimits returns a boolean to signify if it can return limits, @@ -66,6 +67,50 @@ func GetProcessLimits() (ProcessLimits, error) { }, nil } +// RaiseProcessNoFileToNROpen first determines the NROpen limit by reading +// the corresponding proc sys file and then if the hard or soft limits +// are below this number, the limits are raised using a call to setrlimit. +func RaiseProcessNoFileToNROpen() (RaiseProcessNoFileToNROpenResult, error) { + value, err := sysctlInt64(fsNROpenKey) + if err != nil { + return RaiseProcessNoFileToNROpenResult{}, fmt.Errorf( + "unable to raise nofile limits: nr_open_parse_err=%v", err) + } + + limit := uint64(value) + + var limits syscall.Rlimit + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { + return RaiseProcessNoFileToNROpenResult{}, fmt.Errorf( + "unable to raise nofile limits: rlimit_get_err=%v", err) + } + + if limits.Max >= limit && limits.Cur >= limit { + // Limit already set correctly + return RaiseProcessNoFileToNROpenResult{ + RaiseRequired: false, + NROpenValue: limit, + NoFileMaxValue: limits.Max, + NoFileCurrValue: limits.Cur, + }, nil + } + + limits.Max = limit + limits.Cur = limit + + if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limits); err != nil { + return RaiseProcessNoFileToNROpenResult{}, fmt.Errorf( + "unable to raise nofile limits: rlimit_set_err=%v", err) + } + + return RaiseProcessNoFileToNROpenResult{ + RaiseRequired: true, + NROpenValue: limit, + NoFileMaxValue: limits.Max, + NoFileCurrValue: limits.Cur, + }, nil +} + func sysctlInt64(key string) (int64, error) { str, err := sysctl(key) if err != nil { diff --git a/src/x/os/limits_other.go b/src/x/os/limits_other.go index cdb4aba54c..70b9c137c3 100644 --- a/src/x/os/limits_other.go +++ b/src/x/os/limits_other.go @@ -22,14 +22,17 @@ package xos -import "errors" +import ( + "errors" +) const ( nonLinuxWarning = "unable to determine process limits on non-linux os" ) var ( - errUnableToDetermineProcessLimits = errors.New(nonLinuxWarning) + errUnableToDetermineProcessLimits = errors.New(nonLinuxWarning) + errUnableToRaiseProcessNoFileNonLinux = errors.New("unable to raise no file limits on non-linux os") ) // CanGetProcessLimits returns a boolean to signify if it can return limits, @@ -42,3 +45,9 @@ func CanGetProcessLimits() (bool, string) { func GetProcessLimits() (ProcessLimits, error) { return ProcessLimits{}, errUnableToDetermineProcessLimits } + +// RaiseProcessNoFileToNROpen attempts to raise the process num files +// open limit to the nr_open system value. +func RaiseProcessNoFileToNROpen() (RaiseProcessNoFileToNROpenResult, error) { + return RaiseProcessNoFileToNROpenResult{}, errUnableToRaiseProcessNoFileNonLinux +} From bca7c427e8c207ece2f6c2297f9dfcbbaecd7c65 Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Tue, 28 May 2019 14:50:57 +0200 Subject: [PATCH 4/4] Address feedback --- docker/m3dbnode/Dockerfile | 11 ++++++++--- src/dbnode/server/server.go | 26 ++++++++++++++++---------- src/x/os/limits.go | 2 +- src/x/os/limits_linux.go | 4 ++-- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/docker/m3dbnode/Dockerfile b/docker/m3dbnode/Dockerfile index d9ace30cf9..4f6134f422 100644 --- a/docker/m3dbnode/Dockerfile +++ b/docker/m3dbnode/Dockerfile @@ -1,4 +1,4 @@ -# stage 1: build +# Stage 1: build FROM golang:1.10-alpine AS builder LABEL maintainer="The M3DB Authors " @@ -14,7 +14,7 @@ RUN cd /go/src/github.com/m3db/m3/ && \ git submodule update --init && \ make m3dbnode-linux-amd64 -# stage 2: lightweight "release" +# Stage 2: lightweight "release" FROM alpine:latest LABEL maintainer="The M3DB Authors " @@ -26,9 +26,14 @@ COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd.yml /etc/m3dbnode/m3dbnode.yml COPY --from=builder /go/src/github.com/m3db/m3/scripts/m3dbnode_bootstrapped.sh /bin/ -# Use setcap and run as specific user +# Use setcap to set +e "effective" and +p "permitted" to adjust the +# SYS_RESOURCE so the process can raise the hard file limit with +# setrlimit RUN apk add libcap && \ setcap cap_sys_resource=+ep /bin/m3dbnode +# Set the environment variable to raise the limit on startup +ENV PROCESS_LIMITS_RAISE true + ENTRYPOINT [ "/bin/m3dbnode" ] CMD [ "-f", "/etc/m3dbnode/m3dbnode.yml" ] diff --git a/src/dbnode/server/server.go b/src/dbnode/server/server.go index 4ef2b5db88..8d00682aef 100644 --- a/src/dbnode/server/server.go +++ b/src/dbnode/server/server.go @@ -31,6 +31,7 @@ import ( "path" "runtime" "runtime/debug" + "strings" "syscall" "time" @@ -93,6 +94,8 @@ const ( maxBgProcessLimitMonitorDuration = 5 * time.Minute filePathPrefixLockFile = ".lock" defaultServiceName = "m3dbnode" + raiseProcessLimitsEnvVar = "PROCESS_LIMITS_RAISE" + raiseProcessLimitsEnvVarTrue = "true" ) // RunOptions provides options for running the server @@ -151,16 +154,19 @@ func Run(runOpts RunOptions) { } defer logger.Sync() - // Raise fd limits to nr_open system limit - result, err := xos.RaiseProcessNoFileToNROpen() - if err != nil { - logger.Warn("unable to raise rlimit", zap.Error(err)) - } else { - logger.Info("raised rlimit no file fds limit", - zap.Bool("required", result.RaiseRequired), - zap.Uint64("sysNROpenValue", result.NROpenValue), - zap.Uint64("noFileMaxValue", result.NoFileMaxValue), - zap.Uint64("noFileCurrValue", result.NoFileCurrValue)) + raiseLimits := strings.TrimSpace(os.Getenv(raiseProcessLimitsEnvVar)) + if raiseLimits == raiseProcessLimitsEnvVarTrue { + // Raise fd limits to nr_open system limit + result, err := xos.RaiseProcessNoFileToNROpen() + if err != nil { + logger.Warn("unable to raise rlimit", zap.Error(err)) + } else { + logger.Info("raised rlimit no file fds limit", + zap.Bool("required", result.RaisePerformed), + zap.Uint64("sysNROpenValue", result.NROpenValue), + zap.Uint64("noFileMaxValue", result.NoFileMaxValue), + zap.Uint64("noFileCurrValue", result.NoFileCurrValue)) + } } // Parse file and directory modes diff --git a/src/x/os/limits.go b/src/x/os/limits.go index b9642765b5..c77d563109 100644 --- a/src/x/os/limits.go +++ b/src/x/os/limits.go @@ -31,7 +31,7 @@ type ProcessLimits struct { // RaiseProcessNoFileToNROpenResult captures the result of trying to // raise the process num files open limit to the nr_open system value. type RaiseProcessNoFileToNROpenResult struct { - RaiseRequired bool + RaisePerformed bool NROpenValue uint64 NoFileMaxValue uint64 NoFileCurrValue uint64 diff --git a/src/x/os/limits_linux.go b/src/x/os/limits_linux.go index d4bdd07e6b..c03de572db 100644 --- a/src/x/os/limits_linux.go +++ b/src/x/os/limits_linux.go @@ -88,7 +88,7 @@ func RaiseProcessNoFileToNROpen() (RaiseProcessNoFileToNROpenResult, error) { if limits.Max >= limit && limits.Cur >= limit { // Limit already set correctly return RaiseProcessNoFileToNROpenResult{ - RaiseRequired: false, + RaisePerformed: false, NROpenValue: limit, NoFileMaxValue: limits.Max, NoFileCurrValue: limits.Cur, @@ -104,7 +104,7 @@ func RaiseProcessNoFileToNROpen() (RaiseProcessNoFileToNROpenResult, error) { } return RaiseProcessNoFileToNROpenResult{ - RaiseRequired: true, + RaisePerformed: true, NROpenValue: limit, NoFileMaxValue: limits.Max, NoFileCurrValue: limits.Cur,