Skip to content

Commit

Permalink
[CHORE] adding auto GOMEMLIMIT flag
Browse files Browse the repository at this point in the history
Signed-off-by: Nicolas Takashi <[email protected]>
  • Loading branch information
nicolastakashi committed Mar 22, 2024
1 parent 4a2a455 commit 3db9990
Show file tree
Hide file tree
Showing 16 changed files with 141 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
### Changed

- [#7123](https://github.com/thanos-io/thanos/pull/7123) Rule: Change default Alertmanager API version to v2.
- [#7222](https://github.com/thanos-io/thanos/pull/7222) Automatically detect memory limits and configure GOMEMLIMIT to match.

### Removed

Expand Down
7 changes: 7 additions & 0 deletions cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ func runCompact(
srv.Shutdown(err)
})

if err := configureGoAutoMemLimit(conf.goMemLimitConf); err != nil {
return err
}

confContentYaml, err := conf.objStore.Content()
if err != nil {
return err
Expand Down Expand Up @@ -724,6 +728,7 @@ type compactConfig struct {
progressCalculateInterval time.Duration
filterConf *store.FilterConfig
disableAdminOperations bool
goMemLimitConf goMemLimitConfig
}

func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
Expand Down Expand Up @@ -837,4 +842,6 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
cmd.Flag("bucket-web-label", "External block label to use as group title in the bucket web UI").StringVar(&cc.label)

cmd.Flag("disable-admin-operations", "Disable UI/API admin operations like marking blocks for deletion and no compaction.").Default("false").BoolVar(&cc.disableAdminOperations)

cc.goMemLimitConf.registerFlag(cmd)
}
40 changes: 40 additions & 0 deletions cmd/thanos/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strings"
"time"

"github.com/KimMachineGun/automemlimit/memlimit"
extflag "github.com/efficientgo/tools/extkingpin"
"github.com/pkg/errors"

Expand Down Expand Up @@ -283,3 +284,42 @@ func parseFlagLabels(s []string) (labels.Labels, error) {
sort.Sort(lset)
return lset, nil
}

// goMemLimitConfig holds the command-line settings that control automatic
// configuration of the Go runtime memory limit (GOMEMLIMIT).
type goMemLimitConfig struct {
// enableAutoGoMemlimit is bound to --enable-auto-gomemlimit (default false).
// When false, configureGoAutoMemLimit leaves the runtime memory limit untouched.
enableAutoGoMemlimit bool
// memlimitRatio is bound to --auto-gomemlimit.ratio (default 0.9).
// configureGoAutoMemLimit rejects values outside the range (0, 1].
memlimitRatio float64
}

// registerFlag registers the automatic GOMEMLIMIT flags on cmd and binds them
// to the receiver's fields:
//
//   - --enable-auto-gomemlimit (bool, default false)
//   - --auto-gomemlimit.ratio (float, default 0.9)
//
// The same flags are registered by several commands (compact, query,
// query-frontend, receive, sidecar and store), so the help text must not name
// a specific component.
//
// It returns the receiver so the call can be chained.
func (gml *goMemLimitConfig) registerFlag(cmd extkingpin.FlagClause) *goMemLimitConfig {
	cmd.Flag("enable-auto-gomemlimit",
		"Enable go runtime to automatically limit memory consumption. This is an experimental feature.").
		Default("false").BoolVar(&gml.enableAutoGoMemlimit)

	cmd.Flag("auto-gomemlimit.ratio",
		"The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory.").
		Default("0.9").FloatVar(&gml.memlimitRatio)

	return gml
}

// configureGoAutoMemLimit validates the automatic GOMEMLIMIT settings in common
// and, when the feature is enabled, asks the memlimit package to set the Go
// runtime memory limit to the configured ratio of the detected memory limit.
//
// The ratio is validated regardless of whether the feature is enabled, so an
// invalid --auto-gomemlimit.ratio is always reported at startup.
//
// It returns an error when the ratio is outside (0, 1] (including NaN) or when
// memlimit fails to determine or apply the limit.
func configureGoAutoMemLimit(common goMemLimitConfig) error {
	// Expressed as a negated in-range check on purpose: NaN fails every
	// comparison, so `ratio <= 0 || ratio > 1` would silently accept it.
	if !(common.memlimitRatio > 0.0 && common.memlimitRatio <= 1.0) {
		return errors.New("--auto-gomemlimit.ratio must be greater than 0 and less than or equal to 1")
	}

	if !common.enableAutoGoMemlimit {
		return nil
	}

	// Providers are wrapped with ApplyFallback: the cgroup provider is listed
	// first, with the system-memory provider as the fallback.
	provider := memlimit.ApplyFallback(
		memlimit.FromCgroup,
		memlimit.FromSystem,
	)

	if _, err := memlimit.SetGoMemLimitWithOpts(
		memlimit.WithRatio(common.memlimitRatio),
		memlimit.WithProvider(provider),
	); err != nil {
		return errors.Wrap(err, "failed to set GOMEMLIMIT automatically")
	}

	return nil
}
7 changes: 7 additions & 0 deletions cmd/thanos/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,19 @@ func registerQuery(app *extkingpin.App) {
var storeRateLimits store.SeriesSelectLimits
storeRateLimits.RegisterFlags(cmd)

goMemLimitConfig := goMemLimitConfig{}
goMemLimitConfig.registerFlag(cmd)

cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, debugLogging bool) error {
selectorLset, err := parseFlagLabels(*selectorLabels)
if err != nil {
return errors.Wrap(err, "parse federation labels")
}

if err := configureGoAutoMemLimit(goMemLimitConfig); err != nil {
return err
}

for _, feature := range *featureList {
if feature == promqlAtModifier {
level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", promqlAtModifier)
Expand Down
7 changes: 7 additions & 0 deletions cmd/thanos/query_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type queryFrontendConfig struct {
http httpConfig
webDisableCORS bool
orgIdHeaders []string
goMemLimitConf goMemLimitConfig
}

func registerQueryFrontend(app *extkingpin.App) {
Expand Down Expand Up @@ -163,6 +164,8 @@ func registerQueryFrontend(app *extkingpin.App) {

reqLogConfig := extkingpin.RegisterRequestLoggingFlags(cmd)

cfg.goMemLimitConf.registerFlag(cmd)

cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
httpLogOpts, err := logging.ParseHTTPOptions(reqLogConfig)
if err != nil {
Expand Down Expand Up @@ -256,6 +259,10 @@ func runQueryFrontend(
// TODO: This should be removed once the org id header is fully removed in Thanos.
cfg.orgIdHeaders = append(cfg.orgIdHeaders, tenancy.DefaultTenantHeader)

if err := configureGoAutoMemLimit(cfg.goMemLimitConf); err != nil {
return err
}

queryRangeCacheConfContentYaml, err := cfg.QueryRangeConfig.CachePathOrContent.Content()
if err != nil {
return err
Expand Down
8 changes: 8 additions & 0 deletions cmd/thanos/receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ func registerReceive(app *extkingpin.App) {
return errors.Wrap(err, "error while parsing config for request logging")
}

if err := configureGoAutoMemLimit(conf.goMemLimitConf); err != nil {
return err
}

tsdbOpts := &tsdb.Options{
MinBlockDuration: int64(time.Duration(*conf.tsdbMinBlockDuration) / time.Millisecond),
MaxBlockDuration: int64(time.Duration(*conf.tsdbMaxBlockDuration) / time.Millisecond),
Expand Down Expand Up @@ -832,6 +836,8 @@ type receiveConfig struct {
limitsConfigReloadTimer time.Duration

asyncForwardWorkerCount uint

goMemLimitConf goMemLimitConfig
}

func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) {
Expand Down Expand Up @@ -966,6 +972,8 @@ func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) {
rc.writeLimitsConfig = extflag.RegisterPathOrContent(cmd, "receive.limits-config", "YAML file that contains limit configuration.", extflag.WithEnvSubstitution(), extflag.WithHidden())
cmd.Flag("receive.limits-config-reload-timer", "Minimum amount of time to pass for the limit configuration to be reloaded. Helps to avoid excessive reloads.").
Default("1s").Hidden().DurationVar(&rc.limitsConfigReloadTimer)

rc.goMemLimitConf.registerFlag(cmd)
}

// determineMode returns the ReceiverMode that this receiver is configured to run in.
Expand Down
6 changes: 6 additions & 0 deletions cmd/thanos/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ func registerSidecar(app *extkingpin.App) {
return errors.Wrap(err, "Improper http client config")
}

if err := configureGoAutoMemLimit(conf.goMemLimitConf); err != nil {
return err
}

opts := reloader.Options{
HTTPClient: *httpClient,
CfgFile: conf.reloader.confFile,
Expand Down Expand Up @@ -504,6 +508,7 @@ type sidecarConfig struct {
shipper shipperConfig
limitMinTime thanosmodel.TimeOrDurationValue
storeRateLimits store.SeriesSelectLimits
goMemLimitConf goMemLimitConfig
}

func (sc *sidecarConfig) registerFlag(cmd extkingpin.FlagClause) {
Expand All @@ -516,6 +521,7 @@ func (sc *sidecarConfig) registerFlag(cmd extkingpin.FlagClause) {
sc.objStore = *extkingpin.RegisterCommonObjStoreFlags(cmd, "", false)
sc.shipper.registerFlag(cmd)
sc.storeRateLimits.RegisterFlags(cmd)
sc.goMemLimitConf.registerFlag(cmd)
cmd.Flag("min-time", "Start of time range limit to serve. Thanos sidecar will serve only metrics, which happened later than this value. Option can be a constant time in RFC3339 format or time duration relative to current time, such as -1d or 2h45m. Valid duration units are ms, s, m, h, d, w, y.").
Default("0000-01-01T00:00:00Z").SetValue(&sc.limitMinTime)
}
7 changes: 7 additions & 0 deletions cmd/thanos/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ type storeConfig struct {
lazyExpandedPostingsEnabled bool

indexHeaderLazyDownloadStrategy string
goMemLimitConf goMemLimitConfig
}

func (sc *storeConfig) registerFlag(cmd extkingpin.FlagClause) {
Expand Down Expand Up @@ -220,6 +221,8 @@ func (sc *storeConfig) registerFlag(cmd extkingpin.FlagClause) {
cmd.Flag("bucket-web-label", "External block label to use as group title in the bucket web UI").StringVar(&sc.label)

sc.reqLogConfig = extkingpin.RegisterRequestLoggingFlags(cmd)

sc.goMemLimitConf.registerFlag(cmd)
}

// registerStore registers a store command.
Expand All @@ -245,6 +248,10 @@ func registerStore(app *extkingpin.App) {
return errors.Wrap(err, "error while parsing config for request logging")
}

if err := configureGoAutoMemLimit(conf.goMemLimitConf); err != nil {
return err
}

conf.debugLogging = debugLogging

return runStore(g,
Expand Down
6 changes: 6 additions & 0 deletions docs/components/compact.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ usage: thanos compact [<flags>]
Continuously compacts blocks in an object store bucket.
Flags:
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--block-discovery-strategy="concurrent"
One of concurrent, recursive. When set to
concurrent, stores will concurrently issue
Expand Down Expand Up @@ -375,6 +378,9 @@ Flags:
non-downsampled data is not efficient and useful
e.g it is not possible to render all samples for
a human eye anyway
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
--hash-func= Specify which hash function to use when
calculating the hashes of produced files.
If no function has been specified, it does not
Expand Down
6 changes: 6 additions & 0 deletions docs/components/query-frontend.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,16 @@ Query frontend command implements a service deployed in front of queriers to
improve query parallelization and caching.
Flags:
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--cache-compression-type=""
Use compression in results cache.
Supported values are: 'snappy' and '' (disable
compression).
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
-h, --help Show context-sensitive help (also try
--help-long and --help-man).
--http-address="0.0.0.0:10902"
Expand Down
6 changes: 6 additions & 0 deletions docs/components/query.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,12 @@ Flags:
--alert.query-url=ALERT.QUERY-URL
The external Thanos Query URL that would be set
in all alerts 'Source' field.
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
--endpoint=<endpoint> ... Addresses of statically configured Thanos
API servers (repeatable). The scheme may be
prefixed with 'dns+' or 'dnssrv+' to detect
Expand Down
6 changes: 6 additions & 0 deletions docs/components/receive.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,12 @@ usage: thanos receive [<flags>]
Accept Prometheus remote write API requests and write to local tsdb.
Flags:
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
--grpc-address="0.0.0.0:10901"
Listen ip:port address for gRPC endpoints
(StoreAPI). Make sure this address is routable
Expand Down
6 changes: 6 additions & 0 deletions docs/components/sidecar.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ usage: thanos sidecar [<flags>]
Sidecar for Prometheus server.
Flags:
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
--grpc-address="0.0.0.0:10901"
Listen ip:port address for gRPC endpoints
(StoreAPI). Make sure this address is routable
Expand Down
8 changes: 8 additions & 0 deletions docs/components/store.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ Store node giving access to blocks in a bucket provider. Now supported GCS, S3,
Azure, Swift, Tencent COS and Aliyun OSS.
Flags:
--auto-gomemlimit.ratio=0.9
The ratio of reserved GOMEMLIMIT memory to the
detected maximum container or system memory.
--block-discovery-strategy="concurrent"
One of concurrent, recursive. When set to
concurrent, stores will concurrently issue
Expand Down Expand Up @@ -69,6 +72,9 @@ Flags:
cause the store to read them. For such use
cases use Prometheus + sidecar. Ignored if
--no-cache-index-header option is specified.
--enable-auto-gomemlimit Enable go runtime to automatically limit memory
consumption by compact component. This is an
experimental feature.
--grpc-address="0.0.0.0:10901"
Listen ip:port address for gRPC endpoints
(StoreAPI). Make sure this address is routable
Expand Down Expand Up @@ -375,6 +381,8 @@ While the remaining settings are **optional**:
The `redis` index cache allows to use [Redis](https://redis.io) as cache backend. This cache type is configured using `--index-cache.config-file` to reference the configuration file or `--index-cache.config` to put yaml config directly:

```yaml mdox-exec="go run scripts/cfggen/main.go --name=cacheutil.RedisClientConfig"
type: REDIS
config:
addr: ""
Expand Down
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,20 @@ require (

require (
github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect
github.com/cilium/ebpf v0.11.0 // indirect
github.com/containerd/cgroups/v3 v3.0.3 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/go-openapi/runtime v0.27.1 // indirect
github.com/godbus/dbus/v5 v5.0.4 // indirect
github.com/golang-jwt/jwt/v5 v5.2.0 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/huaweicloud/huaweicloud-sdk-go-obs v3.23.3+incompatible // indirect
github.com/metalmatze/signal v0.0.0-20210307161603-1c9aa721a97a // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/onsi/ginkgo v1.16.5 // indirect
github.com/opencontainers/runtime-spec v1.0.2 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/sercand/kuberesolver/v4 v4.0.0 // indirect
github.com/zhangyunhao116/umap v0.0.0-20221211160557-cb7705fafa39 // indirect
go.opentelemetry.io/collector/featuregate v1.0.1 // indirect
Expand All @@ -153,6 +159,7 @@ require (
require (
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.32.3 // indirect
github.com/KimMachineGun/automemlimit v0.5.0
github.com/OneOfOne/xxhash v1.2.6 // indirect
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect
Expand Down
Loading

0 comments on commit 3db9990

Please sign in to comment.