Skip to content
This repository has been archived by the owner on Aug 23, 2023. It is now read-only.

Update profiletrigger: switch from vsz to rss and add threshold for heap as well #1914

Merged
merged 3 commits into from
Oct 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 14 additions & 5 deletions cmd/metrictank/metrictank.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,11 @@ var (
blockProfileRate = flag.Int("block-profile-rate", 0, "see https://golang.org/pkg/runtime/#SetBlockProfileRate")
memProfileRate = flag.Int("mem-profile-rate", 512*1024, "0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")

proftrigPath = flag.String("proftrigger-path", "/tmp", "path to store triggered profiles")
proftrigFreqStr = flag.String("proftrigger-freq", "60s", "inspect status frequency. set to 0 to disable")
proftrigMinDiffStr = flag.String("proftrigger-min-diff", "1h", "minimum time between triggered profiles")
proftrigHeapThresh = flag.Int("proftrigger-heap-thresh", 25000000000, "if this many bytes allocated, trigger a profile")
proftrigPath = flag.String("proftrigger-path", "/tmp", "path to store triggered profiles")
proftrigFreqStr = flag.String("proftrigger-freq", "10s", "inspect status frequency. set to 0 to disable")
proftrigMinDiffStr = flag.String("proftrigger-min-diff", "1h", "minimum time between triggered profiles")
proftrigHeapThresh = flag.Int("proftrigger-heap-thresh", 25000000000, "threshold for process RSS, the amount of RAM memory used. (0 to disable) (see \"rss\" on dashboard)")
proftrigHeapThreshHeap = flag.Int("proftrigger-heap-thresh-heap", 0, "threshold for bytes allocated on heap (0 to disable) (see \"allocated in heap\" on dashboard)")
)

func main() {
Expand Down Expand Up @@ -228,7 +229,15 @@ func main() {
proftrigMinDiff := int(dur.MustParseNDuration("proftrigger-min-diff", *proftrigMinDiffStr))
if proftrigFreq > 0 {
errors := make(chan error)
trigger, _ := heap.New(*proftrigPath, *proftrigHeapThresh, proftrigMinDiff, time.Duration(proftrigFreq)*time.Second, errors)
cfg := heap.Config{
Path: *proftrigPath,
ThreshRSS: *proftrigHeapThresh,
ThreshHeap: *proftrigHeapThreshHeap,
MinTimeDiff: time.Duration(proftrigMinDiff) * time.Second,
CheckEvery: time.Duration(proftrigFreq) * time.Second,
}

trigger, _ := heap.New(cfg, errors)
go func() {
for e := range errors {
log.Errorf("profiletrigger heap: %s", e)
Expand Down
10 changes: 8 additions & 2 deletions docker/docker-chaos/metrictank.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 1s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
10 changes: 8 additions & 2 deletions docker/docker-cluster-query/metrictank.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 1s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
10 changes: 8 additions & 2 deletions docker/docker-cluster/metrictank.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 1s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
12 changes: 9 additions & 3 deletions docker/docker-dev-custom-cfg-kafka/metrictank.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
12 changes: 9 additions & 3 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,21 @@ public-org = 0
block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024
# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0
# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
```
Expand Down
3 changes: 1 addition & 2 deletions docs/operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ Metrictank crashed. What to do?
Tips:
* The [profiletrigger](https://github.com/grafana/metrictank/blob/master/docs/config.md#profiling-instrumentation-and-logging) functionality can automatically trigger
a memory profile and save it to disk. This can be very helpful if memory usage suddenly spikes up and then metrictank gets killed within seconds or minutes.
It helps diagnose problems in the codebase that may lead to memory savings. The profiletrigger looks at the `bytes_sys` metric which is
the amount of memory consumed by the process.
It helps diagnose problems in the codebase that may lead to memory savings. The profiletrigger can look at both RSS used and heap size.
* Use [rollups](https://github.com/grafana/metrictank/blob/master/docs/consolidation.md#rollups) to be able to answer queries for long timeframes with less data
2) Check the metrictank log.
If it exited due to a panic, you should probably open a [ticket](https://github.com/grafana/metrictank/issues) with the output of `metrictank --version`, the panic, and perhaps preceding log data.
Expand Down
12 changes: 9 additions & 3 deletions metrictank-sample.ini
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
12 changes: 9 additions & 3 deletions scripts/config/metrictank-docker-dev.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
12 changes: 9 additions & 3 deletions scripts/config/metrictank-docker.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
12 changes: 9 additions & 3 deletions scripts/config/metrictank-package.ini
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@ block-profile-rate = 0
# 0 to disable. 1 for max precision (expensive!) see https://golang.org/pkg/runtime/#pkg-variables")
mem-profile-rate = 524288 # 512*1024

# heap profiletrigger: triggers a heap (memory) profile for diagnosis when usage threshold is breached
# recommended usage: set proftrigger-heap-thresh such that it is much larger than "normal" usage, but lower
# than how much RAM capacity you have, so that a profile can be captured before the process gets killed by the OOM-killer
# inspect status frequency. set to 0 to disable
proftrigger-freq = 60s
proftrigger-freq = 10s
# path to store triggered profiles
proftrigger-path = /tmp
# minimum time between triggered profiles
proftrigger-min-diff = 1h
# if process consumes this many bytes (see bytes_sys in dashboard), trigger a heap profile for developer diagnosis
# set it higher than your typical memory usage, but lower than how much RAM the process can take before its get killed
# threshold for process RSS, the amount of RAM memory used. (0 to disable) (see "rss" on dashboard)
proftrigger-heap-thresh = 25000000000
# threshold for bytes allocated on heap (0 to disable) (see "allocated in heap" on dashboard)
# typically, this is not all that useful, "rss" above is what most people care about (and the heap uses less than rss),
# but this setting can help detect a large heap even if some of the memory is swapped out (and thus not accounted for in rss)
proftrigger-heap-thresh-heap = 0

# only log log-level and lower (read right to left: to the left is lower). panic|fatal|error|warning|info|debug
log-level = info
Expand Down
Loading