Fix for: syncTargetAllocator in prometheus metrics receiver doesn't detect regex changes in metrics relabel config #30258
Changes from 35 commits
@@ -0,0 +1,6 @@
+change_type: 'bug_fix'
+component: 'prometheusreceiver'
+note: Fix hash computation to include non exported fields like regex in scrape configuration for TargetAllocator
+issues: [29313]
+subtext:
+change_logs: []
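For context on the note above: reflection-based hashing (the hashstructure call removed later in this diff) skips struct fields it cannot reach through exported fields, which is why a regex change could go undetected, while hashing the YAML encoding picks the change up through the type's custom marshaler. Below is a minimal, self-contained sketch of that difference; the `pattern` and `scrapeConfig` types are hypothetical stand-ins for illustration, not the receiver's real types.

```go
package main

import (
	"fmt"
	"hash/fnv"

	"github.com/mitchellh/hashstructure/v2"
	"gopkg.in/yaml.v2"
)

// pattern mimics a type (such as a relabel regex wrapper) whose data is not
// reachable through exported struct fields and is only exposed via a custom
// YAML marshaler. It is a made-up type for this sketch.
type pattern struct {
	original string
}

func (p pattern) MarshalYAML() (interface{}, error) { return p.original, nil }

// scrapeConfig is a hypothetical stand-in for a scrape configuration.
type scrapeConfig struct {
	JobName string  `yaml:"job_name"`
	Regex   pattern `yaml:"regex"`
}

func main() {
	a := scrapeConfig{JobName: "job1", Regex: pattern{original: "regex1"}}
	b := scrapeConfig{JobName: "job1", Regex: pattern{original: "regex2"}}

	// Reflection-based hashing ignores the unexported field, so the regex change is invisible.
	ha, _ := hashstructure.Hash(a, hashstructure.FormatV2, nil)
	hb, _ := hashstructure.Hash(b, hashstructure.FormatV2, nil)
	fmt.Println("hashstructure sees a change:", ha != hb) // false

	// Hashing the YAML encoding goes through MarshalYAML, so the regex text is included.
	fa, fb := fnv.New64(), fnv.New64()
	ya, _ := yaml.Marshal(a)
	yb, _ := yaml.Marshal(b)
	fa.Write(ya)
	fb.Write(yb)
	fmt.Println("yaml-based hash sees a change:", fa.Sum64() != fb.Sum64()) // true
}
```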
@@ -8,6 +8,8 @@ import (
  "context"
  "errors"
  "fmt"
+  "hash"
+  "hash/fnv"
  "io"
  "net/http"
  "net/url"

@@ -17,7 +19,6 @@ import (
  "time"

  "github.com/go-kit/log"
-  "github.com/mitchellh/hashstructure/v2"
  commonconfig "github.com/prometheus/common/config"
  "github.com/prometheus/common/model"
  "github.com/prometheus/prometheus/config"

@@ -105,7 +106,7 @@ func (r *pReceiver) Start(_ context.Context, host component.Host) error {
func (r *pReceiver) startTargetAllocator(allocConf *targetAllocator, baseCfg *config.Config) error {
  r.settings.Logger.Info("Starting target allocator discovery")
  // immediately sync jobs, not waiting for the first tick
-  savedHash, err := r.syncTargetAllocator(uint64(0), allocConf, baseCfg)
+  savedHash, err := r.syncTargetAllocator(nil, allocConf, baseCfg)
  if err != nil {
    return err
  }

@@ -130,20 +131,40 @@ func (r *pReceiver) startTargetAllocator(allocConf *targetAllocator, baseCfg *co
  return nil
}

+// Calculate a hash for a scrape config map.
+// This is done by marshaling to YAML because it's the most straightforward and doesn't run into problems with unexported fields.
+func getScrapeConfigHash(jobToScrapeConfig map[string]*config.ScrapeConfig) (hash.Hash64, error) {
+  var err error
+  hash := fnv.New64()
+  yamlEncoder := yaml.NewEncoder(hash)
+  for jobName, scrapeConfig := range jobToScrapeConfig {
+    _, err = hash.Write([]byte(jobName))
+    if err != nil {
+      return nil, err
+    }
+    err = yamlEncoder.Encode(scrapeConfig)
+    if err != nil {
+      return nil, err
+    }
+  }
+  yamlEncoder.Close()
+  return hash, err

Review comment (a suggested change was attached but is not rendered here): I don't think anything outside of this function needs access to the mutable hash.

+}
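One possible reading of the comment above (an assumption, not necessarily what the hidden suggestion proposed) is to keep the `hash.Hash64` local to this function and return only the immutable 64-bit sum, using the same packages the function already relies on:

```go
// Hypothetical variant (assumption, not the PR's code): callers only need the
// final value, so return hash.Sum64() and let them compare plain uint64s
// instead of passing the mutable hash.Hash64 around.
func getScrapeConfigHash(jobToScrapeConfig map[string]*config.ScrapeConfig) (uint64, error) {
	h := fnv.New64()
	yamlEncoder := yaml.NewEncoder(h)
	for jobName, scrapeConfig := range jobToScrapeConfig {
		if _, err := h.Write([]byte(jobName)); err != nil {
			return 0, err
		}
		if err := yamlEncoder.Encode(scrapeConfig); err != nil {
			return 0, err
		}
	}
	yamlEncoder.Close()
	return h.Sum64(), nil
}
```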
+
// syncTargetAllocator request jobs from targetAllocator and update underlying receiver, if the response does not match the provided compareHash.
// baseDiscoveryCfg can be used to provide additional ScrapeConfigs which will be added to the retrieved jobs.
-func (r *pReceiver) syncTargetAllocator(compareHash uint64, allocConf *targetAllocator, baseCfg *config.Config) (uint64, error) {
+func (r *pReceiver) syncTargetAllocator(compareHash hash.Hash64, allocConf *targetAllocator, baseCfg *config.Config) (hash.Hash64, error) {
  r.settings.Logger.Debug("Syncing target allocator jobs")
  scrapeConfigsResponse, err := r.getScrapeConfigsResponse(allocConf.Endpoint)
  if err != nil {
    r.settings.Logger.Error("Failed to retrieve job list", zap.Error(err))
-    return 0, err
+    return nil, err
  }

-  hash, err := hashstructure.Hash(scrapeConfigsResponse, hashstructure.FormatV2, nil)
+  hash, err := getScrapeConfigHash(scrapeConfigsResponse)
  if err != nil {
    r.settings.Logger.Error("Failed to hash job list", zap.Error(err))
-    return 0, err
+    return nil, err
  }
  if hash == compareHash {
    // no update needed

@@ -175,7 +196,7 @@ func (r *pReceiver) syncTargetAllocator(compareHash uint64, allocConf *targetAll
  err = r.applyCfg(baseCfg)
  if err != nil {
    r.settings.Logger.Error("Failed to apply new scrape configuration", zap.Error(err))
-    return 0, err
+    return nil, err
  }

  return hash, nil
@@ -20,6 +20,7 @@ import (
  "github.com/prometheus/common/model"
  promConfig "github.com/prometheus/prometheus/config"
  promHTTP "github.com/prometheus/prometheus/discovery/http"
+  "github.com/prometheus/prometheus/model/relabel"
  "github.com/stretchr/testify/require"
  "go.opentelemetry.io/collector/component/componenttest"
  "go.opentelemetry.io/collector/consumer/consumertest"

@@ -50,9 +51,15 @@ type hTTPSDResponse struct {
  Labels map[model.LabelName]model.LabelValue `json:"labels"`
}

+type expectedMetricRelabelConfigTestResult struct {
+  JobName            string
+  MetricRelabelRegex relabel.Regexp
+}
+
type expectedTestResultJobMap struct {
-  Targets []string
-  Labels  model.LabelSet
+  Targets             []string
+  Labels              model.LabelSet
+  MetricRelabelConfig *expectedMetricRelabelConfigTestResult
}

type expectedTestResult struct {

@@ -111,7 +118,6 @@ func transformTAResponseMap(rawResponses map[string][]mockTargetAllocatorRespons
      })
    }
    responsesMap[path] = responses
-
    v := &atomic.Int32{}
    responsesIndexMap[path] = v
  }
@@ -481,6 +487,111 @@ func TestTargetAllocatorJobRetrieval(t *testing.T) {
        jobMap: map[string]expectedTestResultJobMap{},
      },
    },
+    {
+      desc: "update metric relabel config regex",
+      responses: Responses{
+        releaserMap: map[string]int{
+          "/scrape_configs": 1,
+        },
+        responses: map[string][]mockTargetAllocatorResponseRaw{
+          "/scrape_configs": {
+            mockTargetAllocatorResponseRaw{code: 200, data: map[string]map[string]any{
+              "job1": {
+                "job_name": "job1",
+                "scrape_interval": "30s",
+                "scrape_timeout": "30s",
+                "metrics_path": "/metrics",
+                "scheme": "http",
+                "metric_relabel_configs": []map[string]string{
+                  {
+                    "separator": ";",
+                    "regex": "regex1",
+                    "action": "keep",
+                  },
+                },
+              },
+            }},
+            mockTargetAllocatorResponseRaw{code: 200, data: map[string]map[string]any{
+              "job1": {
+                "job_name": "job1",
+                "scrape_interval": "30s",
+                "scrape_timeout": "30s",
+                "metrics_path": "/metrics",
+                "scheme": "http",
+                "metric_relabel_configs": []map[string]string{
+                  {
+                    "separator": ";",
+                    "regex": "regex2",
+                    "action": "keep",
+                  },
+                },
+              },
+            }},
+          },
+          "/jobs/job1/targets": {
+            mockTargetAllocatorResponseRaw{code: 200, data: []hTTPSDResponse{
+              {Targets: []string{"localhost:9090"},
+                Labels: map[model.LabelName]model.LabelValue{
+                  "__meta_datacenter": "london",
+                  "__meta_prometheus_job": "node",
+                }},
+            }},
+            mockTargetAllocatorResponseRaw{code: 200, data: []hTTPSDResponse{
+              {Targets: []string{"localhost:9090"},
+                Labels: map[model.LabelName]model.LabelValue{
+                  "__meta_datacenter": "london",
+                  "__meta_prometheus_job": "node",
+                }},
+            }},
+          },
+        },
+      },
+      cfg: &Config{
+        PrometheusConfig: &promConfig.Config{
+          ScrapeConfigs: []*promConfig.ScrapeConfig{
+            {
+              JobName: "job1",
+              HonorTimestamps: true,
+              ScrapeInterval: model.Duration(30 * time.Second),
+              ScrapeTimeout: model.Duration(30 * time.Second),
+              MetricsPath: "/metrics",
+              Scheme: "http",
+              MetricRelabelConfigs: []*relabel.Config{
+                {
+                  Separator: ";",
+                  Regex: relabel.MustNewRegexp("(.*)"),
+                  Action: relabel.Keep,
+                },
+              },
+            },
+          },
+        },
+        TargetAllocator: &targetAllocator{
+          Interval: 10 * time.Second,
+          CollectorID: "collector-1",
+          HTTPSDConfig: &promHTTP.SDConfig{
+            HTTPClientConfig: commonconfig.HTTPClientConfig{},
+            RefreshInterval: model.Duration(60 * time.Second),
+          },
+        },
+      },
+      want: expectedTestResult{
+        empty: false,
+        jobMap: map[string]expectedTestResultJobMap{
+          "job1": {
+            Targets: []string{"localhost:9090"},
+            Labels: map[model.LabelName]model.LabelValue{
+              "__meta_datacenter": "london",
+              "__meta_prometheus_job": "node",
+            },
+            MetricRelabelConfig: &expectedMetricRelabelConfigTestResult{
+              JobName: "job1",
+              MetricRelabelRegex: relabel.MustNewRegexp("regex2"),
+            },
+          },
+        },
+      },
+    },
  } {
    t.Run(tc.desc, func(t *testing.T) {
      ctx := context.Background()

@@ -505,7 +616,6 @@ func TestTargetAllocatorJobRetrieval(t *testing.T) {
        require.Len(t, providers, 0)
        return
      }
-
      require.NotNil(t, providers)

      for _, provider := range providers {

@@ -532,10 +642,23 @@ func TestTargetAllocatorJobRetrieval(t *testing.T) {
            // which is identical to the source url
            s.Labels["__meta_url"] = model.LabelValue(sdConfig.URL)
            require.Equal(t, s.Labels, group.Labels)
+
+            if s.MetricRelabelConfig != nil {
+              // Adding wait here so that the latest scrape config is applied with the updated regex
+              time.Sleep(5 * time.Second)

Review comment: Putting a 5s sleep inside a triply-nested loop can have a significant impact on test runtime. Can this be avoided?

Reply: @Aneurysm9 - Unfortunately, it looks like the updated regex doesn't get picked up if the sleep is not present. This is the only test that uses the sleep, and none of the other tests enter this branch.

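One hedged alternative to the fixed sleep (an assumption, not something this PR implements) would be to poll with testify's require.Eventually, which returns as soon as the updated regex becomes visible and only waits the full deadline on failure. The sketch assumes the surrounding test's `receiver`, `s`, `require`, and `time` identifiers:

```go
// Hypothetical replacement for the fixed 5s sleep: poll until the receiver's
// applied scrape config carries the expected metric relabel regex.
require.Eventually(t, func() bool {
	for _, sc := range receiver.cfg.PrometheusConfig.ScrapeConfigs {
		if sc.JobName != s.MetricRelabelConfig.JobName {
			continue
		}
		for _, mc := range sc.MetricRelabelConfigs {
			if mc.Regex.String() != s.MetricRelabelConfig.MetricRelabelRegex.String() {
				return false
			}
		}
		return true
	}
	return false
}, 10*time.Second, 100*time.Millisecond, "updated metric relabel regex was never applied")
```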
+
+              for _, sc := range receiver.cfg.PrometheusConfig.ScrapeConfigs {
+                if sc.JobName == s.MetricRelabelConfig.JobName {
+                  for _, mc := range sc.MetricRelabelConfigs {
+                    require.Equal(t, s.MetricRelabelConfig.MetricRelabelRegex, mc.Regex)
+                  }
+                }
+              }
+            }
            found = true
          }
          require.True(t, found, "Returned job is not defined in expected values", group)
        }

      }
    })
  }
Review comment: I don't think this is going to be a viable approach. To ensure consistency, I believe it is necessary to first extract the keys from the map into a slice and sort them, then iterate over the sorted slice.
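For reference, a minimal sketch of the deterministic iteration described above (illustration only; as the reply below notes, the PR ultimately took a different approach): collect the job names, sort them, and feed each name and its YAML-encoded scrape config to the hash in that fixed order, so the result cannot vary with Go's randomized map iteration.

```go
package prometheusreceiver

import (
	"hash/fnv"
	"sort"

	"github.com/prometheus/prometheus/config"
	yaml "gopkg.in/yaml.v2" // assumed; the receiver already uses a yaml package in the diff above
)

// getScrapeConfigHashSorted is a hypothetical variant: iterate job names in
// sorted order so identical scrape config maps always produce the same hash.
func getScrapeConfigHashSorted(jobToScrapeConfig map[string]*config.ScrapeConfig) (uint64, error) {
	jobNames := make([]string, 0, len(jobToScrapeConfig))
	for jobName := range jobToScrapeConfig {
		jobNames = append(jobNames, jobName)
	}
	sort.Strings(jobNames)

	h := fnv.New64()
	yamlEncoder := yaml.NewEncoder(h)
	for _, jobName := range jobNames {
		if _, err := h.Write([]byte(jobName)); err != nil {
			return 0, err
		}
		if err := yamlEncoder.Encode(jobToScrapeConfig[jobName]); err != nil {
			return 0, err
		}
	}
	// Flush the encoder before reading the sum so all encoded bytes are counted.
	yamlEncoder.Close()
	return h.Sum64(), nil
}
```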
Reply: @Aneurysm9 - I did find another way to fix this issue using a different package. Please let me know if you see any concerns.

Reply: @Aneurysm9 - Here are the results for the benchmark (see the test for the benchmark tests I used; this is just for reference and I can remove them after you review).
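The benchmark code and numbers referenced here are not included in the rendered diff. A plausible shape for such a benchmark (purely illustrative; it assumes the getScrapeConfigHash signature from the diff above and the promConfig import alias used in the test file) would be:

```go
// Hypothetical benchmark; the real one referenced in the comment is not shown here.
func BenchmarkGetScrapeConfigHash(b *testing.B) {
	jobToScrapeConfig := map[string]*promConfig.ScrapeConfig{
		"job1": {JobName: "job1", MetricsPath: "/metrics", Scheme: "http"},
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := getScrapeConfigHash(jobToScrapeConfig); err != nil {
			b.Fatal(err)
		}
	}
}
```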
Reply: Hi @Aneurysm9, following up. Please let me know if you have any suggestions or feedback. Thanks for looking into this.

Reply: @Aneurysm9 - Following up. Please let me know if you have any concerns or feedback. Thanks!