Skip to content

Commit

Permalink
Make cortex_bucket_store_blocks_loaded metric per user (#4918)
Browse files Browse the repository at this point in the history
* make cortex_bucket_store_blocks_loaded metric per user

Signed-off-by: Ben Ye <[email protected]>

* fix integration test

Signed-off-by: Ben Ye <[email protected]>

* update changelog

Signed-off-by: Ben Ye <[email protected]>

* fix test

Signed-off-by: Ben Ye <[email protected]>

* update changelog

Signed-off-by: Ben Ye <[email protected]>

Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 authored Oct 31, 2022
1 parent dd46578 commit ed36a62
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
* [CHANGE] Removes `-ingester.stream-chunks-when-using-blocks` experimental flag and stream chunks by default when `querier.ingester-streaming` is enabled. #4864
* [CHANGE] Compactor: Added `cortex_compactor_runs_interrupted_total` to separate compaction interruptions from failures
* [CHANGE] Enable PromQL `@` modifier, negative offset always. #4927
* [CHANGE] Store-gateway: Add user label to `cortex_bucket_store_blocks_loaded` metric. #4918
* [ENHANCEMENT] AlertManager: Retrying AlertManager Get Requests (Get Alertmanager status, Get Alertmanager Receivers) on next replica on error #4840
* [ENHANCEMENT] Querier/Ruler: Retry store-gateway in case of unexpected failure, instead of failing the query. #4532 #4839
* [ENHANCEMENT] Ring: DoBatch prioritize 4xx errors when failing. #4783
Expand Down
8 changes: 2 additions & 6 deletions integration/getting_started_with_gossiped_ring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,6 @@ func TestGettingStartedWithGossipedRing(t *testing.T) {
require.Equal(t, model.ValVector, result.Type())
assert.Equal(t, expectedVector, result.(model.Vector))

// Before flushing the blocks we expect no store-gateway has loaded any block.
require.NoError(t, cortex1.WaitSumMetrics(e2e.Equals(0), "cortex_bucket_store_blocks_loaded"))
require.NoError(t, cortex2.WaitSumMetrics(e2e.Equals(0), "cortex_bucket_store_blocks_loaded"))

// Flush blocks from ingesters to the store.
for _, instance := range []*e2ecortex.CortexService{cortex1, cortex2} {
res, err = e2e.GetRequest("http://" + instance.HTTPEndpoint() + "/flush")
Expand All @@ -124,8 +120,8 @@ func TestGettingStartedWithGossipedRing(t *testing.T) {
// Given store-gateway blocks sharding is enabled with the default replication factor of 3,
// and ingestion replication factor is 1, we do expect the series has been ingested by 1
// single ingester and so we have 1 block shipped from ingesters and loaded by both store-gateways.
require.NoError(t, cortex1.WaitSumMetrics(e2e.Equals(1), "cortex_bucket_store_blocks_loaded"))
require.NoError(t, cortex2.WaitSumMetrics(e2e.Equals(1), "cortex_bucket_store_blocks_loaded"))
require.NoError(t, cortex1.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_bucket_store_blocks_loaded"}, e2e.WaitMissingMetrics))
require.NoError(t, cortex2.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_bucket_store_blocks_loaded"}, e2e.WaitMissingMetrics))

// Make sure that no DNS failures occurred.
// No actual DNS lookups are necessarily performed, so we can't really assert on that.
Expand Down
4 changes: 3 additions & 1 deletion integration/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ func TestQuerierWithBlocksStorageRunningInMicroservicesMode(t *testing.T) {
// we don't known which store-gateway instance will synch the blocks, so we need to wait on
// metrics extracted from all instances.
if testCfg.blocksShardingStrategy != "" {
require.NoError(t, storeGateways.WaitSumMetrics(e2e.Equals(2), "cortex_bucket_store_blocks_loaded"))
// If shuffle sharding is enabled and we have tenant shard size set to 1,
// then the metric only appears in one store gateway instance.
require.NoError(t, storeGateways.WaitSumMetricsWithOptions(e2e.Equals(2), []string{"cortex_bucket_store_blocks_loaded"}, e2e.SkipMissingMetrics))
} else {
require.NoError(t, storeGateways.WaitSumMetrics(e2e.Equals(float64(2*storeGateways.NumInstances())), "cortex_bucket_store_blocks_loaded"))
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
blocksLoaded: prometheus.NewDesc(
"cortex_bucket_store_blocks_loaded",
"Number of currently loaded blocks.",
nil, nil),
[]string{"user"}, nil),
seriesDataTouched: prometheus.NewDesc(
"cortex_bucket_store_series_data_touched",
"How many items of a data type in a block were touched for a single series request.",
Expand Down Expand Up @@ -212,7 +212,7 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCounters(out, m.blockDrops, "thanos_bucket_store_block_drops_total")
data.SendSumOfCounters(out, m.blockDropFailures, "thanos_bucket_store_block_drop_failures_total")

data.SendSumOfGauges(out, m.blocksLoaded, "thanos_bucket_store_blocks_loaded")
data.SendSumOfGaugesPerUser(out, m.blocksLoaded, "thanos_bucket_store_blocks_loaded")

data.SendSumOfSummariesWithLabels(out, m.seriesDataTouched, "thanos_bucket_store_series_data_touched", "data_type")
data.SendSumOfSummariesWithLabels(out, m.seriesDataFetched, "thanos_bucket_store_series_data_fetched", "data_type")
Expand Down
4 changes: 3 additions & 1 deletion pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ func TestBucketStoreMetrics(t *testing.T) {
err := testutil.GatherAndCompare(mainReg, bytes.NewBufferString(`
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 22519
cortex_bucket_store_blocks_loaded{user="user1"} 5328
cortex_bucket_store_blocks_loaded{user="user2"} 6908
cortex_bucket_store_blocks_loaded{user="user3"} 10283
# HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts.
# TYPE cortex_bucket_store_block_loads_total counter
Expand Down
17 changes: 8 additions & 9 deletions pkg/storegateway/bucket_stores_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ func TestBucketStores_InitialSync(t *testing.T) {
assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 2
cortex_bucket_store_blocks_loaded{user="user-1"} 1
cortex_bucket_store_blocks_loaded{user="user-2"} 1
# HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts.
# TYPE cortex_bucket_store_block_loads_total counter
Expand Down Expand Up @@ -158,7 +159,7 @@ func TestBucketStores_InitialSyncShouldRetryOnFailure(t *testing.T) {
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 1
cortex_bucket_store_blocks_loaded{user="user-1"} 1
# HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts.
# TYPE cortex_bucket_store_block_loads_total counter
Expand Down Expand Up @@ -219,7 +220,7 @@ func TestBucketStores_SyncBlocks(t *testing.T) {
assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(`
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 2
cortex_bucket_store_blocks_loaded{user="user-1"} 2
# HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts.
# TYPE cortex_bucket_store_block_loads_total counter
Expand Down Expand Up @@ -486,7 +487,8 @@ func TestBucketStores_deleteLocalFilesForExcludedTenants(t *testing.T) {
cortex_bucket_store_block_loads_total 2
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 2
cortex_bucket_store_blocks_loaded{user="user-1"} 1
cortex_bucket_store_blocks_loaded{user="user-2"} 1
`), metricNames...))

// Single user left in shard.
Expand All @@ -503,7 +505,7 @@ func TestBucketStores_deleteLocalFilesForExcludedTenants(t *testing.T) {
cortex_bucket_store_block_loads_total 2
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 1
cortex_bucket_store_blocks_loaded{user="user-1"} 1
`), metricNames...))

// No users left in this shard.
Expand All @@ -518,9 +520,6 @@ func TestBucketStores_deleteLocalFilesForExcludedTenants(t *testing.T) {
# HELP cortex_bucket_store_block_loads_total Total number of remote block loading attempts.
# TYPE cortex_bucket_store_block_loads_total counter
cortex_bucket_store_block_loads_total 2
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 0
`), metricNames...))

// We can always get user back.
Expand All @@ -537,7 +536,7 @@ func TestBucketStores_deleteLocalFilesForExcludedTenants(t *testing.T) {
cortex_bucket_store_block_loads_total 3
# HELP cortex_bucket_store_blocks_loaded Number of currently loaded blocks.
# TYPE cortex_bucket_store_blocks_loaded gauge
cortex_bucket_store_blocks_loaded 1
cortex_bucket_store_blocks_loaded{user="user-1"} 1
`), metricNames...))
}

Expand Down

0 comments on commit ed36a62

Please sign in to comment.