diff --git a/CHANGELOG.md b/CHANGELOG.md index 11c1d8f92ed1..d4b47b1d3a23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ##### Enhancements +* [11003](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Add the `metrics-namespace` flag to change the namespace of metrics currently using cortex as namespace. * [11038](https://github.com/grafana/loki/pull/11038) **kavirajk**: Remove already deprecated `store.max-look-back-period`. * [10906](https://github.com/grafana/loki/pull/10906) **kavirajk**: Support Loki ruler to notify WAL writes to remote storage. * [10613](https://github.com/grafana/loki/pull/10613) **ngc4579**: Helm: allow GrafanaAgent tolerations diff --git a/cmd/migrate/main.go b/cmd/migrate/main.go index e5bebfe044c8..477afe4a9c86 100644 --- a/cmd/migrate/main.go +++ b/cmd/migrate/main.go @@ -48,6 +48,7 @@ func main() { batch := flag.Int("batchLen", 500, "Specify how many chunks to read/write in one batch") shardBy := flag.Duration("shardBy", 6*time.Hour, "Break down the total interval into shards of this size, making this too small can lead to syncing a lot of duplicate chunks") parallel := flag.Int("parallel", 8, "How many parallel threads to process each shard") + metricsNamespace := flag.String("metrics.namespace", "cortex", "Namespace of the generated metrics") flag.Parse() go func() { @@ -127,7 +128,7 @@ func main() { // Create a new registerer to avoid registering duplicate metrics prometheus.DefaultRegisterer = prometheus.NewRegistry() clientMetrics := storage.NewClientMetrics() - s, err := storage.NewStore(sourceConfig.StorageConfig, sourceConfig.ChunkStoreConfig, sourceConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger) + s, err := storage.NewStore(sourceConfig.StorageConfig, sourceConfig.ChunkStoreConfig, sourceConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger, *metricsNamespace) if err != nil { log.Println("Failed to create source store:", 
err) os.Exit(1) @@ -136,7 +137,7 @@ func main() { // Create a new registerer to avoid registering duplicate metrics prometheus.DefaultRegisterer = prometheus.NewRegistry() - d, err := storage.NewStore(destConfig.StorageConfig, destConfig.ChunkStoreConfig, destConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger) + d, err := storage.NewStore(destConfig.StorageConfig, destConfig.ChunkStoreConfig, destConfig.SchemaConfig, limits, clientMetrics, prometheus.DefaultRegisterer, util_log.Logger, *metricsNamespace) if err != nil { log.Println("Failed to create destination store:", err) os.Exit(1) diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 7956a214f033..6dcfedb74746 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -224,6 +224,10 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set # will report 503 Service Unavailable status via /ready endpoint. # CLI flag: -shutdown-delay [shutdown_delay: <duration> | default = 0s] + +# Namespace of the metrics that in previous releases had cortex as namespace. 
+# CLI flag: -metrics-namespace +[metrics_namespace: <string> | default = "cortex"] ``` ### server diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index a5b081c9a95d..b0b01c34dbaf 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -59,6 +59,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) var errGatewayUnhealthy = errors.New("bloom-gateway is unhealthy in the ring") @@ -79,14 +80,14 @@ type metrics struct { func newMetrics(subsystem string, registerer prometheus.Registerer) *metrics { return &metrics{ queueDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: subsystem, Name: "queue_duration_seconds", Help: "Time spent by tasks in queue before getting picked up by a worker.", Buckets: prometheus.DefBuckets, }), inflightRequests: promauto.With(registerer).NewSummary(prometheus.SummaryOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: subsystem, Name: "inflight_tasks", Help: "Number of inflight tasks (either queued or processing) sampled at a regular interval. 
Quantile buckets keep track of inflight tasks over the last 60s.", @@ -195,7 +196,7 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, s pendingTasks: makePendingTasks(pendingTasksInitialCap), } - g.queueMetrics = queue.NewMetrics("bloom_gateway", reg) + g.queueMetrics = queue.NewMetrics(reg, constants.Loki, "bloom_gateway") g.queue = queue.NewRequestQueue(maxTasksPerTenant, time.Minute, g.queueMetrics) g.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(g.queueMetrics.Cleanup) diff --git a/pkg/bloomgateway/client.go b/pkg/bloomgateway/client.go index af9440ff18ce..cc1e59d5ea0a 100644 --- a/pkg/bloomgateway/client.go +++ b/pkg/bloomgateway/client.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/distributor/clientpool" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) // GRPCPool represents a pool of gRPC connections to different bloom gateway instances. @@ -94,7 +95,7 @@ type GatewayClient struct { func NewGatewayClient(cfg ClientConfig, limits Limits, registerer prometheus.Registerer, logger log.Logger) (*GatewayClient, error) { latency := promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "bloom_gateway", Name: "request_duration_seconds", Help: "Time (in seconds) spent serving requests when using the bloom gateway", diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 019fd1d44d2b..33c78f3e00a7 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -202,7 +202,7 @@ type Limits interface { DefaultLimits() *validation.Limits } -func NewCompactor(cfg Config, objectStoreClients map[config.DayTime]client.ObjectClient, deleteStoreClient client.ObjectClient, schemaConfig config.SchemaConfig, limits Limits, r prometheus.Registerer) (*Compactor, error) { +func NewCompactor(cfg Config, objectStoreClients 
map[config.DayTime]client.ObjectClient, deleteStoreClient client.ObjectClient, schemaConfig config.SchemaConfig, limits Limits, r prometheus.Registerer, metricsNamespace string) (*Compactor, error) { retentionEnabledStats.Set("false") if cfg.RetentionEnabled { retentionEnabledStats.Set("true") @@ -245,7 +245,7 @@ func NewCompactor(cfg Config, objectStoreClients map[config.DayTime]client.Objec } ringCfg := cfg.CompactorRing.ToRingConfig(ringReplicationFactor) - compactor.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("cortex_", r), util_log.Logger) + compactor.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", r), util_log.Logger) if err != nil { return nil, errors.Wrap(err, "create ring client") } diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index 4f000bb6346c..79159d06d828 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -16,6 +16,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client" "github.com/grafana/loki/pkg/storage/chunk/client/local" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" loki_net "github.com/grafana/loki/pkg/util/net" ) @@ -49,7 +50,7 @@ func setupTestCompactor(t *testing.T, objectClients map[config.DayTime]client.Ob c, err := NewCompactor(cfg, objectClients, nil, config.SchemaConfig{ Configs: periodConfigs, - }, nil, nil) + }, nil, nil, constants.Loki) require.NoError(t, err) c.RegisterIndexCompactor("dummy", testIndexCompactor{}) diff --git a/pkg/compactor/deletion/metrics.go b/pkg/compactor/deletion/metrics.go index b9489e5f23b8..b3196948830b 100644 --- a/pkg/compactor/deletion/metrics.go +++ b/pkg/compactor/deletion/metrics.go @@ 
-3,6 +3,8 @@ package deletion import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) type DeleteRequestClientMetrics struct { @@ -14,13 +16,13 @@ func NewDeleteRequestClientMetrics(r prometheus.Registerer) *DeleteRequestClient m := DeleteRequestClientMetrics{} m.deleteRequestsLookupsTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "delete_request_lookups_total", Help: "Number times the client has looked up delete requests", }) m.deleteRequestsLookupsFailedTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "delete_request_lookups_failed_total", Help: "Number times the client has failed to look up delete requests", }) @@ -36,7 +38,7 @@ func newDeleteRequestHandlerMetrics(r prometheus.Registerer) *deleteRequestHandl m := deleteRequestHandlerMetrics{} m.deleteRequestsReceivedTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_delete_requests_received_total", Help: "Number of delete requests received per user", }, []string{"user"}) @@ -58,37 +60,37 @@ func newDeleteRequestsManagerMetrics(r prometheus.Registerer) *deleteRequestsMan m := deleteRequestsManagerMetrics{} m.deleteRequestsProcessedTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_delete_requests_processed_total", Help: "Number of delete requests processed per user", }, []string{"user"}) m.deleteRequestsChunksSelectedTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_delete_requests_chunks_selected_total", Help: "Number of chunks selected while building delete plans per user", }, []string{"user"}) m.deletionFailures = 
promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_delete_processing_fails_total", Help: "Number of times the delete phase of compaction has failed", }, []string{"cause"}) m.loadPendingRequestsAttemptsTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_load_pending_requests_attempts_total", Help: "Number of attempts that were made to load pending requests with status", }, []string{"status"}) m.oldestPendingDeleteRequestAgeSeconds = promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_oldest_pending_delete_request_age_seconds", Help: "Age of oldest pending delete request in seconds since they are over their cancellation period", }) m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_pending_delete_requests_count", Help: "Count of delete requests which are over their cancellation period and have not finished processing yet", }) m.deletedLinesTotal = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "compactor_deleted_lines", Help: "Number of deleted lines per user", }, []string{"user"}) diff --git a/pkg/compactor/generationnumber/metrics.go b/pkg/compactor/generationnumber/metrics.go index 1f302fa08fc7..c71f1b582156 100644 --- a/pkg/compactor/generationnumber/metrics.go +++ b/pkg/compactor/generationnumber/metrics.go @@ -2,6 +2,8 @@ package generationnumber import ( "github.com/prometheus/client_golang/prometheus" + + "github.com/grafana/loki/pkg/util/constants" ) // Make this package level because we want several instances of a loader to be able to report metrics @@ -21,7 +23,7 @@ func newGenLoaderMetrics(r prometheus.Registerer) *genLoaderMetrics { } cacheGenLoadFailures := 
prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "delete_cache_gen_load_failures_total", Help: "Total number of failures while loading cache generation number using gen number loader", }, []string{"source"}) diff --git a/pkg/distributor/clientpool/ingester_client_pool.go b/pkg/distributor/clientpool/ingester_client_pool.go index 3c4fa75401c9..0979d4607b44 100644 --- a/pkg/distributor/clientpool/ingester_client_pool.go +++ b/pkg/distributor/clientpool/ingester_client_pool.go @@ -38,6 +38,6 @@ func NewPool(name string, cfg PoolConfig, ring ring.ReadRing, factory ring_clien HealthCheckTimeout: cfg.RemoteTimeout, } - // TODO(chaudum): Allow cofiguration of metric name by the caller. + // TODO(chaudum): Allow configuration of metric name by the caller. return ring_client.NewPool(name, poolCfg, ring_client.NewRingServiceDiscovery(ring), factory, clients, logger) } diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 5e76243fd41d..687e52fbbc02 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -43,6 +43,7 @@ import ( "github.com/grafana/loki/pkg/logql/syntax" "github.com/grafana/loki/pkg/runtime" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" lokiring "github.com/grafana/loki/pkg/util/ring" "github.com/grafana/loki/pkg/validation" @@ -133,6 +134,7 @@ func New( ingestersRing ring.ReadRing, overrides Limits, registerer prometheus.Registerer, + metricsNamespace string, ) (*Distributor, error) { factory := cfg.factory if factory == nil { @@ -178,22 +180,22 @@ func New( healthyInstancesCount: atomic.NewUint32(0), rateLimitStrat: rateLimitStrat, ingesterAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_ingester_appends_total", Help: "The total number of batch appends sent to ingesters.", 
}, []string{"ingester"}), ingesterAppendTimeouts: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_ingester_append_timeouts_total", Help: "The total number of failed batch appends sent to ingesters due to timeouts.", }, []string{"ingester"}), replicationFactor: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_replication_factor", Help: "The configured replication factor.", }), streamShardCount: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "stream_sharding_count", Help: "Total number of times the distributor has sharded streams", }), @@ -203,7 +205,7 @@ func New( if overrides.IngestionRateStrategy() == validation.GlobalIngestionRateStrategy { d.rateLimitStrat = validation.GlobalIngestionRateStrategy - distributorsRing, distributorsLifecycler, err = newRingAndLifecycler(cfg.DistributorRing, d.healthyInstancesCount, util_log.Logger, registerer) + distributorsRing, distributorsLifecycler, err = newRingAndLifecycler(cfg.DistributorRing, d.healthyInstancesCount, util_log.Logger, registerer, metricsNamespace) if err != nil { return nil, err } @@ -731,7 +733,7 @@ func calculateShards(rate int64, pushSize, desiredRate int) int { } // newRingAndLifecycler creates a new distributor ring and lifecycler with all required lifecycler delegates -func newRingAndLifecycler(cfg RingConfig, instanceCount *atomic.Uint32, logger log.Logger, reg prometheus.Registerer) (*ring.Ring, *ring.BasicLifecycler, error) { +func newRingAndLifecycler(cfg RingConfig, instanceCount *atomic.Uint32, logger log.Logger, reg prometheus.Registerer, metricsNamespace string) (*ring.Ring, *ring.BasicLifecycler, error) { kvStore, err := kv.NewClient(cfg.KVStore, ring.GetCodec(), kv.RegistererWithKVName(reg, "distributor-lifecycler"), logger) if err != nil { return nil, nil, 
errors.Wrap(err, "failed to initialize distributors' KV store") @@ -748,12 +750,12 @@ func newRingAndLifecycler(cfg RingConfig, instanceCount *atomic.Uint32, logger l delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger) delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.HeartbeatTimeout, delegate, logger) - distributorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "distributor", ringKey, kvStore, delegate, logger, prometheus.WrapRegistererWithPrefix("cortex_", reg)) + distributorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "distributor", ringKey, kvStore, delegate, logger, prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", reg)) if err != nil { return nil, nil, errors.Wrap(err, "failed to initialize distributors' lifecycler") } - distributorsRing, err := ring.New(cfg.ToRingConfig(), "distributor", ringKey, logger, prometheus.WrapRegistererWithPrefix("cortex_", reg)) + distributorsRing, err := ring.New(cfg.ToRingConfig(), "distributor", ringKey, logger, prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", reg)) if err != nil { return nil, nil, errors.Wrap(err, "failed to initialize distributors' ring client") } diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index d41704c4e3fc..0e4527937a7c 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -33,6 +33,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" "github.com/grafana/loki/pkg/runtime" + "github.com/grafana/loki/pkg/util/constants" fe "github.com/grafana/loki/pkg/util/flagext" loki_flagext "github.com/grafana/loki/pkg/util/flagext" util_log "github.com/grafana/loki/pkg/util/log" @@ -1159,7 +1160,7 @@ func prepare(t *testing.T, numDistributors, numIngesters int, limits *validation overrides, err := validation.NewOverrides(*limits, nil) require.NoError(t, err) - d, err := New(distributorConfig, clientConfig, 
runtime.DefaultTenantConfigs(), ingestersRing, overrides, prometheus.NewPedanticRegistry()) + d, err := New(distributorConfig, clientConfig, runtime.DefaultTenantConfigs(), ingestersRing, overrides, prometheus.NewPedanticRegistry(), constants.Loki) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), d)) distributors[i] = d diff --git a/pkg/distributor/ratestore_metrics.go b/pkg/distributor/ratestore_metrics.go index 696bbf51701e..2e31e3934f0b 100644 --- a/pkg/distributor/ratestore_metrics.go +++ b/pkg/distributor/ratestore_metrics.go @@ -4,6 +4,8 @@ import ( "github.com/grafana/dskit/instrument" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) type ratestoreMetrics struct { @@ -21,51 +23,51 @@ type ratestoreMetrics struct { func newRateStoreMetrics(reg prometheus.Registerer) *ratestoreMetrics { return &ratestoreMetrics{ rateRefreshFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_refresh_failures_total", Help: "The total number of failed attempts to refresh the distributor's view of stream rates", }, []string{"source"}), streamCount: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_streams", Help: "The number of unique streams reported by all ingesters. 
Sharded streams are combined", }), expiredCount: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_expired_streams_total", Help: "The number of streams that have been expired by the ratestore", }), maxStreamShardCount: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_max_stream_shards", Help: "The number of shards for a single stream reported by ingesters during a sync operation.", }), streamShardCount: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_stream_shards", Help: "The distribution of number of shards for a single stream reported by ingesters during a sync operation.", Buckets: []float64{0, 1, 2, 4, 8, 16, 32, 64, 128}, }), maxStreamRate: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_max_stream_rate_bytes", Help: "The maximum stream rate for any stream reported by ingesters during a sync operation. Sharded Streams are combined.", }), streamRate: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_stream_rate_bytes", Help: "The distribution of stream rates for any stream reported by ingesters during a sync operation. Sharded Streams are combined.", Buckets: prometheus.ExponentialBuckets(20000, 2, 14), // biggest bucket is 20000*2^(14-1) = 163,840,000 (~163.84MB) }), maxUniqueStreamRate: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_max_unique_stream_rate_bytes", Help: "The maximum stream rate for any stream reported by ingesters during a sync operation. 
Sharded Streams are considered separate.", }), refreshDuration: instrument.NewHistogramCollector( promauto.With(reg).NewHistogramVec( prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "rate_store_refresh_duration_seconds", Help: "Time spent refreshing the rate store", Buckets: prometheus.DefBuckets, diff --git a/pkg/distributor/writefailures/metrics.go b/pkg/distributor/writefailures/metrics.go index 7acc50d1c0d9..e62d6f19b4f7 100644 --- a/pkg/distributor/writefailures/metrics.go +++ b/pkg/distributor/writefailures/metrics.go @@ -3,6 +3,8 @@ package writefailures import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) type metrics struct { @@ -13,13 +15,13 @@ type metrics struct { func newMetrics(reg prometheus.Registerer, subsystem string) *metrics { return &metrics{ loggedCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "write_failures_logged_total", Help: "The total number of write failures logs successfully emitted for a tenant.", ConstLabels: prometheus.Labels{"subsystem": subsystem}, }, []string{"org_id"}), discardedCount: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "write_failures_discarded_total", Help: "The total number of write failures logs discarded for a tenant.", ConstLabels: prometheus.Labels{"subsystem": subsystem}, diff --git a/pkg/ingester/checkpoint_test.go b/pkg/ingester/checkpoint_test.go index 00e57d7ddc37..2999bd930ecf 100644 --- a/pkg/ingester/checkpoint_test.go +++ b/pkg/ingester/checkpoint_test.go @@ -21,6 +21,7 @@ import ( "github.com/grafana/loki/pkg/logql/log" "github.com/grafana/loki/pkg/runtime" "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -68,7 +69,7 @@ func 
TestIngesterWAL(t *testing.T) { } } - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -111,7 +112,7 @@ func TestIngesterWAL(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -125,7 +126,7 @@ func TestIngesterWAL(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -148,7 +149,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { } } - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, 
constants.Loki) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -194,7 +195,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -251,7 +252,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { } } - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -272,7 +273,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester, ensuring we replayed from WAL. 
- i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -293,7 +294,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { } } - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -314,7 +315,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester, ensuring we can replay from the checkpoint as well. 
- i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -589,7 +590,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { } } - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -661,7 +662,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) diff --git a/pkg/ingester/flush.go b/pkg/ingester/flush.go index bcc4a8bf8a15..35f82b4d2f9f 100644 --- a/pkg/ingester/flush.go +++ b/pkg/ingester/flush.go @@ -43,7 +43,7 @@ const ( func (i *Ingester) InitFlushQueues() { i.flushQueuesDone.Add(i.cfg.ConcurrentFlushes) for j := 0; j < i.cfg.ConcurrentFlushes; j++ { - i.flushQueues[j] = util.NewPriorityQueue(flushQueueLength) + i.flushQueues[j] = util.NewPriorityQueue(i.metrics.flushQueueLength) go 
i.flushLoop(j) } } diff --git a/pkg/ingester/flush_test.go b/pkg/ingester/flush_test.go index f7e61238036e..a4cf09451fb7 100644 --- a/pkg/ingester/flush_test.go +++ b/pkg/ingester/flush_test.go @@ -35,6 +35,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/fetcher" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index/stats" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -276,7 +277,7 @@ func newTestStore(t require.TestingT, cfg Config, walOverride WAL) (*testStore, limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing)) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index e8e8f5ff2068..f5c9e50350ad 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -22,7 +22,6 @@ import ( "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "google.golang.org/grpc/health/grpc_health_v1" @@ -62,10 +61,6 @@ const ( var ( ErrReadOnly = errors.New("Ingester is shutting down") - flushQueueLength = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "cortex_ingester_flush_queue_length", - Help: "The total number of series pending in the flush queue.", - }) compressionStats = analytics.NewString("ingester_compression") targetSizeStats = analytics.NewInt("ingester_target_size_bytes") walStats = analytics.NewString("ingester_wal") @@ -236,7 +231,7 @@ type Ingester struct { } // New makes 
a new Ingester. -func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg) (*Ingester, error) { +func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg, metricsNamespace string) (*Ingester, error) { if cfg.ingesterClientFactory == nil { cfg.ingesterClientFactory = client.New } @@ -246,7 +241,7 @@ func New(cfg Config, clientConfig client.Config, store Store, limits Limits, con if cfg.WAL.Enabled { walStats.Set("enabled") } - metrics := newIngesterMetrics(registerer) + metrics := newIngesterMetrics(registerer, metricsNamespace) i := &Ingester{ cfg: cfg, @@ -284,7 +279,7 @@ func New(cfg Config, clientConfig client.Config, store Store, limits Limits, con } i.wal = wal - i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", RingKey, !cfg.WAL.Enabled || cfg.WAL.FlushOnShutdown, util_log.Logger, prometheus.WrapRegistererWithPrefix("cortex_", registerer)) + i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", RingKey, !cfg.WAL.Enabled || cfg.WAL.FlushOnShutdown, util_log.Logger, prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", registerer)) if err != nil { return nil, err } diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 5f2f788e6feb..5e44d63eca64 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -40,6 +40,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" "github.com/grafana/loki/pkg/storage/stores/index/stats" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -52,7 +53,7 @@ func TestPrepareShutdownMarkerPathNotSet(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, 
client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -75,7 +76,7 @@ func TestPrepareShutdown(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -136,7 +137,7 @@ func TestIngester_GetStreamRates_Correctness(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -168,7 +169,7 @@ func BenchmarkGetStreamRatesAllocs(b *testing.B) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(b, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -192,7 +193,7 @@ func TestIngester(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, 
client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -377,7 +378,7 @@ func TestIngesterStreamLimitExceeded(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -724,7 +725,7 @@ func Test_InMemoryLabels(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -1064,7 +1065,7 @@ func TestStats(t *testing.T) { limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1091,7 +1092,7 @@ func TestVolume(t *testing.T) { limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, 
runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1170,7 +1171,7 @@ func createIngesterServer(t *testing.T, ingesterConfig Config) (ingesterClient, limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) listener := bufconn.Listen(1024 * 1024) diff --git a/pkg/ingester/instance.go b/pkg/ingester/instance.go index 75c9b3ffc0af..14306b01dc4a 100644 --- a/pkg/ingester/instance.go +++ b/pkg/ingester/instance.go @@ -38,6 +38,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/deletion" util_log "github.com/grafana/loki/pkg/util/log" mathutil "github.com/grafana/loki/pkg/util/math" @@ -56,22 +57,22 @@ const ( var ( memoryStreams = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_memory_streams", Help: "The total number of streams in memory per tenant.", }, []string{"tenant"}) memoryStreamsLabelsBytes = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_memory_streams_labels_bytes", Help: "Total bytes of labels of the streams in memory.", }) streamsCreatedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_streams_created_total", Help: "The total number of streams created per tenant.", }, []string{"tenant"}) streamsRemovedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - 
Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_streams_removed_total", Help: "The total number of streams removed per tenant.", }, []string{"tenant"}) diff --git a/pkg/ingester/instance_test.go b/pkg/ingester/instance_test.go index ff54db7c33c2..ac29f3516df4 100644 --- a/pkg/ingester/instance_test.go +++ b/pkg/ingester/instance_test.go @@ -25,6 +25,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -56,7 +57,7 @@ var defaultPeriodConfigs = []config.PeriodConfig{ }, } -var NilMetrics = newIngesterMetrics(nil) +var NilMetrics = newIngesterMetrics(nil, constants.Loki) func TestLabelsCollisions(t *testing.T) { limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 96b2af61f379..e3d3a41c1a59 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -5,6 +5,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/analytics" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -62,6 +63,8 @@ type ingesterMetrics struct { // Shutdown marker for ingester scale down shutdownMarker prometheus.Gauge + + flushQueueLength prometheus.Gauge } // setRecoveryBytesInUse bounds the bytes reports to >= 0. 
@@ -80,7 +83,7 @@ const ( duplicateReason = "duplicate" ) -func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { +func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *ingesterMetrics { return &ingesterMetrics{ walDiskFullFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "loki_ingester_wal_disk_full_failures_total", @@ -176,46 +179,46 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { Help: "Total number of ingesters automatically forgotten", }), chunkUtilization: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_utilization", Help: "Distribution of stored chunk utilization (when stored).", Buckets: prometheus.LinearBuckets(0, 0.2, 6), }), memoryChunks: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_memory_chunks", Help: "The total number of chunks in memory.", }), chunkEntries: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_entries", Help: "Distribution of stored lines per chunk (when stored).", Buckets: prometheus.ExponentialBuckets(200, 2, 9), // biggest bucket is 200*2^(9-1) = 51200 }), chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_size_bytes", Help: "Distribution of stored chunk sizes (when stored).", Buckets: prometheus.ExponentialBuckets(20000, 2, 10), // biggest bucket is 20000*2^(10-1) = 10,240,000 (~10.2MB) }), chunkCompressionRatio: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_compression_ratio", Help: "Compression ratio of chunks (when stored).", Buckets: prometheus.LinearBuckets(.75, 2, 10), }), chunksPerTenant: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", 
+ Namespace: constants.Loki, Name: "ingester_chunks_stored_total", Help: "Total stored chunks per tenant.", }, []string{"tenant"}), chunkSizePerTenant: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_stored_bytes_total", Help: "Total bytes stored in chunks per tenant.", }, []string{"tenant"}), chunkAge: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_age_seconds", Help: "Distribution of chunk ages (when stored).", // with default settings chunks should flush between 5 min and 12 hours @@ -223,19 +226,19 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { Buckets: []float64{60, 300, 600, 1800, 3600, 7200, 14400, 36000, 43200, 57600}, }), chunkEncodeTime: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_encode_time_seconds", Help: "Distribution of chunk encode times.", // 10ms to 10s. 
Buckets: prometheus.ExponentialBuckets(0.01, 4, 6), }), chunksFlushedPerReason: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunks_flushed_total", Help: "Total flushed chunks per reason.", }, []string{"reason"}), chunkLifespan: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunk_bounds_hours", Help: "Distribution of chunk end-start durations.", // 1h -> 8hr @@ -248,12 +251,12 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { flushedChunksLifespanStats: analytics.NewStatistics("ingester_flushed_chunks_lifespan_seconds"), flushedChunksUtilizationStats: analytics.NewStatistics("ingester_flushed_chunks_utilization"), chunksCreatedTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ingester_chunks_created_total", Help: "The total number of chunks created in the ingester.", }), samplesPerChunk: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ingester", Name: "samples_per_chunk", Help: "The number of samples in a chunk.", @@ -261,7 +264,7 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { Buckets: prometheus.LinearBuckets(4096, 2048, 6), }), blocksPerChunk: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ingester", Name: "blocks_per_chunk", Help: "The number of blocks in a chunk.", @@ -272,10 +275,17 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics { chunkCreatedStats: analytics.NewCounter("ingester_chunk_created"), shutdownMarker: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ingester", Name: "shutdown_marker", Help: "1 if prepare shutdown has been called, 0 otherwise", }), + + flushQueueLength: 
promauto.With(r).NewGauge(prometheus.GaugeOpts{ + Namespace: metricsNamespace, + Subsystem: "ingester", + Name: "flush_queue_length", + Help: "The total number of series pending in the flush queue.", + }), } } diff --git a/pkg/ingester/recovery_test.go b/pkg/ingester/recovery_test.go index 7a0351c5e365..69767ddd522f 100644 --- a/pkg/ingester/recovery_test.go +++ b/pkg/ingester/recovery_test.go @@ -22,6 +22,7 @@ import ( "github.com/grafana/loki/pkg/logproto" loki_runtime "github.com/grafana/loki/pkg/runtime" "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -226,7 +227,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) { chunks: map[string][]chunk.Chunk{}, } - i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) mkSample := func(i int) *logproto.PushRequest { @@ -260,7 +261,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) { require.Equal(t, false, iter.Next()) // create a new ingester now - i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}) + i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki) require.NoError(t, err) // recover the checkpointed series diff --git a/pkg/ingester/replay_controller_test.go b/pkg/ingester/replay_controller_test.go index b4e1b81af9e1..5559022eae97 100644 --- a/pkg/ingester/replay_controller_test.go +++ b/pkg/ingester/replay_controller_test.go @@ -6,6 +6,8 @@ import ( "time" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/util/constants" ) type dumbFlusher struct { @@ -24,7 +26,7 @@ func (f *dumbFlusher) Flush() { } } -func 
nilMetrics() *ingesterMetrics { return newIngesterMetrics(nil) } +func nilMetrics() *ingesterMetrics { return newIngesterMetrics(nil, constants.Loki) } func TestReplayController(t *testing.T) { var ops []string diff --git a/pkg/logcli/query/query.go b/pkg/logcli/query/query.go index 615ae84c2dd8..6a71f0979abc 100644 --- a/pkg/logcli/query/query.go +++ b/pkg/logcli/query/query.go @@ -28,6 +28,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" "github.com/grafana/loki/pkg/util/cfg" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/marshal" "github.com/grafana/loki/pkg/validation" @@ -441,7 +442,7 @@ func (q *Query) DoLocalQuery(out output.LogOutput, statistics bool, orgID string conf.StorageConfig.TSDBShipperConfig.Mode = indexshipper.ModeReadOnly conf.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled = true - querier, err := storage.NewStore(conf.StorageConfig, conf.ChunkStoreConfig, conf.SchemaConfig, limits, cm, prometheus.DefaultRegisterer, util_log.Logger) + querier, err := storage.NewStore(conf.StorageConfig, conf.ChunkStoreConfig, conf.SchemaConfig, limits, cm, prometheus.DefaultRegisterer, util_log.Logger, constants.Loki) if err != nil { return err } diff --git a/pkg/loghttp/push/push.go b/pkg/loghttp/push/push.go index ab6c41a9839f..dffa5ab1a05e 100644 --- a/pkg/loghttp/push/push.go +++ b/pkg/loghttp/push/push.go @@ -23,6 +23,7 @@ import ( "github.com/grafana/loki/pkg/logql/syntax" "github.com/grafana/loki/pkg/util" loki_util "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/unmarshal" unmarshal2 "github.com/grafana/loki/pkg/util/unmarshal/legacy" ) @@ -31,17 +32,17 @@ var ( contentType = http.CanonicalHeaderKey("Content-Type") contentEnc = http.CanonicalHeaderKey("Content-Encoding") bytesIngested = 
promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_bytes_received_total", Help: "The total number of uncompressed bytes received per tenant. Includes structured metadata bytes.", }, []string{"tenant", "retention_hours"}) structuredMetadataBytesIngested = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_structured_metadata_bytes_received_total", Help: "The total number of uncompressed bytes received per tenant for entries' structured metadata", }, []string{"tenant", "retention_hours"}) linesIngested = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "distributor_lines_received_total", Help: "The total number of lines received per tenant", }, []string{"tenant"}) diff --git a/pkg/logql/engine.go b/pkg/logql/engine.go index 7729f2941f88..8bdcfe8501fc 100644 --- a/pkg/logql/engine.go +++ b/pkg/logql/engine.go @@ -31,6 +31,7 @@ import ( "github.com/grafana/loki/pkg/logqlmodel" "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/httpreq" logutil "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/spanlogger" @@ -50,7 +51,7 @@ var ( }, []string{"query_type"}) QueriesBlocked = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "blocked_queries", Help: "Count of queries blocked by per-tenant policy", }, []string{"user"}) diff --git a/pkg/logql/mapper_metrics.go b/pkg/logql/mapper_metrics.go index ce9598de3beb..3588231700a6 100644 --- a/pkg/logql/mapper_metrics.go +++ b/pkg/logql/mapper_metrics.go @@ -3,6 +3,8 @@ package logql import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) // expression type 
used in metrics @@ -28,19 +30,19 @@ type MapperMetrics struct { func newMapperMetrics(registerer prometheus.Registerer, mapper string) *MapperMetrics { return &MapperMetrics{ DownstreamQueries: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_shards_total", Help: "Number of downstream queries by expression type", ConstLabels: prometheus.Labels{"mapper": mapper}, }, []string{"type"}), ParsedQueries: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_sharding_parsed_queries_total", Help: "Number of parsed queries by evaluation type", ConstLabels: prometheus.Labels{"mapper": mapper}, }, []string{"type"}), DownstreamFactor: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_shard_factor", Help: "Number of downstream queries per request", Buckets: prometheus.ExponentialBuckets(1, 4, 8), diff --git a/pkg/logql/metrics.go b/pkg/logql/metrics.go index 049001b935fa..3ba3a9c61535 100644 --- a/pkg/logql/metrics.go +++ b/pkg/logql/metrics.go @@ -19,6 +19,7 @@ import ( "github.com/grafana/loki/pkg/logqlmodel" logql_stats "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/querier/astmapper" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/httpreq" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/spanlogger" @@ -41,38 +42,38 @@ const ( var ( bytesPerSecond = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_bytes_processed_per_seconds", Help: "Distribution of bytes processed per second for LogQL queries.", // 50MB 100MB 200MB 400MB 600MB 800MB 1GB 2GB 3GB 4GB 5GB 6GB 7GB 8GB 9GB 10GB 15GB 20GB 30GB, 40GB 50GB 60GB Buckets: []float64{50 * 1e6, 100 * 1e6, 400 * 1e6, 
600 * 1e6, 800 * 1e6, 1 * 1e9, 2 * 1e9, 3 * 1e9, 4 * 1e9, 5 * 1e9, 6 * 1e9, 7 * 1e9, 8 * 1e9, 9 * 1e9, 10 * 1e9, 15 * 1e9, 20 * 1e9, 30 * 1e9, 40 * 1e9, 50 * 1e9, 60 * 1e9}, }, []string{"status_code", "type", "range", "latency_type", "sharded"}) execLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_latency_seconds", Help: "Distribution of latency for LogQL queries.", // 0.25 0.5 1 2 4 8 16 32 64 128 Buckets: prometheus.ExponentialBuckets(0.250, 2, 10), }, []string{"status_code", "type", "range"}) chunkDownloadLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_chunk_download_latency_seconds", Help: "Distribution of chunk downloads latency for LogQL queries.", // 0.25 0.5 1 2 4 8 16 32 64 128 Buckets: prometheus.ExponentialBuckets(0.250, 2, 10), }, []string{"status_code", "type", "range"}) duplicatesTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_duplicates_total", Help: "Total count of duplicates found while executing LogQL queries.", }) chunkDownloadedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_downloaded_chunk_total", Help: "Total count of chunks downloaded found while executing LogQL queries.", }, []string{"status_code", "type", "range"}) ingesterLineTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "logql_querystats_ingester_sent_lines_total", Help: "Total count of lines sent from ingesters while executing LogQL queries.", }) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 7b28d4b5ef41..99717057566c 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -57,6 +57,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" 
"github.com/grafana/loki/pkg/tracing" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/fakeauth" "github.com/grafana/loki/pkg/util/limiter" util_log "github.com/grafana/loki/pkg/util/log" @@ -110,11 +111,13 @@ type Config struct { Common common.Config `yaml:"common,omitempty"` ShutdownDelay time.Duration `yaml:"shutdown_delay" category:"experimental"` + + MetricsNamespace string `yaml:"metrics_namespace"` } // RegisterFlags registers flag. func (c *Config) RegisterFlags(f *flag.FlagSet) { - c.Server.MetricsNamespace = "loki" + c.Server.MetricsNamespace = constants.Loki c.Server.ExcludeRequestInLog = true // Set the default module list to 'all' @@ -146,6 +149,8 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&c.ShutdownDelay, "shutdown-delay", 0, "How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Loki will report 503 Service Unavailable status via /ready endpoint.") + f.StringVar(&c.MetricsNamespace, "metrics-namespace", "cortex", "Namespace of the metrics that in previous releases had cortex as namespace.") + c.registerServerFlagsWithChangedDefaultValues(f) c.Common.RegisterFlags(f) c.Distributor.RegisterFlags(f) diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 2b00c20be374..7c67f5e3df70 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -69,6 +69,7 @@ import ( boltdbcompactor "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/boltdb/compactor" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/httpreq" "github.com/grafana/loki/pkg/util/limiter" util_log "github.com/grafana/loki/pkg/util/log" @@ -131,7 +132,7 @@ const ( ) func (t *Loki) initServer() (services.Service, error) { - prometheus.MustRegister(version.NewCollector("loki")) + 
prometheus.MustRegister(version.NewCollector(constants.Loki)) // unregister default go collector prometheus.Unregister(collectors.NewGoCollector()) // register collector with additional metrics @@ -224,7 +225,7 @@ func (t *Loki) initInternalServer() (services.Service, error) { } func (t *Loki) initRing() (_ services.Service, err error) { - t.ring, err = ring.New(t.Cfg.Ingester.LifecyclerConfig.RingConfig, "ingester", ingester.RingKey, util_log.Logger, prometheus.WrapRegistererWithPrefix("cortex_", prometheus.DefaultRegisterer)) + t.ring, err = ring.New(t.Cfg.Ingester.LifecyclerConfig.RingConfig, "ingester", ingester.RingKey, util_log.Logger, prometheus.WrapRegistererWithPrefix(t.Cfg.MetricsNamespace+"_", prometheus.DefaultRegisterer)) if err != nil { return } @@ -313,6 +314,7 @@ func (t *Loki) initDistributor() (services.Service, error) { t.ring, t.Overrides, prometheus.DefaultRegisterer, + t.Cfg.MetricsNamespace, ) if err != nil { return nil, err @@ -537,7 +539,7 @@ func (t *Loki) initIngester() (_ services.Service, err error) { level.Warn(util_log.Logger).Log("msg", "The config setting shutdown marker path is not set. 
The /ingester/prepare_shutdown endpoint won't work") } - t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging) + t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace) if err != nil { return } @@ -617,7 +619,7 @@ func (t *Loki) initStore() (services.Service, error) { } } - store, err := storage.NewStore(t.Cfg.StorageConfig, t.Cfg.ChunkStoreConfig, t.Cfg.SchemaConfig, t.Overrides, t.clientMetrics, prometheus.DefaultRegisterer, util_log.Logger) + store, err := storage.NewStore(t.Cfg.StorageConfig, t.Cfg.ChunkStoreConfig, t.Cfg.SchemaConfig, t.Overrides, t.clientMetrics, prometheus.DefaultRegisterer, util_log.Logger, t.Cfg.MetricsNamespace) if err != nil { return nil, err } @@ -771,7 +773,7 @@ func (t *Loki) setupAsyncStore() error { } func (t *Loki) initIngesterQuerier() (_ services.Service, err error) { - t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay) + t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay, t.Cfg.MetricsNamespace) if err != nil { return nil, err } @@ -795,6 +797,7 @@ func (t *Loki) initQueryFrontendMiddleware() (_ services.Service, err error) { t.Cfg.SchemaConfig, t.cacheGenerationLoader, t.Cfg.CompactorConfig.RetentionEnabled, prometheus.DefaultRegisterer, + t.Cfg.MetricsNamespace, ) if err != nil { return @@ -874,6 +877,7 @@ func (t *Loki) initQueryFrontend() (_ services.Service, err error) { t.Cfg.Server.GRPCListenPort, util_log.Logger, prometheus.DefaultRegisterer, + t.Cfg.MetricsNamespace, t.Codec, ) if err != nil { @@ -894,7 +898,7 @@ func (t *Loki) initQueryFrontend() (_ services.Service, err error) { roundTripper := 
queryrange.NewSerializeRoundTripper(t.QueryFrontEndMiddleware.Wrap(frontendTripper), queryrange.DefaultCodec) - frontendHandler := transport.NewHandler(t.Cfg.Frontend.Handler, roundTripper, util_log.Logger, prometheus.DefaultRegisterer) + frontendHandler := transport.NewHandler(t.Cfg.Frontend.Handler, roundTripper, util_log.Logger, prometheus.DefaultRegisterer, t.Cfg.MetricsNamespace) if t.Cfg.Frontend.CompressResponses { frontendHandler = gziphandler.GzipHandler(frontendHandler) } @@ -1041,6 +1045,7 @@ func (t *Loki) initRuler() (_ services.Service, err error) { util_log.Logger, t.RulerStorage, t.Overrides, + t.Cfg.MetricsNamespace, ) if err != nil { @@ -1135,14 +1140,14 @@ func (t *Loki) initRuleEvaluator() (services.Service, error) { func (t *Loki) initMemberlistKV() (services.Service, error) { reg := prometheus.DefaultRegisterer - t.Cfg.MemberlistKV.MetricsNamespace = "loki" + t.Cfg.MemberlistKV.MetricsNamespace = constants.Loki t.Cfg.MemberlistKV.Codecs = []codec.Codec{ ring.GetCodec(), analytics.JSONCodec, } dnsProviderReg := prometheus.WrapRegistererWithPrefix( - "cortex_", + t.Cfg.MetricsNamespace+"_", prometheus.WrapRegistererWith( prometheus.Labels{"name": "memberlist"}, reg, @@ -1208,7 +1213,7 @@ func (t *Loki) initCompactor() (services.Service, error) { } } - t.compactor, err = compactor.NewCompactor(t.Cfg.CompactorConfig, objectClients, deleteRequestStoreClient, t.Cfg.SchemaConfig, t.Overrides, prometheus.DefaultRegisterer) + t.compactor, err = compactor.NewCompactor(t.Cfg.CompactorConfig, objectClients, deleteRequestStoreClient, t.Cfg.SchemaConfig, t.Overrides, prometheus.DefaultRegisterer, t.Cfg.MetricsNamespace) if err != nil { return nil, err } @@ -1296,7 +1301,7 @@ func (t *Loki) initIndexGateway() (services.Service, error) { tableRange := period.GetIndexTableNumberRange(periodEndTime) indexClient, err := storage.NewIndexClient(period, tableRange, t.Cfg.StorageConfig, t.Cfg.SchemaConfig, t.Overrides, t.clientMetrics, shardingStrategy, - 
prometheus.DefaultRegisterer, log.With(util_log.Logger, "index-store", fmt.Sprintf("%s-%s", period.IndexType, period.From.String())), + prometheus.DefaultRegisterer, log.With(util_log.Logger, "index-store", fmt.Sprintf("%s-%s", period.IndexType, period.From.String())), t.Cfg.MetricsNamespace, ) if err != nil { return nil, err @@ -1417,7 +1422,7 @@ func (t *Loki) initBloomCompactorRing() (services.Service, error) { } func (t *Loki) initQueryScheduler() (services.Service, error) { - s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, t.querySchedulerRingManager, prometheus.DefaultRegisterer) + s, err := scheduler.NewScheduler(t.Cfg.QueryScheduler, t.Overrides, util_log.Logger, t.querySchedulerRingManager, prometheus.DefaultRegisterer, t.Cfg.MetricsNamespace) if err != nil { return nil, err } diff --git a/pkg/lokifrontend/frontend/config.go b/pkg/lokifrontend/frontend/config.go index 290c097de266..83b2f517148a 100644 --- a/pkg/lokifrontend/frontend/config.go +++ b/pkg/lokifrontend/frontend/config.go @@ -39,7 +39,7 @@ func (cfg *CombinedFrontendConfig) RegisterFlags(f *flag.FlagSet) { // Returned RoundTripper can be wrapped in more round-tripper middlewares, and then eventually registered // into HTTP server using the Handler from this package. Returned RoundTripper is always non-nil // (if there are no errors), and it uses the returned frontend (if any). 
-func InitFrontend(cfg CombinedFrontendConfig, ring ring.ReadRing, limits v1.Limits, grpcListenPort int, log log.Logger, reg prometheus.Registerer, codec transport.Codec) (queryrangebase.Handler, *v1.Frontend, *v2.Frontend, error) { +func InitFrontend(cfg CombinedFrontendConfig, ring ring.ReadRing, limits v1.Limits, grpcListenPort int, log log.Logger, reg prometheus.Registerer, metricsNamespace string, codec transport.Codec) (queryrangebase.Handler, *v1.Frontend, *v2.Frontend, error) { switch { case cfg.DownstreamURL != "": // If the user has specified a downstream Prometheus, then we should use that. @@ -60,12 +60,12 @@ func InitFrontend(cfg CombinedFrontendConfig, ring ring.ReadRing, limits v1.Limi cfg.FrontendV2.Port = grpcListenPort } - fr, err := v2.NewFrontend(cfg.FrontendV2, ring, log, reg, codec) + fr, err := v2.NewFrontend(cfg.FrontendV2, ring, log, reg, codec, metricsNamespace) return fr, nil, fr, err default: // No scheduler = use original frontend. - fr, err := v1.New(cfg.FrontendV1, limits, log, reg) + fr, err := v1.New(cfg.FrontendV1, limits, log, reg, metricsNamespace) if err != nil { return nil, nil, nil, err } diff --git a/pkg/lokifrontend/frontend/transport/handler.go b/pkg/lokifrontend/frontend/transport/handler.go index 92c29cc89644..06f1ebe1c7b6 100644 --- a/pkg/lokifrontend/frontend/transport/handler.go +++ b/pkg/lokifrontend/frontend/transport/handler.go @@ -68,7 +68,7 @@ type Handler struct { } // NewHandler creates a new frontend handler. 
-func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer) http.Handler { +func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer, metricsNamespace string) http.Handler { h := &Handler{ cfg: cfg, log: log, @@ -77,18 +77,21 @@ func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logge if cfg.QueryStatsEnabled { h.querySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_query_seconds_total", - Help: "Total amount of wall clock time spend processing queries.", + Namespace: metricsNamespace, + Name: "query_seconds_total", + Help: "Total amount of wall clock time spend processing queries.", }, []string{"user"}) h.querySeries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_query_fetched_series_total", - Help: "Number of series fetched to execute a query.", + Namespace: metricsNamespace, + Name: "query_fetched_series_total", + Help: "Number of series fetched to execute a query.", }, []string{"user"}) h.queryBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_query_fetched_chunks_bytes_total", - Help: "Size of all chunks fetched to execute a query in bytes.", + Namespace: metricsNamespace, + Name: "query_fetched_chunks_bytes_total", + Help: "Size of all chunks fetched to execute a query in bytes.", }, []string{"user"}) h.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(func(user string) { diff --git a/pkg/lokifrontend/frontend/v1/frontend.go b/pkg/lokifrontend/frontend/v1/frontend.go index 836baf283a37..ff32cbf7b98f 100644 --- a/pkg/lokifrontend/frontend/v1/frontend.go +++ b/pkg/lokifrontend/frontend/v1/frontend.go @@ -80,17 +80,18 @@ type request struct { } // New creates a new frontend. Frontend implements service, and must be started and stopped. 
-func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) { - queueMetrics := queue.NewMetrics("query_frontend", registerer) +func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer, metricsNamespace string) (*Frontend, error) { + queueMetrics := queue.NewMetrics(registerer, metricsNamespace, "query_frontend") f := &Frontend{ cfg: cfg, log: log, limits: limits, queueMetrics: queueMetrics, queueDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Name: "cortex_query_frontend_queue_duration_seconds", - Help: "Time spend by requests queued.", - Buckets: prometheus.DefBuckets, + Namespace: metricsNamespace, + Name: "query_frontend_queue_duration_seconds", + Help: "Time spend by requests queued.", + Buckets: prometheus.DefBuckets, }), } @@ -104,8 +105,9 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Regist } f.numClients = promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_query_frontend_connected_clients", - Help: "Number of worker clients currently connected to the frontend.", + Namespace: metricsNamespace, + Name: "query_frontend_connected_clients", + Help: "Number of worker clients currently connected to the frontend.", }, f.requestQueue.GetConnectedConsumersMetric) f.Service = services.NewBasicService(f.starting, f.running, f.stopping) diff --git a/pkg/lokifrontend/frontend/v1/frontend_test.go b/pkg/lokifrontend/frontend/v1/frontend_test.go index bd417f488598..6cfc1964d9d4 100644 --- a/pkg/lokifrontend/frontend/v1/frontend_test.go +++ b/pkg/lokifrontend/frontend/v1/frontend_test.go @@ -35,6 +35,7 @@ import ( "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" querier_worker "github.com/grafana/loki/pkg/querier/worker" "github.com/grafana/loki/pkg/queue" + "github.com/grafana/loki/pkg/util/constants" ) const ( @@ -131,7 +132,7 @@ func TestFrontendCheckReady(t *testing.T) { {"no url, no clients is not 
ready", 0, "not ready: number of queriers connected to query-frontend is 0", false}, } { t.Run(tt.name, func(t *testing.T) { - qm := queue.NewMetrics("query_frontend", nil) + qm := queue.NewMetrics(nil, constants.Loki, "query_frontend") f := &Frontend{ log: log.NewNopLogger(), requestQueue: queue.NewRequestQueue(5, 0, qm), @@ -210,17 +211,17 @@ func TestFrontendMetricsCleanup(t *testing.T) { assert.JSONEq(t, `{"values":["Hello", "world"]}`, string(body)) require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` - # HELP cortex_query_frontend_queue_length Number of queries in the queue. - # TYPE cortex_query_frontend_queue_length gauge - cortex_query_frontend_queue_length{user="1"} 0 - `), "cortex_query_frontend_queue_length")) + # HELP loki_query_frontend_queue_length Number of queries in the queue. + # TYPE loki_query_frontend_queue_length gauge + loki_query_frontend_queue_length{user="1"} 0 + `), "loki_query_frontend_queue_length")) fr.cleanupInactiveUserMetrics("1") require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` - # HELP cortex_query_frontend_queue_length Number of queries in the queue. - # TYPE cortex_query_frontend_queue_length gauge - `), "cortex_query_frontend_queue_length")) + # HELP loki_query_frontend_queue_length Number of queries in the queue. 
+ # TYPE loki_query_frontend_queue_length gauge + `), "loki_query_frontend_queue_length")) } testFrontend(t, defaultFrontendConfig(), handler, test, matchMaxConcurrency, reg) @@ -242,7 +243,7 @@ func testFrontend(t *testing.T, config Config, handler queryrangebase.Handler, t httpListen, err := net.Listen("tcp", "localhost:0") require.NoError(t, err) - v1, err := New(config, limits{}, logger, reg) + v1, err := New(config, limits{}, logger, reg, constants.Loki) require.NoError(t, err) require.NotNil(t, v1) require.NoError(t, services.StartAndAwaitRunning(context.Background(), v1)) diff --git a/pkg/lokifrontend/frontend/v1/queue_test.go b/pkg/lokifrontend/frontend/v1/queue_test.go index 24b12863ca0c..efc04e338981 100644 --- a/pkg/lokifrontend/frontend/v1/queue_test.go +++ b/pkg/lokifrontend/frontend/v1/queue_test.go @@ -18,12 +18,13 @@ import ( "google.golang.org/grpc/metadata" "github.com/grafana/loki/pkg/lokifrontend/frontend/v1/frontendv1pb" + "github.com/grafana/loki/pkg/util/constants" ) func setupFrontend(t *testing.T, config Config) *Frontend { logger := log.NewNopLogger() - frontend, err := New(config, limits{queriers: 3}, logger, nil) + frontend, err := New(config, limits{queriers: 3}, logger, nil, constants.Loki) require.NoError(t, err) t.Cleanup(func() { diff --git a/pkg/lokifrontend/frontend/v2/frontend.go b/pkg/lokifrontend/frontend/v2/frontend.go index 0e36d3765ed4..cc5ef821092e 100644 --- a/pkg/lokifrontend/frontend/v2/frontend.go +++ b/pkg/lokifrontend/frontend/v2/frontend.go @@ -118,7 +118,7 @@ type enqueueResult struct { } // NewFrontend creates a new frontend. 
-func NewFrontend(cfg Config, ring ring.ReadRing, log log.Logger, reg prometheus.Registerer, codec transport.Codec) (*Frontend, error) { +func NewFrontend(cfg Config, ring ring.ReadRing, log log.Logger, reg prometheus.Registerer, codec transport.Codec, metricsNamespace string) (*Frontend, error) { requestsCh := make(chan *frontendRequest) schedulerWorkers, err := newFrontendSchedulerWorkers(cfg, fmt.Sprintf("%s:%d", cfg.Addr, cfg.Port), ring, requestsCh, log) @@ -140,15 +140,17 @@ func NewFrontend(cfg Config, ring ring.ReadRing, log log.Logger, reg prometheus. f.lastQueryID.Store(rand.Uint64()) promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_query_frontend_queries_in_progress", - Help: "Number of queries in progress handled by this frontend.", + Namespace: metricsNamespace, + Name: "query_frontend_queries_in_progress", + Help: "Number of queries in progress handled by this frontend.", }, func() float64 { return float64(f.requests.count()) }) promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_query_frontend_connected_schedulers", - Help: "Number of schedulers this frontend is connected to.", + Namespace: metricsNamespace, + Name: "query_frontend_connected_schedulers", + Help: "Number of schedulers this frontend is connected to.", }, func() float64 { return float64(f.schedulerWorkers.getWorkersCount()) }) diff --git a/pkg/lokifrontend/frontend/v2/frontend_test.go b/pkg/lokifrontend/frontend/v2/frontend_test.go index d70a51852672..f41b9d1381e0 100644 --- a/pkg/lokifrontend/frontend/v2/frontend_test.go +++ b/pkg/lokifrontend/frontend/v2/frontend_test.go @@ -22,6 +22,7 @@ import ( "github.com/grafana/loki/pkg/querier/queryrange" "github.com/grafana/loki/pkg/querier/stats" "github.com/grafana/loki/pkg/scheduler/schedulerpb" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/test" ) @@ -47,7 +48,7 @@ func setupFrontend(t *testing.T, schedulerReplyFunc func(f *Frontend, msg *sched cfg.Port = grpcPort 
logger := log.NewNopLogger() - f, err := NewFrontend(cfg, nil, logger, nil, queryrange.DefaultCodec) + f, err := NewFrontend(cfg, nil, logger, nil, queryrange.DefaultCodec, constants.Loki) require.NoError(t, err) frontendv2pb.RegisterFrontendForQuerierServer(server, f) diff --git a/pkg/querier/ingester_querier.go b/pkg/querier/ingester_querier.go index 1312cf7168ea..4bdd21cb24f2 100644 --- a/pkg/querier/ingester_querier.go +++ b/pkg/querier/ingester_querier.go @@ -41,17 +41,17 @@ type IngesterQuerier struct { extraQueryDelay time.Duration } -func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration) (*IngesterQuerier, error) { +func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, metricsNamespace string) (*IngesterQuerier, error) { factory := func(addr string) (ring_client.PoolClient, error) { return client.New(clientCfg, addr) } - return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory)) + return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory), metricsNamespace) } // newIngesterQuerier creates a new IngesterQuerier and allows to pass a custom ingester client factory // used for testing purposes -func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory) (*IngesterQuerier, error) { +func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory, metricsNamespace string) (*IngesterQuerier, error) { iq := IngesterQuerier{ ring: ring, pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ring, clientFactory, util_log.Logger), diff --git a/pkg/querier/ingester_querier_test.go b/pkg/querier/ingester_querier_test.go index a3eb6d3cc3b0..a5cfd9a54dd8 100644 --- a/pkg/querier/ingester_querier_test.go +++ b/pkg/querier/ingester_querier_test.go @@ -19,6 
+19,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" + "github.com/grafana/loki/pkg/util/constants" ) func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { @@ -104,6 +105,7 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -203,6 +205,7 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -300,6 +303,7 @@ func TestQuerier_tailDisconnectedIngesters(t *testing.T) { newReadRingMock(testData.ringIngesters, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -362,6 +366,7 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -382,6 +387,7 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index d89d24a1751b..c585a5386c55 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -23,6 +23,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/storage" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -1285,7 
+1286,7 @@ func TestQuerier_SelectSamplesWithDeletes(t *testing.T) { } func newQuerier(cfg Config, clientCfg client.Config, clientFactory ring_client.PoolFactory, ring ring.ReadRing, dg *mockDeleteGettter, store storage.Store, limits *validation.Overrides) (*SingleTenantQuerier, error) { - iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, clientFactory) + iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, clientFactory, constants.Loki) if err != nil { return nil, err } diff --git a/pkg/querier/queryrange/index_stats_cache_test.go b/pkg/querier/queryrange/index_stats_cache_test.go index 8c154c36a249..72b24757aef5 100644 --- a/pkg/querier/queryrange/index_stats_cache_test.go +++ b/pkg/querier/queryrange/index_stats_cache_test.go @@ -16,6 +16,7 @@ import ( "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" "github.com/grafana/loki/pkg/storage/chunk/cache" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) func TestIndexStatsCache(t *testing.T) { @@ -24,7 +25,7 @@ func TestIndexStatsCache(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) cacheMiddleware, err := NewIndexStatsCacheMiddleware( log.NewNopLogger(), @@ -161,7 +162,7 @@ func TestIndexStatsCache_RecentData(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) defer c.Stop() require.NoError(t, err) diff --git a/pkg/querier/queryrange/limits_test.go b/pkg/querier/queryrange/limits_test.go index a3b14efdbe58..66e51a97d2e8 100644 --- a/pkg/querier/queryrange/limits_test.go +++ b/pkg/querier/queryrange/limits_test.go @@ -20,6 +20,7 @@ import ( 
"github.com/grafana/loki/pkg/logqlmodel" base "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/math" ) @@ -57,7 +58,7 @@ func Test_seriesLimiter(t *testing.T) { l := WithSplitByLimits(fakeLimits{maxSeries: 1, maxQueryParallelism: 2}, time.Hour) tpw, stopper, err := NewMiddleware(cfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{ Configs: testSchemas, - }, nil, false, nil) + }, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -227,7 +228,7 @@ func Test_MaxQueryLookBack(t *testing.T) { maxQueryParallelism: 1, }, config.SchemaConfig{ Configs: testSchemas, - }, nil, false, nil) + }, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } diff --git a/pkg/querier/queryrange/log_result_cache.go b/pkg/querier/queryrange/log_result_cache.go index a83041a94cd5..ee29e385e0d2 100644 --- a/pkg/querier/queryrange/log_result_cache.go +++ b/pkg/querier/queryrange/log_result_cache.go @@ -23,6 +23,7 @@ import ( "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" "github.com/grafana/loki/pkg/storage/chunk/cache" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/validation" ) @@ -36,11 +37,11 @@ type LogResultCacheMetrics struct { func NewLogResultCacheMetrics(registerer prometheus.Registerer) *LogResultCacheMetrics { return &LogResultCacheMetrics{ CacheHit: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_log_result_cache_hit_total", }), CacheMiss: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_log_result_cache_miss_total", }), } diff --git a/pkg/querier/queryrange/metrics.go 
b/pkg/querier/queryrange/metrics.go index feb005b997f6..390f2c81d771 100644 --- a/pkg/querier/queryrange/metrics.go +++ b/pkg/querier/queryrange/metrics.go @@ -28,10 +28,10 @@ func NewMiddlewareMapperMetrics(registerer prometheus.Registerer) *MiddlewareMap } } -func NewMetrics(registerer prometheus.Registerer) *Metrics { +func NewMetrics(registerer prometheus.Registerer, metricsNamespace string) *Metrics { return &Metrics{ - InstrumentMiddlewareMetrics: queryrangebase.NewInstrumentMiddlewareMetrics(registerer), - RetryMiddlewareMetrics: queryrangebase.NewRetryMiddlewareMetrics(registerer), + InstrumentMiddlewareMetrics: queryrangebase.NewInstrumentMiddlewareMetrics(registerer, metricsNamespace), + RetryMiddlewareMetrics: queryrangebase.NewRetryMiddlewareMetrics(registerer, metricsNamespace), MiddlewareMapperMetrics: NewMiddlewareMapperMetrics(registerer), SplitByMetrics: NewSplitByMetrics(registerer), LogResultCacheMetrics: NewLogResultCacheMetrics(registerer), diff --git a/pkg/querier/queryrange/queryrangebase/instrumentation.go b/pkg/querier/queryrange/queryrangebase/instrumentation.go index 813e73be5656..fe25a0bf1dfd 100644 --- a/pkg/querier/queryrange/queryrangebase/instrumentation.go +++ b/pkg/querier/queryrange/queryrangebase/instrumentation.go @@ -39,10 +39,10 @@ type InstrumentMiddlewareMetrics struct { } // NewInstrumentMiddlewareMetrics makes a new InstrumentMiddlewareMetrics. 
-func NewInstrumentMiddlewareMetrics(registerer prometheus.Registerer) *InstrumentMiddlewareMetrics { +func NewInstrumentMiddlewareMetrics(registerer prometheus.Registerer, metricsNamespace string) *InstrumentMiddlewareMetrics { return &InstrumentMiddlewareMetrics{ duration: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "frontend_query_range_duration_seconds", Help: "Total time spent in seconds doing query range requests.", Buckets: prometheus.DefBuckets, diff --git a/pkg/querier/queryrange/queryrangebase/results_cache.go b/pkg/querier/queryrange/queryrangebase/results_cache.go index 05d6a26f672f..1e54b5585940 100644 --- a/pkg/querier/queryrange/queryrangebase/results_cache.go +++ b/pkg/querier/queryrange/queryrangebase/results_cache.go @@ -29,6 +29,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage/chunk/cache" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/math" "github.com/grafana/loki/pkg/util/spanlogger" "github.com/grafana/loki/pkg/util/validation" @@ -52,12 +53,12 @@ type ResultsCacheMetrics struct { func NewResultsCacheMetrics(registerer prometheus.Registerer) *ResultsCacheMetrics { return &ResultsCacheMetrics{ versionComparisons: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "results_cache_version_comparisons_total", Help: "Comparisons of cache key versions in the results cache between query-frontends & queriers", }), versionComparisonFailures: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "results_cache_version_comparisons_failed", Help: "Comparison failures of cache key versions in the results cache between query-frontends & queriers", }, []string{"reason"}), diff --git a/pkg/querier/queryrange/queryrangebase/results_cache_test.go 
b/pkg/querier/queryrange/queryrangebase/results_cache_test.go index bfe3ecea5f0b..8020764d1f4a 100644 --- a/pkg/querier/queryrange/queryrangebase/results_cache_test.go +++ b/pkg/querier/queryrange/queryrangebase/results_cache_test.go @@ -18,6 +18,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/storage/chunk/cache" + "github.com/grafana/loki/pkg/util/constants" ) const ( @@ -755,7 +756,7 @@ func TestResultsCache(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) rcm, err := NewResultsCacheMiddleware( log.NewNopLogger(), @@ -801,7 +802,7 @@ func TestResultsCacheRecent(t *testing.T) { var cfg ResultsCacheConfig flagext.DefaultValues(&cfg) cfg.CacheConfig.Cache = cache.NewMockCache() - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) rcm, err := NewResultsCacheMiddleware( log.NewNopLogger(), @@ -868,7 +869,7 @@ func TestResultsCacheMaxFreshness(t *testing.T) { var cfg ResultsCacheConfig flagext.DefaultValues(&cfg) cfg.CacheConfig.Cache = cache.NewMockCache() - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) fakeLimits := tc.fakeLimits rcm, err := NewResultsCacheMiddleware( @@ -912,7 +913,7 @@ func Test_resultsCache_MissingData(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) rm, 
err := NewResultsCacheMiddleware( log.NewNopLogger(), @@ -1027,7 +1028,7 @@ func TestResultsCacheShouldCacheFunc(t *testing.T) { var cfg ResultsCacheConfig flagext.DefaultValues(&cfg) cfg.CacheConfig.Cache = cache.NewMockCache() - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) rcm, err := NewResultsCacheMiddleware( log.NewNopLogger(), diff --git a/pkg/querier/queryrange/queryrangebase/retry.go b/pkg/querier/queryrange/queryrangebase/retry.go index bae716973c9c..5dbad8d82582 100644 --- a/pkg/querier/queryrange/queryrangebase/retry.go +++ b/pkg/querier/queryrange/queryrangebase/retry.go @@ -18,10 +18,10 @@ type RetryMiddlewareMetrics struct { retriesCount prometheus.Histogram } -func NewRetryMiddlewareMetrics(registerer prometheus.Registerer) *RetryMiddlewareMetrics { +func NewRetryMiddlewareMetrics(registerer prometheus.Registerer, metricsNamespace string) *RetryMiddlewareMetrics { return &RetryMiddlewareMetrics{ retriesCount: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "query_frontend_retries", Help: "Number of times a request is retried.", Buckets: []float64{0, 1, 2, 3, 4, 5}, @@ -39,9 +39,9 @@ type retry struct { // NewRetryMiddleware returns a middleware that retries requests if they // fail with 500 or a non-HTTP error. 
-func NewRetryMiddleware(log log.Logger, maxRetries int, metrics *RetryMiddlewareMetrics) Middleware { +func NewRetryMiddleware(log log.Logger, maxRetries int, metrics *RetryMiddlewareMetrics, metricsNamespace string) Middleware { if metrics == nil { - metrics = NewRetryMiddlewareMetrics(nil) + metrics = NewRetryMiddlewareMetrics(nil, metricsNamespace) } return MiddlewareFunc(func(next Handler) Handler { diff --git a/pkg/querier/queryrange/queryrangebase/retry_test.go b/pkg/querier/queryrange/queryrangebase/retry_test.go index 71d05b61ebc5..2c4a15bb9f48 100644 --- a/pkg/querier/queryrange/queryrangebase/retry_test.go +++ b/pkg/querier/queryrange/queryrangebase/retry_test.go @@ -11,6 +11,8 @@ import ( "github.com/grafana/dskit/httpgrpc" "github.com/stretchr/testify/require" "go.uber.org/atomic" + + "github.com/grafana/loki/pkg/util/constants" ) func TestRetry(t *testing.T) { @@ -59,7 +61,7 @@ func TestRetry(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { try.Store(0) - h := NewRetryMiddleware(log.NewNopLogger(), 5, nil).Wrap(tc.handler) + h := NewRetryMiddleware(log.NewNopLogger(), 5, nil, constants.Loki).Wrap(tc.handler) req := &PrometheusRequest{ Query: `{env="test"} |= "error"`, } @@ -78,7 +80,7 @@ func Test_RetryMiddlewareCancel(t *testing.T) { var try atomic.Int32 ctx, cancel := context.WithCancel(context.Background()) cancel() - _, err := NewRetryMiddleware(log.NewNopLogger(), 5, nil).Wrap( + _, err := NewRetryMiddleware(log.NewNopLogger(), 5, nil, constants.Loki).Wrap( HandlerFunc(func(c context.Context, r Request) (Response, error) { try.Inc() return nil, ctx.Err() @@ -88,7 +90,7 @@ func Test_RetryMiddlewareCancel(t *testing.T) { require.Equal(t, ctx.Err(), err) ctx, cancel = context.WithCancel(context.Background()) - _, err = NewRetryMiddleware(log.NewNopLogger(), 5, nil).Wrap( + _, err = NewRetryMiddleware(log.NewNopLogger(), 5, nil, constants.Loki).Wrap( HandlerFunc(func(c context.Context, r Request) (Response, error) { try.Inc() cancel() diff 
--git a/pkg/querier/queryrange/querysharding_test.go b/pkg/querier/queryrange/querysharding_test.go index 79d789b1ac4b..8c77afe0410c 100644 --- a/pkg/querier/queryrange/querysharding_test.go +++ b/pkg/querier/queryrange/querysharding_test.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase/definitions" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) var ( @@ -407,7 +408,7 @@ func Test_InstantSharding(t *testing.T) { cpyPeriodConf.RowShards = 3 sharding := NewQueryShardMiddleware(log.NewNopLogger(), ShardingConfigs{ cpyPeriodConf, - }, testEngineOpts, queryrangebase.NewInstrumentMiddlewareMetrics(nil), + }, testEngineOpts, queryrangebase.NewInstrumentMiddlewareMetrics(nil, constants.Loki), nilShardingMetrics, fakeLimits{ maxSeries: math.MaxInt32, @@ -467,7 +468,7 @@ func Test_SeriesShardingHandler(t *testing.T) { RowShards: 3, }, }, - queryrangebase.NewInstrumentMiddlewareMetrics(nil), + queryrangebase.NewInstrumentMiddlewareMetrics(nil, constants.Loki), nilShardingMetrics, fakeLimits{ maxQueryParallelism: 10, @@ -770,7 +771,7 @@ func TestShardingAcrossConfigs_SeriesSharding(t *testing.T) { mware := NewSeriesQueryShardMiddleware( log.NewNopLogger(), confs, - queryrangebase.NewInstrumentMiddlewareMetrics(nil), + queryrangebase.NewInstrumentMiddlewareMetrics(nil, constants.Loki), nilShardingMetrics, fakeLimits{ maxQueryParallelism: 10, diff --git a/pkg/querier/queryrange/roundtrip.go b/pkg/querier/queryrange/roundtrip.go index 3dd03750e3b1..5442fcee42b9 100644 --- a/pkg/querier/queryrange/roundtrip.go +++ b/pkg/querier/queryrange/roundtrip.go @@ -24,6 +24,7 @@ import ( base "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" "github.com/grafana/loki/pkg/storage/chunk/cache" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" logutil "github.com/grafana/loki/pkg/util/log" ) @@ -81,7 +82,7 @@ func 
newResultsCacheFromConfig(cfg base.ResultsCacheConfig, registerer prometheu return nil, errors.Errorf("%s cache is not configured", cacheType) } - c, err := cache.New(cfg.CacheConfig, registerer, log, cacheType) + c, err := cache.New(cfg.CacheConfig, registerer, log, cacheType, constants.Loki) if err != nil { return nil, err } @@ -103,8 +104,9 @@ func NewMiddleware( cacheGenNumLoader base.CacheGenNumberLoader, retentionEnabled bool, registerer prometheus.Registerer, + metricsNamespace string, ) (base.Middleware, Stopper, error) { - metrics := NewMetrics(registerer) + metrics := NewMetrics(registerer, metricsNamespace) var ( resultsCache cache.Cache @@ -154,13 +156,13 @@ func NewMiddleware( } indexStatsTripperware, err := NewIndexStatsTripperware(cfg, log, limits, schema, codec, statsCache, - cacheGenNumLoader, retentionEnabled, metrics) + cacheGenNumLoader, retentionEnabled, metrics, metricsNamespace) if err != nil { return nil, nil, err } metricsTripperware, err := NewMetricTripperware(cfg, engineOpts, log, limits, schema, codec, resultsCache, - cacheGenNumLoader, retentionEnabled, PrometheusExtractor{}, metrics, indexStatsTripperware) + cacheGenNumLoader, retentionEnabled, PrometheusExtractor{}, metrics, indexStatsTripperware, metricsNamespace) if err != nil { return nil, nil, err } @@ -172,27 +174,27 @@ func NewMiddleware( // NOTE: When we would start caching response from non-metric queries we would have to consider cache gen headers as well in // MergeResponse implementation for Loki codecs same as it is done in Cortex at https://github.com/cortexproject/cortex/blob/21bad57b346c730d684d6d0205efef133422ab28/pkg/querier/queryrange/query_range.go#L170 - logFilterTripperware, err := NewLogFilterTripperware(cfg, engineOpts, log, limits, schema, codec, resultsCache, metrics, indexStatsTripperware) + logFilterTripperware, err := NewLogFilterTripperware(cfg, engineOpts, log, limits, schema, codec, resultsCache, metrics, indexStatsTripperware, metricsNamespace) if err 
!= nil { return nil, nil, err } - seriesTripperware, err := NewSeriesTripperware(cfg, log, limits, metrics, schema, DefaultCodec) + seriesTripperware, err := NewSeriesTripperware(cfg, log, limits, metrics, schema, DefaultCodec, metricsNamespace) if err != nil { return nil, nil, err } - labelsTripperware, err := NewLabelsTripperware(cfg, log, limits, codec, metrics, schema) + labelsTripperware, err := NewLabelsTripperware(cfg, log, limits, codec, metrics, schema, metricsNamespace) if err != nil { return nil, nil, err } - instantMetricTripperware, err := NewInstantMetricTripperware(cfg, engineOpts, log, limits, schema, codec, metrics, indexStatsTripperware) + instantMetricTripperware, err := NewInstantMetricTripperware(cfg, engineOpts, log, limits, schema, codec, metrics, indexStatsTripperware, metricsNamespace) if err != nil { return nil, nil, err } - seriesVolumeTripperware, err := NewVolumeTripperware(cfg, log, limits, schema, codec, volumeCache, cacheGenNumLoader, retentionEnabled, metrics) + seriesVolumeTripperware, err := NewVolumeTripperware(cfg, log, limits, schema, codec, volumeCache, cacheGenNumLoader, retentionEnabled, metrics, metricsNamespace) if err != nil { return nil, nil, err } @@ -388,6 +390,7 @@ func NewLogFilterTripperware( c cache.Cache, metrics *Metrics, indexStatsTripperware base.Middleware, + metricsNamespace string, ) (base.Middleware, error) { return base.MiddlewareFunc(func(next base.Handler) base.Handler { statsHandler := indexStatsTripperware.Wrap(next) @@ -442,7 +445,7 @@ func NewLogFilterTripperware( if cfg.MaxRetries > 0 { queryRangeMiddleware = append( queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } @@ -496,6 +499,7 @@ func NewSeriesTripperware( metrics *Metrics, schema config.SchemaConfig, merger base.Merger, 
+ metricsNamespace string, ) (base.Middleware, error) { queryRangeMiddleware := []base.Middleware{ StatsCollectorMiddleware(), @@ -510,7 +514,7 @@ func NewSeriesTripperware( if cfg.MaxRetries > 0 { queryRangeMiddleware = append(queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } @@ -543,6 +547,7 @@ func NewLabelsTripperware( merger base.Merger, metrics *Metrics, schema config.SchemaConfig, + metricsNamespace string, ) (base.Middleware, error) { queryRangeMiddleware := []base.Middleware{ StatsCollectorMiddleware(), @@ -556,7 +561,7 @@ func NewLabelsTripperware( if cfg.MaxRetries > 0 { queryRangeMiddleware = append(queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } @@ -583,6 +588,7 @@ func NewMetricTripperware( extractor base.Extractor, metrics *Metrics, indexStatsTripperware base.Middleware, + metricsNamespace string, ) (base.Middleware, error) { cacheKey := cacheKeyLimits{limits, cfg.Transformer} var queryCacheMiddleware base.Middleware @@ -673,7 +679,7 @@ func NewMetricTripperware( queryRangeMiddleware = append( queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } @@ -701,6 +707,7 @@ func NewInstantMetricTripperware( merger base.Merger, metrics *Metrics, indexStatsTripperware base.Middleware, + metricsNamespace string, ) (base.Middleware, error) { return base.MiddlewareFunc(func(next base.Handler) 
base.Handler { statsHandler := indexStatsTripperware.Wrap(next) @@ -731,7 +738,7 @@ func NewInstantMetricTripperware( queryRangeMiddleware = append( queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } @@ -752,6 +759,7 @@ func NewVolumeTripperware( cacheGenNumLoader base.CacheGenNumberLoader, retentionEnabled bool, metrics *Metrics, + metricsNamespace string, ) (base.Middleware, error) { // Parallelize the volume requests, so it doesn't send a huge request to a single index-gw (i.e. {app=~".+"} for 30d). // Indices are sharded by 24 hours, so we split the volume request in 24h intervals. @@ -795,6 +803,7 @@ func NewVolumeTripperware( log, metrics, schema, + metricsNamespace, ) if err != nil { @@ -868,6 +877,7 @@ func NewIndexStatsTripperware( cacheGenNumLoader base.CacheGenNumberLoader, retentionEnabled bool, metrics *Metrics, + metricsNamespace string, ) (base.Middleware, error) { // Parallelize the index stats requests, so it doesn't send a huge request to a single index-gw (i.e. {app=~".+"} for 30d). // Indices are sharded by 24 hours, so we split the stats request in 24h intervals. 
@@ -912,6 +922,7 @@ func NewIndexStatsTripperware( log, metrics, schema, + metricsNamespace, ) if err != nil { return nil, err @@ -928,6 +939,7 @@ func sharedIndexTripperware( log log.Logger, metrics *Metrics, schema config.SchemaConfig, + metricsNamespace string, ) (base.Middleware, error) { return base.MiddlewareFunc(func(next base.Handler) base.Handler { middlewares := []base.Middleware{ @@ -948,7 +960,7 @@ func sharedIndexTripperware( middlewares = append( middlewares, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics), + base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), ) } diff --git a/pkg/querier/queryrange/roundtrip_test.go b/pkg/querier/queryrange/roundtrip_test.go index e702d2aece31..375b624ae5b8 100644 --- a/pkg/querier/queryrange/roundtrip_test.go +++ b/pkg/querier/queryrange/roundtrip_test.go @@ -30,6 +30,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/validation" valid "github.com/grafana/loki/pkg/validation" @@ -175,7 +176,7 @@ func TestMetricsTripperware(t *testing.T) { noCacheTestCfg.CacheIndexStatsResults = false tpw, stopper, err := NewMiddleware(noCacheTestCfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{ Configs: testSchemasTSDB, - }, nil, false, nil) + }, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -223,7 +224,7 @@ func TestMetricsTripperware(t *testing.T) { // Configure with cache tpw, stopper, err = NewMiddleware(testConfig, testEngineOpts, util_log.Logger, l, config.SchemaConfig{ Configs: testSchemasTSDB, - }, nil, false, nil) + }, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -259,7 
+260,7 @@ func TestLogFilterTripperware(t *testing.T) { noCacheTestCfg := testConfig noCacheTestCfg.CacheResults = false noCacheTestCfg.CacheIndexStatsResults = false - tpw, stopper, err := NewMiddleware(noCacheTestCfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: testSchemasTSDB}, nil, false, nil) + tpw, stopper, err := NewMiddleware(noCacheTestCfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: testSchemasTSDB}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -325,7 +326,7 @@ func TestInstantQueryTripperware(t *testing.T) { queryTimeout: 1 * time.Minute, maxSeries: 1, } - tpw, stopper, err := NewMiddleware(testShardingConfigNoCache, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: testSchemasTSDB}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testShardingConfigNoCache, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: testSchemasTSDB}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -370,7 +371,7 @@ func TestInstantQueryTripperware(t *testing.T) { } func TestSeriesTripperware(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -401,7 +402,7 @@ func TestSeriesTripperware(t *testing.T) { } func TestLabelsTripperware(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, 
testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -447,7 +448,7 @@ func TestLabelsTripperware(t *testing.T) { } func TestIndexStatsTripperware(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -497,7 +498,7 @@ func TestVolumeTripperware(t *testing.T) { volumeEnabled: true, maxSeries: 42, } - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -553,7 +554,7 @@ func TestVolumeTripperware(t *testing.T) { }) t.Run("range queries return a prometheus style metrics response, putting volumes in buckets based on the step", func(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, volumeEnabled: true}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, volumeEnabled: true}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -741,7 +742,7 @@ func TestNewTripperware_Caches(t 
*testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - _, stopper, err := NewMiddleware(tc.config, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + _, stopper, err := NewMiddleware(tc.config, testEngineOpts, util_log.Logger, fakeLimits{maxQueryLength: 48 * time.Hour, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -771,7 +772,7 @@ func TestNewTripperware_Caches(t *testing.T) { } func TestLogNoFilter(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -820,7 +821,7 @@ func TestPostQueries(t *testing.T) { } func TestTripperware_EntriesLimit(t *testing.T) { - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxEntriesLimitPerQuery: 5000, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, fakeLimits{maxEntriesLimitPerQuery: 5000, maxQueryParallelism: 1}, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -866,7 +867,7 @@ func TestTripperware_RequiredLabels(t *testing.T) { } { t.Run(test.qs, func(t *testing.T) { limits := fakeLimits{maxEntriesLimitPerQuery: 5000, maxQueryParallelism: 1, requiredLabels: []string{"app"}} - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, 
false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -970,7 +971,7 @@ func TestTripperware_RequiredNumberLabels(t *testing.T) { maxQueryParallelism: 1, requiredNumberLabels: tc.requiredNumberLabels, } - tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(testConfig, testEngineOpts, util_log.Logger, limits, config.SchemaConfig{Configs: testSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } @@ -1146,7 +1147,7 @@ func TestMetricsTripperware_SplitShardStats(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - tpw, stopper, err := NewMiddleware(statsTestCfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: statsSchemas}, nil, false, nil) + tpw, stopper, err := NewMiddleware(statsTestCfg, testEngineOpts, util_log.Logger, l, config.SchemaConfig{Configs: statsSchemas}, nil, false, nil, constants.Loki) if stopper != nil { defer stopper.Stop() } diff --git a/pkg/querier/queryrange/split_by_interval.go b/pkg/querier/queryrange/split_by_interval.go index 4ca6be9d270f..f3f2c13d6004 100644 --- a/pkg/querier/queryrange/split_by_interval.go +++ b/pkg/querier/queryrange/split_by_interval.go @@ -12,6 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/math" "github.com/grafana/dskit/tenant" @@ -41,7 +42,7 @@ type SplitByMetrics struct { func NewSplitByMetrics(r prometheus.Registerer) *SplitByMetrics { return &SplitByMetrics{ splits: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "query_frontend_partitions", Help: "Number of 
time-based partitions (sub-requests) per request", Buckets: prometheus.ExponentialBuckets(1, 4, 5), // 1 -> 1024 diff --git a/pkg/querier/queryrange/volume_cache_test.go b/pkg/querier/queryrange/volume_cache_test.go index 009302783b55..ebe9ef8094b8 100644 --- a/pkg/querier/queryrange/volume_cache_test.go +++ b/pkg/querier/queryrange/volume_cache_test.go @@ -16,6 +16,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/cache" "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) func TestVolumeCache(t *testing.T) { @@ -25,7 +26,7 @@ func TestVolumeCache(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) require.NoError(t, err) cacheMiddleware, err := NewVolumeCacheMiddleware( log.NewNopLogger(), @@ -284,7 +285,7 @@ func TestVolumeCache_RecentData(t *testing.T) { Cache: cache.NewMockCache(), }, } - c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache) + c, err := cache.New(cfg.CacheConfig, nil, log.NewNopLogger(), stats.ResultCache, constants.Loki) defer c.Stop() require.NoError(t, err) diff --git a/pkg/queue/dequeue_qos_test.go b/pkg/queue/dequeue_qos_test.go index 0709f4723dfb..6b1de885943a 100644 --- a/pkg/queue/dequeue_qos_test.go +++ b/pkg/queue/dequeue_qos_test.go @@ -11,6 +11,8 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" + + "github.com/grafana/loki/pkg/util/constants" ) const ( @@ -56,7 +58,7 @@ func BenchmarkQueryFairness(t *testing.B) { for _, useActor := range []bool{false, true} { t.Run(fmt.Sprintf("use hierarchical queues = %v", useActor), func(t *testing.B) { - requestQueue := NewRequestQueue(1024, 0, NewMetrics("query_scheduler", nil)) + requestQueue := NewRequestQueue(1024, 0, NewMetrics(nil, constants.Loki, 
"query_scheduler")) enqueueRequestsForActor(t, []string{}, useActor, requestQueue, numSubRequestsActorA, 50*time.Millisecond) enqueueRequestsForActor(t, []string{"a"}, useActor, requestQueue, numSubRequestsActorA, 100*time.Millisecond) enqueueRequestsForActor(t, []string{"b"}, useActor, requestQueue, numSubRequestsActorB, 50*time.Millisecond) @@ -131,7 +133,7 @@ func TestQueryFairnessAcrossSameLevel(t *testing.T) { 456: [210] **/ - requestQueue := NewRequestQueue(1024, 0, NewMetrics("query_scheduler", nil)) + requestQueue := NewRequestQueue(1024, 0, NewMetrics(nil, constants.Loki, "query_scheduler")) _ = requestQueue.Enqueue("tenant1", []string{}, r(0), 0, nil) _ = requestQueue.Enqueue("tenant1", []string{}, r(1), 0, nil) _ = requestQueue.Enqueue("tenant1", []string{}, r(2), 0, nil) diff --git a/pkg/queue/metrics.go b/pkg/queue/metrics.go index 1442b5653628..5d00edb1a3b1 100644 --- a/pkg/queue/metrics.go +++ b/pkg/queue/metrics.go @@ -12,28 +12,28 @@ type Metrics struct { querierWaitTime *prometheus.HistogramVec // Per querier wait time } -func NewMetrics(subsystem string, registerer prometheus.Registerer) *Metrics { +func NewMetrics(registerer prometheus.Registerer, metricsNamespace, subsystem string) *Metrics { return &Metrics{ queueLength: promauto.With(registerer).NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Subsystem: subsystem, Name: "queue_length", Help: "Number of queries in the queue.", }, []string{"user"}), discardedRequests: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Subsystem: subsystem, Name: "discarded_requests_total", Help: "Total number of query requests discarded.", }, []string{"user"}), enqueueCount: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: metricsNamespace, Subsystem: subsystem, Name: "enqueue_count", Help: "Total number of enqueued (sub-)queries.", }, []string{"user", 
"level"}), querierWaitTime: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: metricsNamespace, Subsystem: subsystem, Name: "querier_wait_seconds", Help: "Time spend waiting for new requests.", diff --git a/pkg/queue/queue_test.go b/pkg/queue/queue_test.go index fe8d1a0a6a3e..a2cb42441c02 100644 --- a/pkg/queue/queue_test.go +++ b/pkg/queue/queue_test.go @@ -11,6 +11,8 @@ import ( "github.com/grafana/dskit/services" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/util/constants" ) func BenchmarkGetNextRequest(b *testing.B) { @@ -45,7 +47,7 @@ func BenchmarkGetNextRequest(b *testing.B) { queues := make([]*RequestQueue, 0, b.N) for n := 0; n < b.N; n++ { - queue := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics("query_scheduler", nil)) + queue := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics(nil, constants.Loki, "query_scheduler")) queues = append(queues, queue) for ix := 0; ix < queriers; ix++ { @@ -103,7 +105,7 @@ func BenchmarkQueueRequest(b *testing.B) { requests := make([]string, 0, numTenants) for n := 0; n < b.N; n++ { - q := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics("query_scheduler", nil)) + q := NewRequestQueue(maxOutstandingPerTenant, 0, NewMetrics(nil, constants.Loki, "query_scheduler")) for ix := 0; ix < queriers; ix++ { q.RegisterConsumerConnection(fmt.Sprintf("querier-%d", ix)) @@ -133,7 +135,7 @@ func BenchmarkQueueRequest(b *testing.B) { func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBecauseQuerierHasBeenForgotten(t *testing.T) { const forgetDelay = 3 * time.Second - queue := NewRequestQueue(1, forgetDelay, NewMetrics("query_scheduler", nil)) + queue := NewRequestQueue(1, forgetDelay, NewMetrics(nil, constants.Loki, "query_scheduler")) // Start the queue service. 
ctx := context.Background() @@ -304,7 +306,7 @@ func TestContextCond(t *testing.T) { func TestMaxQueueSize(t *testing.T) { t.Run("queue size is tracked per tenant", func(t *testing.T) { maxSize := 3 - queue := NewRequestQueue(maxSize, 0, NewMetrics("query_scheduler", nil)) + queue := NewRequestQueue(maxSize, 0, NewMetrics(nil, constants.Loki, "query_scheduler")) queue.RegisterConsumerConnection("querier") // enqueue maxSize items with different actors diff --git a/pkg/ruler/base/client_pool.go b/pkg/ruler/base/client_pool.go index b81a2d227821..ca2a3ac2d45f 100644 --- a/pkg/ruler/base/client_pool.go +++ b/pkg/ruler/base/client_pool.go @@ -33,7 +33,7 @@ func (p *rulerClientsPool) GetClientFor(addr string) (RulerClient, error) { return c.(RulerClient), nil } -func newRulerClientPool(clientCfg grpcclient.Config, logger log.Logger, reg prometheus.Registerer) ClientsPool { +func newRulerClientPool(clientCfg grpcclient.Config, logger log.Logger, reg prometheus.Registerer, metricsNamespace string) ClientsPool { // We prefer sane defaults instead of exposing further config options. poolCfg := client.PoolConfig{ CheckInterval: time.Minute, @@ -42,8 +42,9 @@ func newRulerClientPool(clientCfg grpcclient.Config, logger log.Logger, reg prom } clientsCount := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "cortex_ruler_clients", - Help: "The current number of ruler clients in the pool.", + Namespace: metricsNamespace, + Name: "ruler_clients", + Help: "The current number of ruler clients in the pool.", }) return &rulerClientsPool{ diff --git a/pkg/ruler/base/compat.go b/pkg/ruler/base/compat.go index 9503f131dc00..35d40c3f44b9 100644 --- a/pkg/ruler/base/compat.go +++ b/pkg/ruler/base/compat.go @@ -234,29 +234,34 @@ type RulesManager interface { // ManagerFactory is a function that creates new RulesManager for given user and notifier.Manager. 
type ManagerFactory func(ctx context.Context, userID string, notifier *notifier.Manager, logger log.Logger, reg prometheus.Registerer) RulesManager -func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engine *promql.Engine, overrides RulesLimits, reg prometheus.Registerer) ManagerFactory { +func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engine *promql.Engine, overrides RulesLimits, reg prometheus.Registerer, metricsNamespace string) ManagerFactory { totalWrites := promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_ruler_write_requests_total", - Help: "Number of write requests to ingesters.", + Namespace: metricsNamespace, + Name: "ruler_write_requests_total", + Help: "Number of write requests to ingesters.", }) failedWrites := promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_ruler_write_requests_failed_total", - Help: "Number of failed write requests to ingesters.", + Namespace: metricsNamespace, + Name: "ruler_write_requests_failed_total", + Help: "Number of failed write requests to ingesters.", }) totalQueries := promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_ruler_queries_total", - Help: "Number of queries executed by ruler.", + Namespace: metricsNamespace, + Name: "ruler_queries_total", + Help: "Number of queries executed by ruler.", }) failedQueries := promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_ruler_queries_failed_total", - Help: "Number of failed queries by ruler.", + Namespace: metricsNamespace, + Name: "ruler_queries_failed_total", + Help: "Number of failed queries by ruler.", }) var rulerQuerySeconds *prometheus.CounterVec if cfg.EnableQueryStats { rulerQuerySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ruler_query_seconds_total", - Help: "Total amount of wall clock time spent processing queries by the ruler.", + Namespace: metricsNamespace, + Name: "ruler_query_seconds_total", 
+ Help: "Total amount of wall clock time spent processing queries by the ruler.", }, []string{"user"}) } diff --git a/pkg/ruler/base/manager.go b/pkg/ruler/base/manager.go index c6bbf70c35a3..3787fbe16263 100644 --- a/pkg/ruler/base/manager.go +++ b/pkg/ruler/base/manager.go @@ -46,9 +46,10 @@ type DefaultMultiTenantManager struct { configUpdatesTotal *prometheus.CounterVec registry prometheus.Registerer logger log.Logger + metricsNamespace string } -func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg prometheus.Registerer, logger log.Logger, limits RulesLimits) (*DefaultMultiTenantManager, error) { +func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg prometheus.Registerer, logger log.Logger, limits RulesLimits, metricsNamespace string) (*DefaultMultiTenantManager, error) { userManagerMetrics := NewManagerMetrics(cfg.DisableRuleGroupLabel, func(k, v string) string { // When "by-rule" sharding is enabled, each rule group is assigned a unique name to work around some of Prometheus' // assumptions, and metrics are exported based on these rule group names. 
If we kept these unique rule group names @@ -73,23 +74,24 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg mapper: newMapper(cfg.RulePath, logger), userManagers: map[string]RulesManager{}, userManagerMetrics: userManagerMetrics, + metricsNamespace: metricsNamespace, managersTotal: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "ruler_managers_total", Help: "Total number of managers registered and running in the ruler", }), lastReloadSuccessful: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "ruler_config_last_reload_successful", Help: "Boolean set to 1 whenever the last configuration reload attempt was successful.", }, []string{"user"}), lastReloadSuccessfulTimestamp: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "ruler_config_last_reload_successful_seconds", Help: "Timestamp of the last successful configuration reload.", }, []string{"user"}), configUpdatesTotal: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "cortex", + Namespace: metricsNamespace, Name: "ruler_config_updates_total", Help: "Total number of config updates triggered by a user", }, []string{"user"}), @@ -213,7 +215,7 @@ func (r *DefaultMultiTenantManager) getOrCreateNotifier(userID string) (*notifie } reg := prometheus.WrapRegistererWith(prometheus.Labels{"user": userID}, r.registry) - reg = prometheus.WrapRegistererWithPrefix("cortex_", reg) + reg = prometheus.WrapRegistererWithPrefix(r.metricsNamespace+"_", reg) n = newRulerNotifier(&notifier.Options{ QueueCapacity: r.cfg.NotificationQueueCapacity, Registerer: reg, diff --git a/pkg/ruler/base/manager_test.go b/pkg/ruler/base/manager_test.go index 4ab0d7553b82..c2cdc5885409 100644 --- a/pkg/ruler/base/manager_test.go +++ b/pkg/ruler/base/manager_test.go @@ -14,13 +14,14 @@ import ( "go.uber.org/atomic"
"github.com/grafana/loki/pkg/ruler/rulespb" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/test" ) func TestSyncRuleGroups(t *testing.T) { dir := t.TempDir() - m, err := NewDefaultMultiTenantManager(Config{RulePath: dir}, factory, nil, log.NewNopLogger(), ruleLimits{}) + m, err := NewDefaultMultiTenantManager(Config{RulePath: dir}, factory, nil, log.NewNopLogger(), ruleLimits{}, constants.Loki) require.NoError(t, err) const user = "testUser" diff --git a/pkg/ruler/base/ruler.go b/pkg/ruler/base/ruler.go index 67d12ac12119..eba29f0baed2 100644 --- a/pkg/ruler/base/ruler.go +++ b/pkg/ruler/base/ruler.go @@ -254,38 +254,42 @@ type Ruler struct { allowedTenants *util.AllowedTenants - registry prometheus.Registerer - logger log.Logger + registry prometheus.Registerer + logger log.Logger + metricsNamespace string } // NewRuler creates a new ruler from a distributor and chunk store. -func NewRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits) (*Ruler, error) { - return newRuler(cfg, manager, reg, logger, ruleStore, limits, newRulerClientPool(cfg.ClientTLSConfig, logger, reg)) +func NewRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits, metricsNamespace string) (*Ruler, error) { + return newRuler(cfg, manager, reg, logger, ruleStore, limits, newRulerClientPool(cfg.ClientTLSConfig, logger, reg, metricsNamespace), metricsNamespace) } -func newRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits, clientPool ClientsPool) (*Ruler, error) { +func newRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits, clientPool ClientsPool, metricsNamespace string) (*Ruler, error) { if err := 
cfg.Validate(logger); err != nil { return nil, fmt.Errorf("invalid ruler config: %w", err) } ruler := &Ruler{ - cfg: cfg, - store: ruleStore, - manager: manager, - registry: reg, - logger: logger, - limits: limits, - clientsPool: clientPool, - allowedTenants: util.NewAllowedTenants(cfg.EnabledTenants, cfg.DisabledTenants), + cfg: cfg, + store: ruleStore, + manager: manager, + registry: reg, + logger: logger, + limits: limits, + clientsPool: clientPool, + allowedTenants: util.NewAllowedTenants(cfg.EnabledTenants, cfg.DisabledTenants), + metricsNamespace: metricsNamespace, ringCheckErrors: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Name: "cortex_ruler_ring_check_errors_total", - Help: "Number of errors that have occurred when checking the ring for ownership", + Namespace: metricsNamespace, + Name: "ruler_ring_check_errors_total", + Help: "Number of errors that have occurred when checking the ring for ownership", }), rulerSync: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ruler_sync_rules_total", - Help: "Total number of times the ruler sync operation triggered.", + Namespace: metricsNamespace, + Name: "ruler_sync_rules_total", + Help: "Total number of times the ruler sync operation triggered.", }, []string{"reason"}), } @@ -300,7 +304,7 @@ func newRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer, ringStore, err := kv.NewClient( cfg.Ring.KVStore, ring.GetCodec(), - kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("cortex_", reg), "ruler"), + kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", reg), "ruler"), logger, ) if err != nil { @@ -329,12 +333,12 @@ func enableSharding(r *Ruler, ringStore kv.Client) error { delegate = ring.NewAutoForgetDelegate(r.cfg.Ring.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, r.logger) rulerRingName := "ruler" - r.lifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, rulerRingName, ringKey, ringStore, delegate, 
r.logger, prometheus.WrapRegistererWithPrefix("cortex_", r.registry)) + r.lifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, rulerRingName, ringKey, ringStore, delegate, r.logger, prometheus.WrapRegistererWithPrefix(r.metricsNamespace+"_", r.registry)) if err != nil { return errors.Wrap(err, "failed to initialize ruler's lifecycler") } - r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), rulerRingName, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("cortex_", r.registry), r.logger) + r.ring, err = ring.NewWithStoreClientAndStrategy(r.cfg.Ring.ToRingConfig(), rulerRingName, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix(r.metricsNamespace+"_", r.registry), r.logger) if err != nil { return errors.Wrap(err, "failed to initialize ruler's ring") } diff --git a/pkg/ruler/base/ruler_test.go b/pkg/ruler/base/ruler_test.go index 26a8ddcaf98c..694dc143b4a3 100644 --- a/pkg/ruler/base/ruler_test.go +++ b/pkg/ruler/base/ruler_test.go @@ -55,6 +55,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client/hedging" "github.com/grafana/loki/pkg/storage/chunk/client/testutils" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) func defaultRulerConfig(t testing.TB, store rulestore.RuleStore) Config { @@ -148,7 +149,7 @@ func testSetup(t *testing.T, q storage.Querier) (*promql.Engine, storage.Queryab func newManager(t *testing.T, cfg Config, q storage.Querier) *DefaultMultiTenantManager { engine, queryable, pusher, logger, overrides, reg := testSetup(t, q) - manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, queryable, engine, overrides, nil), reg, logger, overrides) + manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, queryable, engine, overrides, nil, constants.Loki), reg, logger, overrides, constants.Loki) 
require.NoError(t, err) return manager @@ -160,7 +161,7 @@ func newMultiTenantManager(t *testing.T, cfg Config, q storage.Querier, amConf m overrides := ruleLimits{evalDelay: 0, maxRuleGroups: 20, maxRulesPerRuleGroup: 15} overrides.alertManagerConfig = amConf - manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, queryable, engine, overrides, nil), reg, logger, overrides) + manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, queryable, engine, overrides, nil, constants.Loki), reg, logger, overrides, constants.Loki) require.NoError(t, err) return manager @@ -196,9 +197,9 @@ func (p *mockRulerClientsPool) GetClientFor(addr string) (RulerClient, error) { return nil, fmt.Errorf("unable to find ruler for add %s", addr) } -func newMockClientsPool(cfg Config, logger log.Logger, reg prometheus.Registerer, rulerAddrMap map[string]*Ruler) *mockRulerClientsPool { +func newMockClientsPool(cfg Config, logger log.Logger, reg prometheus.Registerer, metricsNamespace string, rulerAddrMap map[string]*Ruler) *mockRulerClientsPool { return &mockRulerClientsPool{ - ClientsPool: newRulerClientPool(cfg.ClientTLSConfig, logger, reg), + ClientsPool: newRulerClientPool(cfg.ClientTLSConfig, logger, reg, metricsNamespace), cfg: cfg, rulerAddrMap: rulerAddrMap, } @@ -212,8 +213,8 @@ func buildRuler(t *testing.T, rulerConfig Config, q storage.Querier, clientMetri storage, err := NewLegacyRuleStore(rulerConfig.StoreConfig, hedging.Config{}, clientMetrics, promRules.FileLoader{}, log.NewNopLogger()) require.NoError(t, err) - managerFactory := DefaultTenantManagerFactory(rulerConfig, pusher, queryable, engine, overrides, reg) - manager, err := NewDefaultMultiTenantManager(rulerConfig, managerFactory, reg, log.NewNopLogger(), overrides) + managerFactory := DefaultTenantManagerFactory(rulerConfig, pusher, queryable, engine, overrides, reg, constants.Loki) + manager, err := NewDefaultMultiTenantManager(rulerConfig, 
managerFactory, reg, log.NewNopLogger(), overrides, constants.Loki) require.NoError(t, err) ruler, err := newRuler( @@ -223,7 +224,8 @@ func buildRuler(t *testing.T, rulerConfig Config, q storage.Querier, clientMetri logger, storage, overrides, - newMockClientsPool(rulerConfig, logger, reg, rulerAddrMap), + newMockClientsPool(rulerConfig, logger, reg, constants.Loki, rulerAddrMap), + constants.Loki, ) require.NoError(t, err) return ruler @@ -279,10 +281,10 @@ func TestNotifierSendsUserIDHeader(t *testing.T) { // Ensure we have metrics in the notifier. assert.NoError(t, prom_testutil.GatherAndCompare(manager.registry.(*prometheus.Registry), strings.NewReader(` - # HELP cortex_prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. - # TYPE cortex_prometheus_notifications_dropped_total counter - cortex_prometheus_notifications_dropped_total{user="1"} 0 - `), "cortex_prometheus_notifications_dropped_total")) + # HELP loki_prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. + # TYPE loki_prometheus_notifications_dropped_total counter + loki_prometheus_notifications_dropped_total{user="1"} 0 + `), "loki_prometheus_notifications_dropped_total")) } func TestMultiTenantsNotifierSendsUserIDHeader(t *testing.T) { @@ -351,11 +353,11 @@ func TestMultiTenantsNotifierSendsUserIDHeader(t *testing.T) { // Ensure we have metrics in the notifier. assert.NoError(t, prom_testutil.GatherAndCompare(manager.registry.(*prometheus.Registry), strings.NewReader(` - # HELP cortex_prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. 
- # TYPE cortex_prometheus_notifications_dropped_total counter - cortex_prometheus_notifications_dropped_total{user="tenant1"} 0 - cortex_prometheus_notifications_dropped_total{user="tenant2"} 0 - `), "cortex_prometheus_notifications_dropped_total")) + # HELP loki_prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. + # TYPE loki_prometheus_notifications_dropped_total counter + loki_prometheus_notifications_dropped_total{user="tenant1"} 0 + loki_prometheus_notifications_dropped_total{user="tenant2"} 0 + `), "loki_prometheus_notifications_dropped_total")) } func TestRuler_Rules(t *testing.T) { @@ -1515,7 +1517,7 @@ func TestDeleteTenantRuleGroups(t *testing.T) { obj, rs := setupRuleGroupsStore(t, ruleGroups) require.Equal(t, 3, obj.GetObjectCount()) - api, err := NewRuler(Config{}, nil, nil, log.NewNopLogger(), rs, nil) + api, err := NewRuler(Config{}, nil, nil, log.NewNopLogger(), rs, nil, constants.Loki) require.NoError(t, err) { diff --git a/pkg/ruler/evaluator_remote.go b/pkg/ruler/evaluator_remote.go index 451057fb6651..4f953876d6c0 100644 --- a/pkg/ruler/evaluator_remote.go +++ b/pkg/ruler/evaluator_remote.go @@ -39,6 +39,7 @@ import ( "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/logqlmodel" "github.com/grafana/loki/pkg/util/build" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/httpreq" "github.com/grafana/loki/pkg/util/spanlogger" ) @@ -86,21 +87,21 @@ func NewRemoteEvaluator(client httpgrpc.HTTPClient, overrides RulesLimits, logge func newMetrics(registerer prometheus.Registerer) *metrics { reqDurationSecs := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ruler_remote_eval", Name: "request_duration_seconds", // 0.005000, 0.015000, 0.045000, 0.135000, 0.405000, 1.215000, 3.645000, 10.935000, 32.805000 Buckets: prometheus.ExponentialBuckets(0.005, 3, 9), }, []string{"user"}) 
responseSizeBytes := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ruler_remote_eval", Name: "response_bytes", // 32, 128, 512, 2K, 8K, 32K, 128K, 512K, 2M, 8M Buckets: prometheus.ExponentialBuckets(32, 4, 10), }, []string{"user"}) responseSizeSamples := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ruler_remote_eval", Name: "response_samples", // 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144 @@ -108,12 +109,12 @@ func newMetrics(registerer prometheus.Registerer) *metrics { }, []string{"user"}) successfulEvals := prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ruler_remote_eval", Name: "success_total", }, []string{"user"}) failedEvals := prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "ruler_remote_eval", Name: "failure_total", }, []string{"reason", "user"}) diff --git a/pkg/ruler/memstore.go b/pkg/ruler/memstore.go index 664f9bfcfb53..b70d17a954b3 100644 --- a/pkg/ruler/memstore.go +++ b/pkg/ruler/memstore.go @@ -21,6 +21,7 @@ import ( "github.com/grafana/loki/pkg/querier/series" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) const ( @@ -45,15 +46,15 @@ type memstoreMetrics struct { func newMemstoreMetrics(r prometheus.Registerer) *memstoreMetrics { return &memstoreMetrics{ evaluations: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ruler_memory_for_state_evaluations_total", }, []string{"status", "tenant"}), samples: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "ruler_memory_samples", }), cacheHits: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: 
"ruler_memory_for_state_cache_hits_total", }, []string{"tenant"}), } diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 5fcb17ea9166..dd90ccb15339 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -10,7 +10,7 @@ import ( "github.com/grafana/loki/pkg/ruler/rulestore" ) -func NewRuler(cfg Config, evaluator Evaluator, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits) (*ruler.Ruler, error) { +func NewRuler(cfg Config, evaluator Evaluator, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits, metricsNamespace string) (*ruler.Ruler, error) { // For backward compatibility, client and clients are defined in the remote_write config. // When both are present, an error is thrown. if len(cfg.RemoteWrite.Clients) > 0 && cfg.RemoteWrite.Client != nil { @@ -31,6 +31,7 @@ func NewRuler(cfg Config, evaluator Evaluator, reg prometheus.Registerer, logger reg, logger, limits, + metricsNamespace, ) if err != nil { return nil, err @@ -42,5 +43,6 @@ func NewRuler(cfg Config, evaluator Evaluator, reg prometheus.Registerer, logger logger, ruleStore, limits, + metricsNamespace, ) } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 8dfbc5eb777c..e9cf447e733e 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -114,7 +114,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { } // NewScheduler creates a new Scheduler. 
-func NewScheduler(cfg Config, limits Limits, log log.Logger, ringManager *lokiring.RingManager, registerer prometheus.Registerer) (*Scheduler, error) { +func NewScheduler(cfg Config, limits Limits, log log.Logger, ringManager *lokiring.RingManager, registerer prometheus.Registerer, metricsNamespace string) (*Scheduler, error) { if cfg.UseSchedulerRing { if ringManager == nil { return nil, errors.New("ring manager can't be empty when use_scheduler_ring is true") @@ -123,7 +123,7 @@ func NewScheduler(cfg Config, limits Limits, log log.Logger, ringManager *lokiri } } - queueMetrics := queue.NewMetrics("query_scheduler", registerer) + queueMetrics := queue.NewMetrics(registerer, metricsNamespace, "query_scheduler") s := &Scheduler{ cfg: cfg, log: log, @@ -137,24 +137,29 @@ func NewScheduler(cfg Config, limits Limits, log log.Logger, ringManager *lokiri } s.queueDuration = promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Name: "cortex_query_scheduler_queue_duration_seconds", - Help: "Time spend by requests in queue before getting picked up by a querier.", - Buckets: prometheus.DefBuckets, + Namespace: metricsNamespace, + Name: "query_scheduler_queue_duration_seconds", + Help: "Time spend by requests in queue before getting picked up by a querier.", + Buckets: prometheus.DefBuckets, }) s.connectedQuerierClients = promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_query_scheduler_connected_querier_clients", - Help: "Number of querier worker clients currently connected to the query-scheduler.", + Namespace: metricsNamespace, + Name: "query_scheduler_connected_querier_clients", + Help: "Number of querier worker clients currently connected to the query-scheduler.", }, s.requestQueue.GetConnectedConsumersMetric) s.connectedFrontendClients = promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{ - Name: "cortex_query_scheduler_connected_frontend_clients", - Help: "Number of query-frontend worker clients currently connected to 
the query-scheduler.", + Namespace: metricsNamespace, + Name: "query_scheduler_connected_frontend_clients", + Help: "Number of query-frontend worker clients currently connected to the query-scheduler.", }, s.getConnectedFrontendClientsMetric) s.schedulerRunning = promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ - Name: "cortex_query_scheduler_running", - Help: "Value will be 1 if the scheduler is in the ReplicationSet and actively receiving/processing requests", + Namespace: metricsNamespace, + Name: "query_scheduler_running", + Help: "Value will be 1 if the scheduler is in the ReplicationSet and actively receiving/processing requests", }) s.inflightRequests = promauto.With(registerer).NewSummary(prometheus.SummaryOpts{ - Name: "cortex_query_scheduler_inflight_requests", + Namespace: metricsNamespace, + Name: "query_scheduler_inflight_requests", Help: "Number of inflight requests (either queued or processing) sampled at a regular interval. Quantile buckets keep track of inflight requests over the last 60s.", Objectives: map[float64]float64{0.5: 0.05, 0.75: 0.02, 0.8: 0.02, 0.9: 0.01, 0.95: 0.01, 0.99: 0.001}, MaxAge: time.Minute, diff --git a/pkg/storage/batch.go b/pkg/storage/batch.go index a1a0ec9784ea..20cc45b69590 100644 --- a/pkg/storage/batch.go +++ b/pkg/storage/batch.go @@ -23,6 +23,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk" "github.com/grafana/loki/pkg/storage/chunk/fetcher" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" ) @@ -46,25 +47,25 @@ func NewChunkMetrics(r prometheus.Registerer, maxBatchSize int) *ChunkMetrics { return &ChunkMetrics{ refs: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "index", Name: "chunk_refs_total", Help: "Number of chunks refs downloaded, partitioned by whether they intersect the query bounds.", }, []string{"status"}), series: 
promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "store", Name: "series_total", Help: "Number of series referenced by a query, partitioned by whether they satisfy matchers.", }, []string{"status"}), chunks: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "store", Name: "chunks_downloaded_total", Help: "Number of chunks referenced or downloaded, partitioned by if they satisfy matchers.", }, []string{"status"}), batches: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "store", Name: "chunks_per_batch", Help: "The chunk batch size, partitioned by if they satisfy matchers.", diff --git a/pkg/storage/chunk/cache/background.go b/pkg/storage/chunk/cache/background.go index b2042f7c83a8..16feb62551f5 100644 --- a/pkg/storage/chunk/cache/background.go +++ b/pkg/storage/chunk/cache/background.go @@ -12,6 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "go.uber.org/atomic" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/flagext" util_log "github.com/grafana/loki/pkg/util/log" ) @@ -74,41 +75,41 @@ func NewBackground(name string, cfg BackgroundConfig, cache Cache, reg prometheu sizeLimit: cfg.WriteBackSizeLimit.Val(), droppedWriteBack: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_dropped_background_writes_total", Help: "Total count of dropped write backs to cache.", ConstLabels: prometheus.Labels{"name": name}, }), droppedWriteBackBytes: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_dropped_background_writes_bytes_total", Help: "Amount of data dropped in write backs to cache.", ConstLabels: prometheus.Labels{"name": name}, }), queueLength: 
promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_background_queue_length", Help: "Length of the cache background writeback queue.", ConstLabels: prometheus.Labels{"name": name}, }), queueBytes: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_background_queue_bytes", Help: "Amount of data in the background writeback queue.", ConstLabels: prometheus.Labels{"name": name}, }), enqueuedBytes: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_background_enqueued_bytes_total", Help: "Counter of bytes enqueued over time to the background writeback queue.", ConstLabels: prometheus.Labels{"name": name}, }), dequeuedBytes: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_background_dequeued_bytes_total", Help: "Counter of bytes dequeued over time from the background writeback queue.", ConstLabels: prometheus.Labels{"name": name}, diff --git a/pkg/storage/chunk/cache/cache.go b/pkg/storage/chunk/cache/cache.go index edea291b4c5d..f651b252cdaa 100644 --- a/pkg/storage/chunk/cache/cache.go +++ b/pkg/storage/chunk/cache/cache.go @@ -92,7 +92,7 @@ func IsCacheConfigured(cfg Config) bool { } // New creates a new Cache using Config. -func New(cfg Config, reg prometheus.Registerer, logger log.Logger, cacheType stats.CacheType) (Cache, error) { +func New(cfg Config, reg prometheus.Registerer, logger log.Logger, cacheType stats.CacheType, metricsNamespace string) (Cache, error) { // Have additional check for embeddedcache with distributed mode, because those cache will already be initialized in modules // but still need stats collector wrapper for it. 
@@ -120,7 +120,7 @@ func New(cfg Config, reg prometheus.Registerer, logger log.Logger, cacheType sta cfg.Memcache.Expiration = cfg.DefaultValidity } - client := NewMemcachedClient(cfg.MemcacheClient, cfg.Prefix, reg, logger) + client := NewMemcachedClient(cfg.MemcacheClient, cfg.Prefix, reg, logger, metricsNamespace) cache := NewMemcached(cfg.Memcache, client, cfg.Prefix, reg, logger, cacheType) cacheName := cfg.Prefix + "memcache" diff --git a/pkg/storage/chunk/cache/embeddedcache.go b/pkg/storage/chunk/cache/embeddedcache.go index 2536fed0cc86..8e412af4a81e 100644 --- a/pkg/storage/chunk/cache/embeddedcache.go +++ b/pkg/storage/chunk/cache/embeddedcache.go @@ -14,6 +14,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/logqlmodel/stats" + "github.com/grafana/loki/pkg/util/constants" ) const ( @@ -107,7 +108,7 @@ func NewEmbeddedCache(name string, cfg EmbeddedCacheConfig, reg prometheus.Regis done: make(chan struct{}), entriesAddedNew: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "embeddedcache", Name: "added_new_total", Help: "The total number of new entries added to the cache", @@ -115,7 +116,7 @@ func NewEmbeddedCache(name string, cfg EmbeddedCacheConfig, reg prometheus.Regis }), entriesEvicted: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "embeddedcache", Name: "evicted_total", Help: "The total number of evicted entries", @@ -123,7 +124,7 @@ func NewEmbeddedCache(name string, cfg EmbeddedCacheConfig, reg prometheus.Regis }, []string{"reason"}), entriesCurrent: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "embeddedcache", Name: "entries", Help: "Current number of entries in the cache", @@ -131,7 +132,7 @@ func NewEmbeddedCache(name string, cfg EmbeddedCacheConfig, reg prometheus.Regis }), memoryBytes: 
promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "embeddedcache", Name: "memory_bytes", Help: "The current cache size in bytes", diff --git a/pkg/storage/chunk/cache/instrumented.go b/pkg/storage/chunk/cache/instrumented.go index b2bef524ac34..c1f515debf68 100644 --- a/pkg/storage/chunk/cache/instrumented.go +++ b/pkg/storage/chunk/cache/instrumented.go @@ -9,12 +9,14 @@ import ( otlog "github.com/opentracing/opentracing-go/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) // Instrument returns an instrumented cache. func Instrument(name string, cache Cache, reg prometheus.Registerer) Cache { valueSize := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_value_size_bytes", Help: "Size of values in the cache.", // Cached chunks are generally in the KBs, but cached index can @@ -29,7 +31,7 @@ func Instrument(name string, cache Cache, reg prometheus.Registerer) Cache { Cache: cache, requestDuration: instr.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_request_duration_seconds", Help: "Total time spent in seconds doing cache requests.", // Cache requests are very quick: smallest bucket is 16us, biggest is 1s. 
@@ -38,14 +40,14 @@ func Instrument(name string, cache Cache, reg prometheus.Registerer) Cache { }, []string{"method", "status_code"})), fetchedKeys: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_fetched_keys", Help: "Total count of keys requested from cache.", ConstLabels: prometheus.Labels{"name": name}, }), hits: promauto.With(reg).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cache_hits", Help: "Total count of keys found in cache.", ConstLabels: prometheus.Labels{"name": name}, diff --git a/pkg/storage/chunk/cache/memcached.go b/pkg/storage/chunk/cache/memcached.go index 764c1b0abf79..9b6150839cd2 100644 --- a/pkg/storage/chunk/cache/memcached.go +++ b/pkg/storage/chunk/cache/memcached.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/logqlmodel/stats" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/math" ) @@ -62,7 +63,7 @@ func NewMemcached(cfg MemcachedConfig, client MemcachedClient, name string, reg cacheType: cacheType, requestDuration: instr.NewHistogramCollector( promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "memcache_request_duration_seconds", Help: "Total time spent in seconds doing memcache requests.", // 16us, 64us, 256us, 1.024ms, 4.096ms, 16.384ms, 65.536ms, 150ms, 250ms, 500ms, 1s diff --git a/pkg/storage/chunk/cache/memcached_client.go b/pkg/storage/chunk/cache/memcached_client.go index 1db5e217055e..b497b5c5917f 100644 --- a/pkg/storage/chunk/cache/memcached_client.go +++ b/pkg/storage/chunk/cache/memcached_client.go @@ -19,6 +19,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/sony/gobreaker" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" ) @@ -95,7 +96,7 @@ func (cfg 
*MemcachedClientConfig) RegisterFlagsWithPrefix(prefix, description st // NewMemcachedClient creates a new MemcacheClient that gets its server list // from SRV and updates the server list on a regular basis. -func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Registerer, logger log.Logger) MemcachedClient { +func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Registerer, logger log.Logger, metricsNamespace string) MemcachedClient { var selector serverSelector if cfg.ConsistentHash { selector = DefaultMemcachedJumpHashSelector() @@ -107,7 +108,7 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg client.Timeout = cfg.Timeout client.MaxIdleConns = cfg.MaxIdleConns - dnsProviderRegisterer := prometheus.WrapRegistererWithPrefix("cortex_", prometheus.WrapRegistererWith(prometheus.Labels{ + dnsProviderRegisterer := prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", prometheus.WrapRegistererWith(prometheus.Labels{ "name": name, }, r)) @@ -127,14 +128,14 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg quit: make(chan struct{}), numServers: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "memcache_client_servers", Help: "The number of memcache servers discovered.", ConstLabels: prometheus.Labels{"name": name}, }), skipped: promauto.With(r).NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "memcache_client_set_skip_total", Help: "Total number of skipped set operations because of the value is larger than the max-item-size.", ConstLabels: prometheus.Labels{"name": name}, diff --git a/pkg/storage/chunk/client/alibaba/oss_object_client.go b/pkg/storage/chunk/client/alibaba/oss_object_client.go index ea32d384e599..b14b4d5a0c8e 100644 --- a/pkg/storage/chunk/client/alibaba/oss_object_client.go +++ b/pkg/storage/chunk/client/alibaba/oss_object_client.go @@ -13,12 
+13,13 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/storage/chunk/client" + "github.com/grafana/loki/pkg/util/constants" ) const NoSuchKeyErr = "NoSuchKey" var ossRequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "oss_request_duration_seconds", Help: "Time spent doing OSS requests.", Buckets: prometheus.ExponentialBuckets(0.005, 4, 7), diff --git a/pkg/storage/chunk/client/aws/dynamodb_metrics.go b/pkg/storage/chunk/client/aws/dynamodb_metrics.go index 47b275f8bf5d..f1fedfb20bff 100644 --- a/pkg/storage/chunk/client/aws/dynamodb_metrics.go +++ b/pkg/storage/chunk/client/aws/dynamodb_metrics.go @@ -4,6 +4,8 @@ import ( "github.com/grafana/dskit/instrument" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) type dynamoDBMetrics struct { @@ -19,7 +21,7 @@ func newMetrics(r prometheus.Registerer) *dynamoDBMetrics { m := dynamoDBMetrics{} m.dynamoRequestDuration = instrument.NewHistogramCollector(promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_request_duration_seconds", Help: "Time spent doing DynamoDB requests.", @@ -28,27 +30,27 @@ func newMetrics(r prometheus.Registerer) *dynamoDBMetrics { Buckets: prometheus.ExponentialBuckets(0.001, 4, 9), }, []string{"operation", "status_code"})) m.dynamoConsumedCapacity = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_consumed_capacity_total", Help: "The capacity units consumed by operation.", }, []string{"operation", tableNameLabel}) m.dynamoThrottled = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_throttled_total", Help: "The total number of throttled 
events.", }, []string{"operation", tableNameLabel}) m.dynamoFailures = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_failures_total", Help: "The total number of errors while storing chunks to the chunk store.", }, []string{tableNameLabel, errorReasonLabel, "operation"}) m.dynamoDroppedRequests = promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_dropped_requests_total", Help: "The total number of requests which were dropped due to errors encountered from dynamo.", }, []string{tableNameLabel, errorReasonLabel, "operation"}) m.dynamoQueryPagesCount = promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "dynamo_query_pages_count", Help: "Number of pages per query.", // Most queries will have one page, however this may increase with fuzzy diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go index b1927b5db6b2..d21513f1150b 100644 --- a/pkg/storage/chunk/client/aws/s3_storage_client.go +++ b/pkg/storage/chunk/client/aws/s3_storage_client.go @@ -32,6 +32,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client/hedging" storageawscommon "github.com/grafana/loki/pkg/storage/common/aws" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" loki_instrument "github.com/grafana/loki/pkg/util/instrument" ) @@ -45,7 +46,7 @@ var ( ) var s3RequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "s3_request_duration_seconds", Help: "Time spent doing S3 requests.", Buckets: []float64{.025, .05, .1, .25, .5, 1, 2}, diff --git a/pkg/storage/chunk/client/azure/blob_storage_client.go b/pkg/storage/chunk/client/azure/blob_storage_client.go index 34feba288142..0126c048e9b3 100644 --- 
a/pkg/storage/chunk/client/azure/blob_storage_client.go +++ b/pkg/storage/chunk/client/azure/blob_storage_client.go @@ -29,6 +29,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client/hedging" client_util "github.com/grafana/loki/pkg/storage/chunk/client/util" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" loki_instrument "github.com/grafana/loki/pkg/util/instrument" "github.com/grafana/loki/pkg/util/log" ) @@ -122,7 +123,7 @@ func (c *BlobStorageConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagS f.StringVar(&c.StorageAccountName, prefix+"azure.account-name", "", "Azure storage account name.") f.Var(&c.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key.") f.StringVar(&c.ConnectionString, prefix+"azure.connection-string", "", "If `connection-string` is set, the values of `account-name` and `endpoint-suffix` values will not be used. Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.") - f.StringVar(&c.ContainerName, prefix+"azure.container-name", "loki", "Name of the storage account blob container used to store chunks. This container must be created before running cortex.") + f.StringVar(&c.ContainerName, prefix+"azure.container-name", constants.Loki, "Name of the storage account blob container used to store chunks. This container must be created before running cortex.") f.StringVar(&c.EndpointSuffix, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. 
The storage account name will be prefixed to this value to create the FQDN.") f.BoolVar(&c.UseManagedIdentity, prefix+"azure.use-managed-identity", false, "Use Managed Identity to authenticate to the Azure storage account.") f.BoolVar(&c.UseFederatedToken, prefix+"azure.use-federated-token", false, "Use Federated Token to authenticate to the Azure storage account.") @@ -150,7 +151,7 @@ type BlobStorageMetrics struct { func NewBlobStorageMetrics() BlobStorageMetrics { b := BlobStorageMetrics{ requestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "azure_blob_request_duration_seconds", Help: "Time spent doing azure blob requests.", // Latency seems to range from a few ms to a few secs and is @@ -158,7 +159,7 @@ func NewBlobStorageMetrics() BlobStorageMetrics { Buckets: prometheus.ExponentialBuckets(0.005, 4, 6), }, []string{"operation", "status_code"}), egressBytesTotal: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "azure_blob_egress_bytes_total", Help: "Total bytes downloaded from Azure Blob Storage.", }), diff --git a/pkg/storage/chunk/client/baidubce/bos_storage_client.go b/pkg/storage/chunk/client/baidubce/bos_storage_client.go index e32e3f083e7c..7b3fe633d66b 100644 --- a/pkg/storage/chunk/client/baidubce/bos_storage_client.go +++ b/pkg/storage/chunk/client/baidubce/bos_storage_client.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/storage/chunk/client" + "github.com/grafana/loki/pkg/util/constants" ) // NoSuchKeyErr The resource you requested does not exist. 
@@ -26,7 +27,7 @@ const NoSuchKeyErr = "NoSuchKey" const DefaultEndpoint = bos.DEFAULT_SERVICE_DOMAIN var bosRequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "bos_request_duration_seconds", Help: "Time spent doing BOS requests.", Buckets: prometheus.ExponentialBuckets(0.005, 4, 6), diff --git a/pkg/storage/chunk/client/cassandra/instrumentation.go b/pkg/storage/chunk/client/cassandra/instrumentation.go index a02ef42513fb..31db2b1f542b 100644 --- a/pkg/storage/chunk/client/cassandra/instrumentation.go +++ b/pkg/storage/chunk/client/cassandra/instrumentation.go @@ -6,10 +6,12 @@ import ( "github.com/gocql/gocql" "github.com/prometheus/client_golang/prometheus" + + "github.com/grafana/loki/pkg/util/constants" ) var requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cassandra_request_duration_seconds", Help: "Time spent doing Cassandra requests.", Buckets: prometheus.ExponentialBuckets(0.001, 4, 9), diff --git a/pkg/storage/chunk/client/congestion/metrics.go b/pkg/storage/chunk/client/congestion/metrics.go index 5d1ddf916f37..83c035c806dc 100644 --- a/pkg/storage/chunk/client/congestion/metrics.go +++ b/pkg/storage/chunk/client/congestion/metrics.go @@ -1,6 +1,10 @@ package congestion -import "github.com/prometheus/client_golang/prometheus" +import ( + "github.com/grafana/loki/pkg/util/constants" + + "github.com/prometheus/client_golang/prometheus" +) type Metrics struct { currentLimit prometheus.Gauge @@ -25,7 +29,7 @@ func NewMetrics(name string, cfg Config) *Metrics { "name": name, } - const namespace = "loki" + const namespace = constants.Loki const subsystem = "store_congestion_control" m := Metrics{ currentLimit: prometheus.NewGauge(prometheus.GaugeOpts{ diff --git a/pkg/storage/chunk/client/gcp/instrumentation.go b/pkg/storage/chunk/client/gcp/instrumentation.go index 
c4863734a07f..5f6a6cb066f0 100644 --- a/pkg/storage/chunk/client/gcp/instrumentation.go +++ b/pkg/storage/chunk/client/gcp/instrumentation.go @@ -12,11 +12,13 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/api/option" "google.golang.org/grpc" + + "github.com/grafana/loki/pkg/util/constants" ) var ( bigtableRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "bigtable_request_duration_seconds", Help: "Time spent doing Bigtable requests.", @@ -26,7 +28,7 @@ var ( }, []string{"operation", "status_code"}) gcsRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "gcs_request_duration_seconds", Help: "Time spent doing GCS requests.", diff --git a/pkg/storage/chunk/client/ibmcloud/cos_object_client.go b/pkg/storage/chunk/client/ibmcloud/cos_object_client.go index ada6edb41c89..c576dd2da475 100644 --- a/pkg/storage/chunk/client/ibmcloud/cos_object_client.go +++ b/pkg/storage/chunk/client/ibmcloud/cos_object_client.go @@ -29,6 +29,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client" "github.com/grafana/loki/pkg/storage/chunk/client/hedging" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/log" ) @@ -46,7 +47,7 @@ var ( ) var cosRequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "cos_request_duration_seconds", Help: "Time spent doing cos requests.", Buckets: []float64{.025, .05, .1, .25, .5, 1, 2}, diff --git a/pkg/storage/chunk/client/metrics.go b/pkg/storage/chunk/client/metrics.go index e1bd68f2ccf2..4f507621a3a4 100644 --- a/pkg/storage/chunk/client/metrics.go +++ b/pkg/storage/chunk/client/metrics.go @@ -7,6 +7,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/storage/chunk" + 
"github.com/grafana/loki/pkg/util/constants" ) // takes a chunk client and exposes metrics for its operations. @@ -33,22 +34,22 @@ type ChunkClientMetrics struct { func NewChunkClientMetrics(reg prometheus.Registerer) ChunkClientMetrics { return ChunkClientMetrics{ chunksPutPerUser: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_stored_chunks_total", Help: "Total stored chunks per user.", }, []string{"user"}), chunksSizePutPerUser: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_stored_chunk_bytes_total", Help: "Total bytes stored in chunks per user.", }, []string{"user"}), chunksFetchedPerUser: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_fetched_chunks_total", Help: "Total fetched chunks per user.", }, []string{"user"}), chunksSizeFetchedPerUser: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_fetched_chunk_bytes_total", Help: "Total bytes fetched in chunks per user.", }, []string{"user"}), diff --git a/pkg/storage/chunk/fetcher/fetcher.go b/pkg/storage/chunk/fetcher/fetcher.go index ccf9dd6694e6..fd90f685e981 100644 --- a/pkg/storage/chunk/fetcher/fetcher.go +++ b/pkg/storage/chunk/fetcher/fetcher.go @@ -17,6 +17,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/cache" "github.com/grafana/loki/pkg/storage/chunk/client" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/spanlogger" ) @@ -36,12 +37,12 @@ var ( Help: "Total number of chunks asynchronously dequeued from a buffer and written back to the chunk cache.", }) cacheCorrupt = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, 
Name: "cache_corrupt_chunks_total", Help: "Total count of corrupt chunks found in cache.", }) chunkFetchedSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "chunk_fetcher", Name: "fetched_size_bytes", Help: "Compressed chunk size distribution fetched from storage.", diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 01a99e2cf2b3..4aa9f5ee674a 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -401,7 +401,7 @@ func (cfg *Config) Validate() error { } // NewIndexClient creates a new index client of the desired type specified in the PeriodConfig -func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, cfg Config, schemaCfg config.SchemaConfig, limits StoreLimits, cm ClientMetrics, shardingStrategy indexgateway.ShardingStrategy, registerer prometheus.Registerer, logger log.Logger) (index.Client, error) { +func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, cfg Config, schemaCfg config.SchemaConfig, limits StoreLimits, cm ClientMetrics, shardingStrategy indexgateway.ShardingStrategy, registerer prometheus.Registerer, logger log.Logger, metricsNamespace string) (index.Client, error) { switch true { case util.StringsContain(testingStorageTypes, periodCfg.IndexType): @@ -419,7 +419,7 @@ func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, return indexGatewayClient, nil } - gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger) + gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger, metricsNamespace) if err != nil { return nil, err } diff --git a/pkg/storage/factory_test.go b/pkg/storage/factory_test.go index a5dcbd133863..ea11f36fd309 100644 --- a/pkg/storage/factory_test.go +++ b/pkg/storage/factory_test.go @@ -17,6 +17,7 @@ import ( 
"github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/boltdb" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/validation" ) @@ -45,7 +46,7 @@ func TestFactoryStop(t *testing.T) { limits, err := validation.NewOverrides(defaults, nil) require.NoError(t, err) - store, err := NewStore(cfg, storeConfig, schemaConfig, limits, cm, nil, log.NewNopLogger()) + store, err := NewStore(cfg, storeConfig, schemaConfig, limits, cm, nil, log.NewNopLogger(), constants.Loki) require.NoError(t, err) store.Stop() @@ -84,7 +85,7 @@ func TestCassandraInMultipleSchemas(t *testing.T) { limits, err := validation.NewOverrides(defaults, nil) require.NoError(t, err) - store, err := NewStore(cfg, storeConfig, schemaCfg, limits, cm, nil, log.NewNopLogger()) + store, err := NewStore(cfg, storeConfig, schemaCfg, limits, cm, nil, log.NewNopLogger(), constants.Loki) require.NoError(t, err) store.Stop() @@ -143,7 +144,7 @@ func TestNamedStores(t *testing.T) { require.True(t, os.IsNotExist(err)) } - store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) defer store.Stop() @@ -160,7 +161,7 @@ func TestNamedStores(t *testing.T) { t.Run("period config referring to unrecognized store", func(t *testing.T) { schemaConfig := schemaConfig schemaConfig.Configs[0].ObjectType = "not-found" - _, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + _, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.Error(t, err) require.Contains(t, err.Error(), "unrecognized chunk client type not-found, choose one of:") }) diff 
--git a/pkg/storage/hack/main.go b/pkg/storage/hack/main.go index f4e58568e9ca..93278b429c9a 100644 --- a/pkg/storage/hack/main.go +++ b/pkg/storage/hack/main.go @@ -65,7 +65,7 @@ func getStore(cm storage.ClientMetrics) (storage.Store, *config.SchemaConfig, er }, } - store, err := storage.NewStore(storeConfig, config.ChunkStoreConfig{}, schemaCfg, &validation.Overrides{}, cm, prometheus.DefaultRegisterer, util_log.Logger) + store, err := storage.NewStore(storeConfig, config.ChunkStoreConfig{}, schemaCfg, &validation.Overrides{}, cm, prometheus.DefaultRegisterer, util_log.Logger, "cortex") return store, &schemaCfg, err } diff --git a/pkg/storage/store.go b/pkg/storage/store.go index d120d84ea79d..6bdf083a6948 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -85,11 +85,14 @@ type LokiStore struct { chunkFilterer chunk.RequestChunkFilterer congestionControllerFactory func(cfg congestion.Config, logger log.Logger, metrics *congestion.Metrics) congestion.Controller + + metricsNamespace string } // NewStore creates a new Loki Store using configuration supplied. 
func NewStore(cfg Config, storeCfg config.ChunkStoreConfig, schemaCfg config.SchemaConfig, limits StoreLimits, clientMetrics ClientMetrics, registerer prometheus.Registerer, logger log.Logger, + metricsNamespace string, ) (*LokiStore, error) { if len(schemaCfg.Configs) != 0 { if index := config.ActivePeriodConfig(schemaCfg.Configs); index != -1 && index < len(schemaCfg.Configs) { @@ -99,7 +102,7 @@ func NewStore(cfg Config, storeCfg config.ChunkStoreConfig, schemaCfg config.Sch } } - indexReadCache, err := cache.New(cfg.IndexQueriesCacheConfig, registerer, logger, stats.IndexCache) + indexReadCache, err := cache.New(cfg.IndexQueriesCacheConfig, registerer, logger, stats.IndexCache, metricsNamespace) if err != nil { return nil, err } @@ -108,14 +111,14 @@ func NewStore(cfg Config, storeCfg config.ChunkStoreConfig, schemaCfg config.Sch level.Warn(logger).Log("msg", "write dedupe cache is deprecated along with legacy index types. Consider using TSDB index which does not require a write dedupe cache.") } - writeDedupeCache, err := cache.New(storeCfg.WriteDedupeCacheConfig, registerer, logger, stats.WriteDedupeCache) + writeDedupeCache, err := cache.New(storeCfg.WriteDedupeCacheConfig, registerer, logger, stats.WriteDedupeCache, metricsNamespace) if err != nil { return nil, err } chunkCacheCfg := storeCfg.ChunkCacheConfig chunkCacheCfg.Prefix = "chunks" - chunksCache, err := cache.New(chunkCacheCfg, registerer, logger, stats.ChunkCache) + chunksCache, err := cache.New(chunkCacheCfg, registerer, logger, stats.ChunkCache, metricsNamespace) if err != nil { return nil, err } @@ -123,7 +126,7 @@ func NewStore(cfg Config, storeCfg config.ChunkStoreConfig, schemaCfg config.Sch chunkCacheCfgL2 := storeCfg.ChunkCacheConfigL2 chunkCacheCfgL2.Prefix = "chunksl2" // TODO(E.Welch) would we want to disambiguate this cache in the stats? I think not but we'd need to change stats.ChunkCache to do so. 
- chunksCacheL2, err := cache.New(chunkCacheCfgL2, registerer, logger, stats.ChunkCache) + chunksCacheL2, err := cache.New(chunkCacheCfgL2, registerer, logger, stats.ChunkCache, metricsNamespace) if err != nil { return nil, err } @@ -167,6 +170,8 @@ func NewStore(cfg Config, storeCfg config.ChunkStoreConfig, schemaCfg config.Sch logger: logger, limits: limits, + + metricsNamespace: metricsNamespace, } if err := s.init(); err != nil { return nil, err @@ -261,7 +266,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl if p.IndexType == config.TSDBType { if shouldUseIndexGatewayClient(s.cfg.TSDBShipperConfig.Config) { // inject the index-gateway client into the index store - gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger) + gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger, s.metricsNamespace) if err != nil { return nil, nil, nil, err } @@ -296,7 +301,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl }, nil } - idx, err := NewIndexClient(p, tableRange, s.cfg, s.schemaCfg, s.limits, s.clientMetrics, nil, indexClientReg, indexClientLogger) + idx, err := NewIndexClient(p, tableRange, s.cfg, s.schemaCfg, s.limits, s.clientMetrics, nil, indexClientReg, indexClientLogger, s.metricsNamespace) if err != nil { return nil, nil, nil, errors.Wrap(err, "error creating index client") } diff --git a/pkg/storage/store_test.go b/pkg/storage/store_test.go index a8b9de5bedac..380c410b7b54 100644 --- a/pkg/storage/store_test.go +++ b/pkg/storage/store_test.go @@ -32,6 +32,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/boltdb" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" + "github.com/grafana/loki/pkg/util/constants" util_log 
"github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/marshal" "github.com/grafana/loki/pkg/validation" @@ -215,7 +216,7 @@ func getLocalStore(cm ClientMetrics) Store { }, } - store, err := NewStore(storeConfig, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err := NewStore(storeConfig, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) if err != nil { panic(err) } @@ -1061,7 +1062,7 @@ func TestStore_indexPrefixChange(t *testing.T) { limits, err := validation.NewOverrides(validation.Limits{}, nil) require.NoError(t, err) - store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) // build and add chunks to the store @@ -1131,7 +1132,7 @@ func TestStore_indexPrefixChange(t *testing.T) { // restart to load the updated schema store.Stop() - store, err = NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err = NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) defer store.Stop() @@ -1238,7 +1239,7 @@ func TestStore_MultiPeriod(t *testing.T) { } ResetBoltDBIndexClientsWithShipper() - store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) // time ranges adding a chunk for each store and a chunk which overlaps both the stores @@ -1280,7 +1281,7 @@ func TestStore_MultiPeriod(t *testing.T) { store.Stop() ResetBoltDBIndexClientsWithShipper() - store, err = NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err = NewStore(cfg, 
config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) defer store.Stop() @@ -1568,7 +1569,7 @@ func TestStore_BoltdbTsdbSameIndexPrefix(t *testing.T) { } ResetBoltDBIndexClientsWithShipper() - store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err := NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) // time ranges adding a chunk for each store and a chunk which overlaps both the stores @@ -1628,7 +1629,7 @@ func TestStore_BoltdbTsdbSameIndexPrefix(t *testing.T) { require.Len(t, tsdbFiles, 1) require.Regexp(t, regexp.MustCompile(fmt.Sprintf(`\d{10}-%s-\d{19}\.tsdb\.gz`, ingesterName)), tsdbFiles[0].Name()) - store, err = NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger) + store, err = NewStore(cfg, config.ChunkStoreConfig{}, schemaConfig, limits, cm, nil, util_log.Logger, constants.Loki) require.NoError(t, err) defer store.Stop() diff --git a/pkg/storage/stores/index/metrics.go b/pkg/storage/stores/index/metrics.go index 0154ab2e4ccf..924122f950a8 100644 --- a/pkg/storage/stores/index/metrics.go +++ b/pkg/storage/stores/index/metrics.go @@ -3,6 +3,8 @@ package index import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) type metrics struct { @@ -12,7 +14,7 @@ type metrics struct { func newMetrics(reg prometheus.Registerer) *metrics { return &metrics{ indexQueryLatency: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "index_request_duration_seconds", Help: "Time (in seconds) spent in serving index query requests", Buckets: prometheus.ExponentialBucketsRange(0.005, 100, 12), diff --git a/pkg/storage/stores/series/index/caching_index_client.go 
b/pkg/storage/stores/series/index/caching_index_client.go index 49760fb3d456..dd6e7348f8fd 100644 --- a/pkg/storage/stores/series/index/caching_index_client.go +++ b/pkg/storage/stores/series/index/caching_index_client.go @@ -15,32 +15,33 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/storage/chunk/cache" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" ) var ( cacheCorruptErrs = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "querier_index_cache_corruptions_total", Help: "The number of cache corruptions for the index cache.", }) cacheHits = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "querier_index_cache_hits_total", Help: "The number of cache hits for the index cache.", }) cacheGets = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "querier_index_cache_gets_total", Help: "The number of gets for the index cache.", }) cachePuts = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "querier_index_cache_puts_total", Help: "The number of puts for the index cache.", }) cacheEncodeErrs = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "querier_index_cache_encode_errors_total", Help: "The number of errors for the index cache while encoding the body.", }) diff --git a/pkg/storage/stores/series/index/table_manager.go b/pkg/storage/stores/series/index/table_manager.go index b7ca725022e4..8c952b213cc2 100644 --- a/pkg/storage/stores/series/index/table_manager.go +++ b/pkg/storage/stores/series/index/table_manager.go @@ -20,6 +20,7 @@ import ( tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/util/constants" util_log 
"github.com/grafana/loki/pkg/util/log" ) @@ -41,31 +42,31 @@ type tableManagerMetrics struct { func newTableManagerMetrics(r prometheus.Registerer) *tableManagerMetrics { m := tableManagerMetrics{} m.syncTableDuration = promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "table_manager_sync_duration_seconds", Help: "Time spent syncing tables.", Buckets: prometheus.DefBuckets, }, []string{"operation", "status_code"}) m.tableCapacity = promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "table_capacity_units", Help: "Per-table capacity, measured in DynamoDB capacity units.", }, []string{"op", "table"}) m.createFailures = promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "table_manager_create_failures", Help: "Number of table creation failures during the last table-manager reconciliation", }) m.deleteFailures = promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "table_manager_delete_failures", Help: "Number of table deletion failures during the last table-manager reconciliation", }) m.lastSuccessfulSync = promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "table_manager_sync_success_timestamp_seconds", Help: "Timestamp of the last successful table manager sync.", }) diff --git a/pkg/storage/stores/series/series_index_store.go b/pkg/storage/stores/series/series_index_store.go index 9498f3a16be4..e8fc1f64ccbb 100644 --- a/pkg/storage/stores/series/series_index_store.go +++ b/pkg/storage/stores/series/series_index_store.go @@ -27,6 +27,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/index/stats" series_index "github.com/grafana/loki/pkg/storage/stores/series/index" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/extract" 
util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/util/spanlogger" @@ -34,27 +35,27 @@ import ( var ( indexLookupsPerQuery = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_index_lookups_per_query", Help: "Distribution of #index lookups per query.", Buckets: prometheus.ExponentialBuckets(1, 2, 5), }) preIntersectionPerQuery = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_series_pre_intersection_per_query", Help: "Distribution of #series (pre intersection) per query.", // A reasonable upper bound is around 100k - 10*(8^(6-1)) = 327k. Buckets: prometheus.ExponentialBuckets(10, 8, 6), }) postIntersectionPerQuery = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_series_post_intersection_per_query", Help: "Distribution of #series (post intersection) per query.", // A reasonable upper bound is around 100k - 10*(8^(6-1)) = 327k. Buckets: prometheus.ExponentialBuckets(10, 8, 6), }) chunksPerQuery = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_chunks_per_query", Help: "Distribution of #chunks per query.", // For 100k series for 7 week, could be 1.2m - 10*(8^(7-1)) = 2.6m. 
diff --git a/pkg/storage/stores/series/series_store_test.go b/pkg/storage/stores/series/series_store_test.go index 4bb2fc531a57..6210a189dba8 100644 --- a/pkg/storage/stores/series/series_store_test.go +++ b/pkg/storage/stores/series/series_store_test.go @@ -27,6 +27,7 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/client/testutils" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/series/index" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -89,7 +90,7 @@ func newTestChunkStoreConfigWithMockStorage(t require.TestingT, schemaCfg config }, nil) require.NoError(t, err) - store, err := storage.NewStore(storage.Config{MaxChunkBatchSize: 1}, storeCfg, schemaCfg, limits, cm, prometheus.NewRegistry(), log.NewNopLogger()) + store, err := storage.NewStore(storage.Config{MaxChunkBatchSize: 1}, storeCfg, schemaCfg, limits, cm, prometheus.NewRegistry(), log.NewNopLogger(), constants.Loki) require.NoError(t, err) tm, err := index.NewTableManager(tbmConfig, schemaCfg, 12*time.Hour, testutils.NewMockStorage(), nil, nil, nil) require.NoError(t, err) diff --git a/pkg/storage/stores/series_store_write.go b/pkg/storage/stores/series_store_write.go index a338bf6ef85a..db22c5caa120 100644 --- a/pkg/storage/stores/series_store_write.go +++ b/pkg/storage/stores/series_store_write.go @@ -13,24 +13,25 @@ import ( "github.com/grafana/loki/pkg/storage/chunk/fetcher" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/index" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/spanlogger" ) var ( DedupedChunksTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_deduped_chunks_total", Help: "Count of chunks which were not stored because they have already been stored by another replica.", }) DedupedBytesTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: 
"loki", + Namespace: constants.Loki, Name: "chunk_store_deduped_bytes_total", Help: "Count of bytes from chunks which were not stored because they have already been stored by another replica.", }) IndexEntriesPerChunk = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "chunk_store_index_entries_per_chunk", Help: "Number of entries written to storage per chunk.", Buckets: prometheus.ExponentialBuckets(1, 2, 5), diff --git a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go index 3536f9fadc3f..8fac60263811 100644 --- a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go +++ b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/boltdb" shipper_util "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/util" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/validation" ) @@ -242,7 +243,7 @@ func newTestStore(t testing.TB, clientMetrics storage.ClientMetrics) *testStore }, } - store, err := storage.NewStore(cfg, config.ChunkStoreConfig{}, schemaCfg, limits, clientMetrics, nil, util_log.Logger) + store, err := storage.NewStore(cfg, config.ChunkStoreConfig{}, schemaCfg, limits, clientMetrics, nil, util_log.Logger, constants.Loki) require.NoError(t, err) return &testStore{ indexDir: indexDir, diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go index dab3458b7775..cc0eb791ba78 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go @@ -25,6 +25,7 
@@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage/stores/series/index" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/discovery" util_math "github.com/grafana/loki/pkg/util/math" ) @@ -108,9 +109,9 @@ type GatewayClient struct { // // If it is configured to be in ring mode, a pool of GRPC connections to all Index Gateway instances is created using a ring. // Otherwise, it creates a GRPC connection pool to as many addresses as can be resolved from the given address. -func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger) (*GatewayClient, error) { +func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger, metricsNamespace string) (*GatewayClient, error) { latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "index_gateway_request_duration_seconds", Help: "Time (in seconds) spent serving requests when using the index gateway", Buckets: instrument.DefBuckets, @@ -164,7 +165,7 @@ func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, lim HealthCheckTimeout: sgClient.cfg.PoolConfig.RemoteTimeout, } clients := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "index_gateway_clients", Help: "The current number of index gateway clients.", }) diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go index bb96a68e24bb..0ec6e81c1775 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go @@ -24,6 +24,7 @@ import ( 
"github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage/stores/series/index" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -191,7 +192,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger) + c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) require.NoError(t, err) require.NotNil(t, c) @@ -222,7 +223,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger) + c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) require.NoError(t, err) require.NotNil(t, c) @@ -253,7 +254,7 @@ func TestGatewayClient(t *testing.T) { cfg.PoolConfig = clientpool.PoolConfig{ClientCleanupPeriod: 500 * time.Millisecond} overrides, _ := validation.NewOverrides(validation.Limits{}, nil) - gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger) + gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger, constants.Loki) require.NoError(t, err) ctx := user.InjectOrgID(context.Background(), "fake") @@ -440,11 +441,11 @@ func TestDoubleRegistration(t *testing.T) { Address: "my-store-address:1234", } - client, err := NewGatewayClient(clientCfg, r, o, logger) + client, err := NewGatewayClient(clientCfg, r, o, logger, constants.Loki) require.NoError(t, err) defer client.Stop() - client, err = NewGatewayClient(clientCfg, r, o, logger) + client, err = NewGatewayClient(clientCfg, r, o, logger, constants.Loki) require.NoError(t, err) defer client.Stop() } diff --git a/pkg/storage/stores/shipper/indexshipper/indexgateway/grpc.go b/pkg/storage/stores/shipper/indexshipper/indexgateway/grpc.go index 9b9fa9572704..de8edda70c08 100644 --- 
a/pkg/storage/stores/shipper/indexshipper/indexgateway/grpc.go +++ b/pkg/storage/stores/shipper/indexshipper/indexgateway/grpc.go @@ -7,6 +7,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/grpc" + + "github.com/grafana/loki/pkg/util/constants" ) type ServerInterceptors struct { @@ -16,7 +18,7 @@ type ServerInterceptors struct { func NewServerInterceptors(r prometheus.Registerer) *ServerInterceptors { requestCount := promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Subsystem: "index_gateway", Name: "requests_total", Help: "Total amount of requests served by the index gateway", diff --git a/pkg/storage/util_test.go b/pkg/storage/util_test.go index 72ae05d4e99b..6dff5146af42 100644 --- a/pkg/storage/util_test.go +++ b/pkg/storage/util_test.go @@ -25,6 +25,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores" index_stats "github.com/grafana/loki/pkg/storage/stores/index/stats" loki_util "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" ) @@ -243,7 +244,7 @@ func (m *mockChunkStore) GetChunks(_ context.Context, _ string, _, _ model.Time, refs = append(refs, r) } - cache, err := cache.New(cache.Config{Prefix: "chunks"}, nil, util_log.Logger, stats.ChunkCache) + cache, err := cache.New(cache.Config{Prefix: "chunks"}, nil, util_log.Logger, stats.ChunkCache, constants.Loki) if err != nil { panic(err) } diff --git a/pkg/util/constants/metrics_namespace.go b/pkg/util/constants/metrics_namespace.go new file mode 100644 index 000000000000..4d5b8a257e37 --- /dev/null +++ b/pkg/util/constants/metrics_namespace.go @@ -0,0 +1,6 @@ +package constants + +const ( + Loki = "loki" + Cortex = "cortex" +) diff --git a/pkg/util/log/experimental.go b/pkg/util/log/experimental.go index 6072e93e424d..ed26c06af347 100644 --- a/pkg/util/log/experimental.go +++ 
b/pkg/util/log/experimental.go @@ -5,11 +5,13 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) var experimentalFeaturesInUse = promauto.NewCounter( prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "experimental_features_in_use_total", Help: "The number of experimental features in use.", }, diff --git a/pkg/util/log/log.go b/pkg/util/log/log.go index c6f59217be93..97377ef0275c 100644 --- a/pkg/util/log/log.go +++ b/pkg/util/log/log.go @@ -15,6 +15,8 @@ import ( "github.com/grafana/dskit/server" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/pkg/util/constants" ) var ( @@ -120,17 +122,17 @@ func newPrometheusLogger(l dslog.Level, format string, reg prometheus.Registerer ) logMessages := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "log_messages_total", Help: "DEPRECATED. Use internal_log_messages_total for the same functionality. 
Total number of log messages created by Loki itself.", }, []string{"level"}) internalLogMessages := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "internal_log_messages_total", Help: "Total number of log messages created by Loki itself.", }, []string{"level"}) logFlushes := promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "log_flushes", Help: "Histogram of log flushes using the line-buffered logger.", Buckets: prometheus.ExponentialBuckets(1, 2, int(math.Log2(float64(logEntries)))+1), diff --git a/pkg/util/server/recovery.go b/pkg/util/server/recovery.go index 4c0155e16db2..713d77e44a30 100644 --- a/pkg/util/server/recovery.go +++ b/pkg/util/server/recovery.go @@ -14,13 +14,14 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" + "github.com/grafana/loki/pkg/util/constants" ) const maxStacksize = 8 * 1024 var ( panicTotal = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "panic_total", Help: "The total number of panic triggered", }) diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index 9cdef90385df..09c444aa6498 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -6,6 +6,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/util/flagext" ) @@ -84,7 +85,7 @@ func (e *ErrStreamRateLimit) Error() string { // MutatedSamples is a metric of the total number of lines mutated, by reason. 
var MutatedSamples = promauto.NewCounterVec( prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "mutated_samples_total", Help: "The total number of samples that have been mutated.", }, @@ -94,7 +95,7 @@ var MutatedSamples = promauto.NewCounterVec( // MutatedBytes is a metric of the total mutated bytes, by reason. var MutatedBytes = promauto.NewCounterVec( prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "mutated_bytes_total", Help: "The total number of bytes that have been mutated.", }, @@ -104,7 +105,7 @@ var MutatedBytes = promauto.NewCounterVec( // DiscardedBytes is a metric of the total discarded bytes, by reason. var DiscardedBytes = promauto.NewCounterVec( prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "discarded_bytes_total", Help: "The total number of bytes that were discarded.", }, @@ -114,7 +115,7 @@ var DiscardedBytes = promauto.NewCounterVec( // DiscardedSamples is a metric of the number of discarded samples, by reason. var DiscardedSamples = promauto.NewCounterVec( prometheus.CounterOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "discarded_samples_total", Help: "The total number of samples that were discarded.", }, @@ -122,7 +123,7 @@ var DiscardedSamples = promauto.NewCounterVec( ) var LineLengthHist = promauto.NewHistogram(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: constants.Loki, Name: "bytes_per_line", Help: "The total number of bytes per line.", Buckets: prometheus.ExponentialBuckets(1, 8, 8), // 1B -> 16MB