diff --git a/pkg/storage/bloom/v1/bloom_tokenizer.go b/pkg/storage/bloom/v1/bloom_tokenizer.go index 96ad1420b303..97dd6b3d0baa 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer.go @@ -112,6 +112,7 @@ func (bt *BloomTokenizer) Populate(swb *SeriesWithBloom, chks Iterator[ChunkRefW chunkSuccessfulInserts int chunkCachedInserts int chunkCollisionInserts int + chunkBytes int chk = chks.At() itr = chk.Itr ) @@ -123,7 +124,7 @@ func (bt *BloomTokenizer) Populate(swb *SeriesWithBloom, chks Iterator[ChunkRefW // raw tokenizer, we could iterate once and just return (prefix, token) pairs from the tokenizer. // Double points for them being different-ln references to the same data. line := itr.Entry().Line - sourceBytes += len(line) + chunkBytes += len(line) chunkTokenizer := NewPrefixedTokenIter(tokenBuf, prefixLn, bt.lineTokenizer.Tokens(line)) for chunkTokenizer.Next() { tok := chunkTokenizer.At() @@ -174,6 +175,10 @@ func (bt *BloomTokenizer) Populate(swb *SeriesWithBloom, chks Iterator[ChunkRefW } } + + // add the recorded chunkBytes to the sourceBytes counter in case we return early via an error + sourceBytes += chunkBytes + var es multierror.MultiError if err := itr.Close(); err != nil { es.Add(errors.Wrapf(err, "error closing chunk: %#v", chk.Ref)) @@ -194,6 +199,7 @@ func (bt *BloomTokenizer) Populate(swb *SeriesWithBloom, chks Iterator[ChunkRefW bt.metrics.insertsTotal.WithLabelValues(tokenTypeChunkPrefixed, collisionTypeFalse).Add(float64(chunkSuccessfulInserts)) bt.metrics.insertsTotal.WithLabelValues(tokenTypeChunkPrefixed, collisionTypeCache).Add(float64(chunkCachedInserts)) bt.metrics.insertsTotal.WithLabelValues(tokenTypeChunkPrefixed, collisionTypeTrue).Add(float64(chunkCollisionInserts)) + bt.metrics.sourceBytesAdded.Add(float64(chunkBytes)) } if err := chks.Err(); err != nil { diff --git a/pkg/storage/bloom/v1/metrics.go b/pkg/storage/bloom/v1/metrics.go index 6de8c41a791a..22e315e00175 100644 --- 
a/pkg/storage/bloom/v1/metrics.go +++ b/pkg/storage/bloom/v1/metrics.go @@ -17,6 +17,7 @@ type Metrics struct { blockSeriesIterated prometheus.Counter tokensTotal prometheus.Counter insertsTotal *prometheus.CounterVec + sourceBytesAdded prometheus.Counter blockSize prometheus.Histogram blockFlushReason *prometheus.CounterVec @@ -99,6 +100,11 @@ func NewMetrics(r prometheus.Registerer) *Metrics { Name: "bloom_inserts_total", Help: "Number of inserts into the bloom filter. collision type may be `false` (no collision), `cache` (found in token cache) or true (found in bloom filter). token_type may be either `raw` (the original ngram) or `chunk_prefixed` (the ngram with the chunk prefix)", }, []string{"token_type", "collision"}), + sourceBytesAdded: promauto.With(r).NewCounter(prometheus.CounterOpts{ + Namespace: constants.Loki, + Name: "bloom_source_bytes_added_total", + Help: "Number of bytes from chunks added to the bloom filter", + }), blockSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ Namespace: constants.Loki,