From a7e9ea808c82846e7252edace96ca559853282a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Wed, 18 Jan 2023 05:14:50 +0200 Subject: [PATCH] Merge release 0.30 into main (#6041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * compact: remove cancel on SyncMetas errors (#5923) in favour of 86b4039948b0918ca2ba121637d1d0d5b3f768c0 SyncMetas will retry if the error is retriable. Also, the cleanPartialMarked calls that are surrounded by runutil.Repeat() will be repeated; the ones that are not, and are not retriable, will throw an interrupt to run.Group() by returning err, and Group will call cancel() as it's configured for its interrupt func. Signed-off-by: Seena Fallah Signed-off-by: Seena Fallah * Cut v0.30.0-rc.0 (#5992) * Cut v0.30.0-rc.0 Signed-off-by: bwplotka * mdox fix. Signed-off-by: bwplotka Signed-off-by: bwplotka Signed-off-by: Giedrius Statkevičius * Cut 0.30.0 (#6011) Signed-off-by: bwplotka Signed-off-by: bwplotka Signed-off-by: Giedrius Statkevičius * *: cut 0.30.1 (#6017) * fix duplicate metrics registration in redis client (#6009) * fix duplicate metrics registration in redis client Signed-off-by: Kama Huang * fixed test Signed-off-by: Kama Huang Signed-off-by: Kama Huang * *: cut 0.30.1 Add CHANGELOG entry. 
Signed-off-by: Giedrius Statkevičius Signed-off-by: Kama Huang Signed-off-by: Giedrius Statkevičius Co-authored-by: Kama Huang <121007071+kama910@users.noreply.github.com> Signed-off-by: Giedrius Statkevičius * Tracing: Fix sampler defaults (#5887) * Fix sampler defaults Signed-off-by: Matej Gera * Add CHANGELOG Signed-off-by: Matej Gera * Replace checkout with git-shallow-clone (#5829) Signed-off-by: Matej Gera Signed-off-by: Matej Gera Signed-off-by: Matej Gera Signed-off-by: Giedrius Statkevičius * CHANGELOG: fix Signed-off-by: Giedrius Statkevičius Signed-off-by: Seena Fallah Signed-off-by: bwplotka Signed-off-by: Giedrius Statkevičius Signed-off-by: Kama Huang Signed-off-by: Matej Gera Co-authored-by: Seena Fallah Co-authored-by: Kama Huang <121007071+kama910@users.noreply.github.com> --- CHANGELOG.md | 20 +++++++++++--- pkg/tracing/jaeger/config_yaml.go | 45 ++++++++++++++++++++----------- pkg/tracing/jaeger/jaeger_test.go | 43 +++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22d33b3cbe..57a05310b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,14 +20,24 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6035](https://github.com/thanos-io/thanos/pull/6035) Replicate: Support all types of matchers to match blocks for replication. Change matcher parameter from string slice to a single string. ### Fixed -- [#5995] (https://github.com/thanos-io/thanos/pull/5993) Sidecar: Loads the TLS certificate during startup. + +- [#5995](https://github.com/thanos-io/thanos/pull/5995) Sidecar: Loads the TLS certificate during startup. - [#6044](https://github.com/thanos-io/thanos/pull/6044) Receive: mark ouf of window errors as conflict, if out-of-window samples ingestion is activated ### Changed - [#6010](https://github.com/thanos-io/thanos/pull/6010) *: Upgrade Prometheus to v0.41.0. 
+- [#5887](https://github.com/thanos-io/thanos/pull/5887) Tracing: Make sure rate limiting sampler is the default, as was the case in version pre-0.29.0. + +## [v0.30.1](https://github.com/thanos-io/thanos/tree/release-0.30) - 4.01.2023 + +### Fixed + +- [#6009](https://github.com/thanos-io/thanos/pull/6009) Query Frontend/Store: fix duplicate metrics registration in Redis client + +## [v0.30.0](https://github.com/thanos-io/thanos/tree/release-0.30) - 2.01.2023 -## [v0.30.0](https://github.com/thanos-io/thanos/tree/release-0.30) - in progress. +NOTE: Querier's `query.promql-engine` flag enabling new PromQL engine is now unhidden. We encourage users to use new experimental PromQL engine for efficiency reasons. ### Fixed @@ -41,6 +51,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5893](https://github.com/thanos-io/thanos/pull/5893) Cache: Fixed redis client not respecting `SetMultiBatchSize` config value. - [#5966](https://github.com/thanos-io/thanos/pull/5966) Query: Fixed mint and maxt when selecting series for the `api/v1/series` HTTP endpoint. - [#5997](https://github.com/thanos-io/thanos/pull/5997) Rule: switch to miekgdns DNS resolver as the default one. +- [#5948](https://github.com/thanos-io/thanos/pull/5948) Store: `chunks_fetched_duration` wrong calculation. +- [#5910](https://github.com/thanos-io/thanos/pull/5910) Receive: Fixed ketama quorum bug that could cause a success response for failed replication. This also heavily optimizes receiver CPU use. ### Added @@ -62,10 +74,12 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#5785](https://github.com/thanos-io/thanos/pull/5785) Query: `thanos_store_nodes_grpc_connections` now trimms `external_labels` label name longer than 1000 character. It also allows customizations in what labels to preserve using `query.conn-metric.label` flag. 
- [#5542](https://github.com/thanos-io/thanos/pull/5542) Mixin: Added query concurrency panel to Querier dashboard. - [#5846](https://github.com/thanos-io/thanos/pull/5846) Query Frontend: vertical query sharding supports subqueries. -- [#5909](https://github.com/thanos-io/thanos/pull/5909) Receive: compact tenant head after no appends have happened for 1.5 `tsdb.max-block-size`. - [#5593](https://github.com/thanos-io/thanos/pull/5593) Cache: switch Redis client to [Rueidis](https://github.com/rueian/rueidis). Rueidis is [faster](https://github.com/rueian/rueidis#benchmark-comparison-with-go-redis-v9) and provides [client-side caching](https://redis.io/docs/manual/client-side-caching/). It is highly recommended to use it so that repeated requests for the same key would not be needed. - [#5896](https://github.com/thanos-io/thanos/pull/5896) *: Upgrade Prometheus to v0.40.7 without implementing native histogram support. *Querying native histograms will fail with `Error executing query: invalid chunk encoding ""` and native histograms in write requests are ignored.* - [#5999](https://github.com/thanos-io/thanos/pull/5999) *: Upgrade Alertmanager dependency to v0.25.0. +- [#5909](https://github.com/thanos-io/thanos/pull/5909) Receive: Compact tenant head after no appends have happened for 1.5 `tsdb.max-block-size`. +- [#5838](https://github.com/thanos-io/thanos/pull/5838) Mixin: Added data touched type to Store dashboard. +- [#5922](https://github.com/thanos-io/thanos/pull/5922) Compact: Retry on clean, partial marked errors when possible. 
### Removed diff --git a/pkg/tracing/jaeger/config_yaml.go b/pkg/tracing/jaeger/config_yaml.go index 71009070b3..fae3f8c21c 100644 --- a/pkg/tracing/jaeger/config_yaml.go +++ b/pkg/tracing/jaeger/config_yaml.go @@ -19,6 +19,13 @@ import ( tracesdk "go.opentelemetry.io/otel/sdk/trace" ) +const ( + SamplerTypeRemote = "remote" + SamplerTypeProbabilistic = "probabilistic" + SamplerTypeConstant = "const" + SamplerTypeRateLimiting = "ratelimiting" +) + type ParentBasedSamplerConfig struct { LocalParentSampled bool `yaml:"local_parent_sampled"` RemoteParentSampled bool `yaml:"remote_parent_sampled"` @@ -114,22 +121,27 @@ func getSamplingFraction(samplerType string, samplingFactor float64) float64 { func getSampler(config Config) tracesdk.Sampler { samplerType := config.SamplerType + if samplerType == "" { + samplerType = SamplerTypeRateLimiting + } samplingFraction := getSamplingFraction(samplerType, config.SamplerParam) var sampler tracesdk.Sampler switch samplerType { - case "probabilistic": - sampler = tracesdk.ParentBased(tracesdk.TraceIDRatioBased(samplingFraction)) - case "const": + case SamplerTypeProbabilistic: + sampler = tracesdk.TraceIDRatioBased(samplingFraction) + case SamplerTypeConstant: if samplingFraction == 1.0 { sampler = tracesdk.AlwaysSample() } else { sampler = tracesdk.NeverSample() } - case "remote": + case SamplerTypeRemote: remoteOptions := getRemoteOptions(config) sampler = jaegerremote.New(config.ServiceName, remoteOptions...) - case "ratelimiting": + // Fallback always to default (rate limiting). + case SamplerTypeRateLimiting: + default: // The same config options are applicable to both remote and rate-limiting samplers. remoteOptions := getRemoteOptions(config) sampler = jaegerremote.New(config.ServiceName, remoteOptions...) 
@@ -137,17 +149,20 @@ func getSampler(config Config) tracesdk.Sampler { if ok { sampler.Update(config.SamplerParam) } - default: - var root tracesdk.Sampler - var parentOptions []tracesdk.ParentBasedSamplerOption - if config.SamplerParentConfig.LocalParentSampled { - parentOptions = append(parentOptions, tracesdk.WithLocalParentSampled(root)) - } - if config.SamplerParentConfig.RemoteParentSampled { - parentOptions = append(parentOptions, tracesdk.WithRemoteParentSampled(root)) - } - sampler = tracesdk.ParentBased(root, parentOptions...) } + + // Use parent-based to make sure we respect the span parent, if + // it is sampled. Optionally, allow user to specify the + // parent-based options. + var parentOptions []tracesdk.ParentBasedSamplerOption + if config.SamplerParentConfig.LocalParentSampled { + parentOptions = append(parentOptions, tracesdk.WithLocalParentSampled(sampler)) + } + if config.SamplerParentConfig.RemoteParentSampled { + parentOptions = append(parentOptions, tracesdk.WithRemoteParentSampled(sampler)) + } + sampler = tracesdk.ParentBased(sampler, parentOptions...) + return sampler } diff --git a/pkg/tracing/jaeger/jaeger_test.go b/pkg/tracing/jaeger/jaeger_test.go index 3c8b2f2e0b..021b8156dc 100644 --- a/pkg/tracing/jaeger/jaeger_test.go +++ b/pkg/tracing/jaeger/jaeger_test.go @@ -24,7 +24,7 @@ var parentConfig = ParentBasedSamplerConfig{LocalParentSampled: true} // This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server // it will be still enabled for all spans within this span. 
-func TestContextTracing_ClientEnablesTracing(t *testing.T) { +func TestContextTracing_ClientEnablesProbabilisticTracing(t *testing.T) { exp := tracetest.NewInMemoryExporter() config := Config{ SamplerType: "probabilistic", @@ -65,7 +65,7 @@ func TestContextTracing_ClientEnablesTracing(t *testing.T) { // This test shows that if sample factor will disable tracing on client process, when it would be enabled on server // it will be still disabled for all spans within this span. -func TestContextTracing_ClientDisablesTracing(t *testing.T) { +func TestContextTracing_ClientDisablesProbabilisticTracing(t *testing.T) { exp := tracetest.NewInMemoryExporter() config := Config{ @@ -105,6 +105,45 @@ func TestContextTracing_ClientDisablesTracing(t *testing.T) { tracing.ContextTracing_ClientDisablesTracing(t, exp, clientRoot, srvRoot, srvChild) } +func TestContextTracing_ClientDisablesAlwaysOnSampling(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + + config := Config{ + SamplerType: SamplerTypeConstant, + SamplerParam: 0, + } + sampler := getSampler(config) + tracerOtel := newTraceProvider( + context.Background(), + "tracerOtel", + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + sampler, // never sample + []attribute.KeyValue{}, + ) + tracer, _ := migration.Bridge(tracerOtel, log.NewNopLogger()) + + clientRoot, clientCtx := tracing.StartSpan(tracing.ContextWithTracer(context.Background(), tracer), "a") + + config.SamplerParam = 1 + sampler2 := getSampler(config) + // Simulate Server process with different tracer, but with client span in context. 
+ srvTracerOtel := newTraceProvider( + context.Background(), + "srvTracerOtel", + log.NewNopLogger(), + tracesdk.NewSimpleSpanProcessor(exp), + sampler2, // always sample + []attribute.KeyValue{}, + ) + srvTracer, _ := migration.Bridge(srvTracerOtel, log.NewNopLogger()) + + srvRoot, srvCtx := tracing.StartSpan(tracing.ContextWithTracer(clientCtx, srvTracer), "b") + srvChild, _ := tracing.StartSpan(srvCtx, "bb") + + tracing.ContextTracing_ClientDisablesTracing(t, exp, clientRoot, srvRoot, srvChild) +} + // This test shows that if span will contain special baggage (for example from special HTTP header), even when sample // factor will disable client & server tracing, it will be still enabled for all spans within this span. func TestContextTracing_ForceTracing(t *testing.T) {