Tempo Ingesters register to loki ring #2766
Comments
Created the same issue in Loki as well: grafana/loki#10172. You can find the Loki configs there as well!
Hi, we have experienced this issue too and work around it by setting a different memberlist cluster_label:

    memberlist:
      ...
      cluster_label: <cluster>.<namespace>

We set it to the cluster and namespace of the install, although anything will work as long as the values are different. Can you try setting this and see if it resolves the issue? It sounds like maybe it needs a default in the helm chart. The root issue (as far as I can remember) is that Tempo and Loki use the same generic ring names.
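For a concrete picture, here is a minimal sketch of the workaround with two installs in the same Kubernetes cluster; the label values (`tempo.observability`, `loki.observability`) are placeholders, not anything the charts set for you:

```yaml
# Sketch only: each install gets its own cluster_label, so its memberlist
# gossip rejects packets from the other install even if a pod IP is reused.

# tempo.yaml (Tempo install)
memberlist:
  cluster_label: tempo.observability
---
# loki.yaml (Loki install)
memberlist:
  cluster_label: loki.observability
```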
@mdisibio Odd that this would happen across namespaces. I assume it has to do with how DNS discovery is being done?
I believe this happens due to IP reuse in Kubernetes. After a node in memberlist disappears, the cluster will still reach out to it for a certain timeout period. If a Tempo cluster has a pod shut down and a Loki cluster has a pod start up and claim its IP within that timeout, the two memberlist clusters can join together. We generally saw this occurring when rolling out two clusters at the same time.
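If it helps to see where that timeout lives, the relevant dskit memberlist options look roughly like the sketch below; the values are just the defaults as I remember them, so double-check them against your Loki/Tempo version:

```yaml
# Sketch of the memberlist settings that control how long a departed member
# keeps being contacted -- the window in which a reused pod IP from another
# cluster can be mistaken for the old member.
memberlist:
  gossip_to_dead_nodes_time: 30s      # keep gossiping to a dead node for this long
  dead_node_reclaim_time: 0s          # how soon a dead node's name/address may be reclaimed
  cluster_label: tempo.observability  # the label check is what actually blocks the merge
```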
We have run into this too. Would it make sense to configure the cluster label (each of Mimir, Loki and Tempo having a different value) in the helm chart by default, to avoid people running into this?
Just ran into this myself in prod.
We are running Tempo/Loki/Mimir on spot nodes, so there is probably more than average turnover, and we have hit this too. I am wondering if that's why, but also why it is happening across namespaces. It basically causes everything to freeze up, so it seems like a pretty important issue. We've rolled out LGTM with the helm charts and I'm wondering if it makes sense to default the cluster labels at least, @joe-elliott? Or is this something better solved in the code? @mdisibio I'm not seeing a … For those using the charts, see the sketch below.
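A hedged sketch of setting the label per install through the charts; the structuredConfig paths are my assumption about how the tempo-distributed and loki charts pass extra config through (verify them against your chart versions), and the label values are placeholders:

```yaml
# tempo-distributed values.yaml (sketch -- verify the key path for your chart version)
tempo:
  structuredConfig:
    memberlist:
      cluster_label: tempo.<namespace>
---
# loki values.yaml (sketch)
loki:
  structuredConfig:
    memberlist:
      cluster_label: loki.<namespace>
```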
Yeah, I'd be on board with defaulting the cluster label in the helm chart. I don't know much about helm, but if there were some way to put a unique value in …
Both Loki and Tempo use the same memberlist config. That …
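One way a chart could derive that unique value, sketched with standard Helm release metadata; this is a guess at an approach, not something either chart currently does:

```yaml
# Hypothetical snippet inside the chart template that renders the config file
# (e.g. the ConfigMap for tempo.yaml) -- not current chart behaviour.
memberlist:
  cluster_label: "{{ .Release.Name }}.{{ .Release.Namespace }}"
```

Since the release name and namespace are unique per install within a cluster, two releases in the same Kubernetes cluster would never accidentally share a label.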
Thanks @joe-elliott, I'll get some PRs going!
Love it! Thanks @hobbsh
This issue has been automatically marked as stale because it has not had any activity in the past 60 days.
Still relevant.
Experienced this just recently; thanks @hobbsh for the solution.
Describe the bug
Tempo ingesters registered to Loki's ingester ring, which caused Loki to go down and stop returning logs.
To Reproduce
Steps to reproduce the behavior:
Unsure how to reproduce this issue, as it has never happened in our current deployment before.
Expected behavior
Loki ingesters should register to the Loki ring and Tempo ingesters should register to the Tempo ring.
Environment:
Current deployment uses the tempo-distributed helm chart on EKS. Attached are the tempo.yaml and the nginx conf for tempo-gateway.
Tempo.yaml:
tempo-gateway nginx:
Additional Context
The only log line that directed us to the issue was:
level=warn ts=2023-08-04T14:32:18.386282517Z caller=logging.go:86 traceID=54e1a62fbdffbc09 orgID=fake msg="POST /loki/api/v1/push (500) 4.35479ms Response: \"rpc error: code = Unimplemented desc = unknown service logproto.Pusher\\n\" ws: false; Connection: close; Content-Length: 177219; Content-Type: application/x-protobuf; User-Agent: promtail/2.6.1; "