diff --git a/Makefile b/Makefile index cc7983754e..4f7bdfdb60 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ format: $(JSONNET_SRC) $(JSONNETFMT) go-format .PHONY: lint lint: $(JSONNET_LINT) $(JSONNET_VENDOR_DIR) go-lint @echo ">>>>> Running linter" - echo ${JSONNET_SRC} | $(XARGS) -n 1 -- $(JSONNET_LINT) -J "$(JSONNET_VENDOR_DIR)" + echo ${JSONNET_SRC} | $(XARGS) -n 1 -- $(JSONNET_LINT) -J "$(JSONNET_VENDOR_DIR)" -J lib .PHONY: go-lint go-lint: ## Runs various static analysis against our code. @@ -157,7 +157,7 @@ resources/services/observatorium-tenants-template.yaml: services/observatorium-t resources/services/observatorium-template.yaml: resources/.tmp/tenants/rbac.json services/observatorium.libsonnet services/observatorium-template.jsonnet $(JSONNET) $(GOJSONTOYAML) $(JSONNETFMT) @echo ">>>>> Running observatorium templates" - $(JSONNET) -J "$(JSONNET_VENDOR_DIR)" services/observatorium-template.jsonnet | $(GOJSONTOYAML) > $@ + $(JSONNET) -J "$(JSONNET_VENDOR_DIR)" -J lib services/observatorium-template.jsonnet | $(GOJSONTOYAML) > $@ resources/services/observatorium-metrics-template.yaml: $(wildcard services/observatorium-metrics-*) $(JSONNET) $(GOJSONTOYAML) $(JSONNETFMT) @echo ">>>>> Running observatorium-metrics templates" @@ -165,7 +165,7 @@ resources/services/observatorium-metrics-template.yaml: $(wildcard services/obse resources/services/observatorium-logs-template.yaml: $(wildcard services/observatorium-logs-*) $(JSONNET) $(GOJSONTOYAML) $(JSONNETFMT) @echo ">>>>> Running observatorium-logs templates" - $(JSONNET) -J "$(JSONNET_VENDOR_DIR)" services/observatorium-logs-template.jsonnet | $(GOJSONTOYAML) > $@ + $(JSONNET) -J "$(JSONNET_VENDOR_DIR)" -J lib services/observatorium-logs-template.jsonnet | $(GOJSONTOYAML) > $@ resources/services/observatorium-traces-template.yaml: $(wildcard services/observatorium-traces-*) $(JSONNET) $(GOJSONTOYAML) $(JSONNETFMT) @echo ">>>>> Running observatorium-traces templates" diff --git a/jsonnetfile.json b/jsonnetfile.json index 0556346f13..6a5932b070 100644 --- a/jsonnetfile.json +++ b/jsonnetfile.json @@ -37,6 +37,15 @@ }, "version": "master" }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/docsonnet.git", + "subdir": "doc-util" + } + }, + "version": "master" + }, { "source": { "git": { @@ -142,4 +151,4 @@ } ], "legacyImports": true -} \ No newline at end of file +} diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index d09e8aaa4c..2a2da25b26 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -51,6 +51,16 @@ "version": "6db00c292d3a1c71661fc875f90e0ec7caa538c2", "sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "consul" + } + }, + "version": "18a7aa2cd2154057a16d4667f6b1debda8bc50a6", + "sum": "Po3c1Ic96ngrJCtOazic/7OsLkoILOKZWXWyZWl+od8=" + }, { "source": { "git": { @@ -61,6 +71,36 @@ "version": "4d4b5b1ce01003547a110f93cc86b8b7afb282a6", "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "jaeger-agent-mixin" + } + }, + "version": "18a7aa2cd2154057a16d4667f6b1debda8bc50a6", + "sum": "nsukyr2SS8h97I2mxvBazXZp2fxu1i6eg+rKq3/NRwY=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "ksonnet-util" + } + }, + "version": "18a7aa2cd2154057a16d4667f6b1debda8bc50a6", + "sum": "0y3AFX9LQSpfWTxWKSwoLgbt0Wc9nnCwhMH2szKzHv0=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "memcached" + } + }, + "version": "18a7aa2cd2154057a16d4667f6b1debda8bc50a6", + "sum": "SWywAq4U0MRPMbASU0Ez8O9ArRNeoZzb75sEuReueow=" + }, { "source": { "git": { @@ -81,6 +121,16 @@ "version": "881db2241f0c5007c3e831caf34b0c645202b4ab", "sum": "Je2SxBKu+1WrKEEG60zjSKaY/6TPX8uRz5bsaw0a8oA=" }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/loki.git", + "subdir": "production/ksonnet/loki" + } + }, + "version": "ea14d1348e5a3ae7ed075a5fc0b0b93304bb2ebd", + "sum": "i2I9UmJ1Etvw2QsHyi0zJ6PQMKwR+1X/Ohs1jmKEV24=" + }, { "source": { "git": { @@ -111,6 +161,26 @@ "version": "6f8fe448c7a021bbdac78e5d848e2fb7346a1d99", "sum": "U/RwaRP/ps+5dVyeVxeFEb2psfZHQgEjHU2Jeb454Kg=" }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/docsonnet.git", + "subdir": "doc-util" + } + }, + "version": "2eae33a828320269c42acf38e808479a33e416db", + "sum": "lppHbNARpG3YTpuSv94X9TyIE9TfV3CyTVceIHSRxpc=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/k8s-libsonnet.git", + "subdir": "1.26" + } + }, + "version": "9e5b48eee32913938d3cac30f183b49ecd9fe13a", + "sum": "7pl3HQqiKg4zJ0dWFqMo9yMGDEvlVdxgPGr1rMm0/LE=" + }, { "source": { "git": { @@ -170,8 +240,8 @@ "subdir": "configuration" } }, - "version": "361c20e9bbc6533e10e1582fa0e88c378818018b", - "sum": "l1Xz/Hs/ba0wdjugaK90fCxqy2DuW8sMY95HuR0/QHI=" + "version": "a05c34ec1334f7811df0900930f9784e4aace552", + "sum": "mLtOlWUztW8Kb+p7baP2Tl16na5qGKJwA4mmFljXVe4=" }, { "source": { diff --git a/lib/k.libsonnet b/lib/k.libsonnet new file mode 100644 index 0000000000..ba846036c2 --- /dev/null +++ b/lib/k.libsonnet @@ -0,0 +1 @@ +(import 'github.com/jsonnet-libs/k8s-libsonnet/1.26/main.libsonnet') diff --git a/resources/services/observatorium-logs-template.yaml b/resources/services/observatorium-logs-template.yaml index 289568ded5..74e95d1851 100644 --- a/resources/services/observatorium-logs-template.yaml +++ b/resources/services/observatorium-logs-template.yaml @@ -462,33 +462,18 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-compactor-grpc - spec: - clusterIP: None - ports: - - name: grpc - port: 9095 - targetPort: 9095 - selector: - app.kubernetes.io/component: compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: Service - metadata: - labels: - app.kubernetes.io/component: compactor - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-compactor-http + name: observatorium-loki-compactor spec: ports: - - name: metrics + - name: compactor-http-metrics port: 3100 targetPort: 3100 + - name: compactor-grpc + port: 9095 + targetPort: 9095 + - name: compactor-gossip-ring + port: 7946 + targetPort: 7946 selector: app.kubernetes.io/component: compactor app.kubernetes.io/instance: observatorium @@ -505,7 +490,7 @@ objects: name: observatorium-loki-compactor spec: endpoints: - - port: metrics + - port: compactor-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -533,7 +518,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium - serviceName: observatorium-loki-compactor-grpc + serviceName: observatorium-loki-compactor template: metadata: labels: @@ -562,13 +547,18 @@ objects: - -target=compactor - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -690,7 +680,6 @@ objects: config.yaml: |- "analytics": "reporting_enabled": false - "auth_enabled": true "chunk_store_config": "chunk_cache_config": "memcached": @@ -698,15 +687,17 @@ objects: "parallelism": 100 "memcached_client": "addresses": "dns+observatorium-loki-chunk-cache.${NAMESPACE}.svc.cluster.local:11211" - "consistent_hash": true "max_idle_conns": 100 - "timeout": "100ms" - "update_interval": "1m" - "max_look_back_period": "0s" "common": - "compactor_grpc_address": "observatorium-loki-compactor-grpc.${NAMESPACE}.svc.cluster.local:9095" + "compactor_grpc_address": "observatorium-loki-compactor.${NAMESPACE}.svc.cluster.local:9095" + "ring": + "kvstore": + "store": "memberlist" "compactor": "compaction_interval": "2h" + "compactor_ring": + "kvstore": + "store": "memberlist" "shared_store": "s3" "working_directory": "/data/loki/compactor" "distributor": @@ -715,13 +706,14 @@ objects: "store": "memberlist" "frontend": "compress_responses": true - "scheduler_address": "observatorium-loki-query-scheduler-grpc.${NAMESPACE}.svc.cluster.local:9095" - "tail_proxy_url": "observatorium-loki-querier-http.${NAMESPACE}.svc.cluster.local:3100" + "log_queries_longer_than": "5s" + "scheduler_address": "observatorium-loki-query-scheduler-headless.${NAMESPACE}.svc.cluster.local:9095" + "tail_proxy_url": "observatorium-loki-querier.${NAMESPACE}.svc.cluster.local:3100" "frontend_worker": "grpc_client_config": "max_send_msg_size": 104857600 "match_max_concurrent": true - "scheduler_address": "observatorium-loki-query-scheduler-grpc.${NAMESPACE}.svc.cluster.local:9095" + "scheduler_address": "observatorium-loki-query-scheduler-headless.${NAMESPACE}.svc.cluster.local:9095" "ingester": "chunk_block_size": 262144 "chunk_encoding": "snappy" @@ -732,17 +724,18 @@ objects: "heartbeat_period": "5s" "interface_names": - "eth0" - "join_after": "60s" + "join_after": "30s" "num_tokens": 512 "ring": "heartbeat_timeout": "1m" "kvstore": "store": "memberlist" + "replication_factor": ${LOKI_REPLICATION_FACTOR} "max_transfer_retries": 0 "wal": "dir": "/data/loki/wal" "enabled": true - "replay_memory_ceiling": "4GB" + "replay_memory_ceiling": ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} "ingester_client": "grpc_client_config": "max_recv_msg_size": 67108864 @@ -782,25 +775,17 @@ objects: "max_join_retries": 10 "min_join_backoff": "1s" "querier": - "engine": - "max_look_back_period": "30s" - "extra_query_delay": "0s" - "max_concurrent": 2 - "query_ingesters_within": "3h" - "tail_max_duration": "1h" + "max_concurrent": ${LOKI_QUERIER_MAX_CONCURRENCY} + "query_ingesters_within": "2h" "query_range": "align_queries_with_step": true "cache_results": true "max_retries": 5 - "parallelise_shardable_queries": false "results_cache": "cache": "memcached_client": "addresses": "dns+observatorium-loki-results-cache.${NAMESPACE}.svc.cluster.local:11211" - "consistent_hash": true - "max_idle_conns": 16 "timeout": "500ms" - "update_interval": "1m" "query_scheduler": "max_outstanding_requests_per_tenant": 256 "ruler": @@ -810,17 +795,20 @@ objects: "enable_alertmanager_v2": false "enable_api": true "enable_sharding": true + "external_url": "${ALERTMANAGER_EXTERNAL_URL}" "ring": "kvstore": "store": "memberlist" "rule_path": "/data" "storage": + "s3": + "access_key_id": "${RULER_AWS_ACCESS_KEY_ID}" + "bucketnames": "${RULER_S3_BUCKETS}" + "region": "${RULER_S3_REGION}" + "secret_access_key": "${RULER_AWS_SECRET_ACCESS_KEY}" "type": "s3" "wal": "dir": "/data/loki/wal" - "max_age": "4h" - "min_age": "5m" - "truncate_frequency": "60m" "schema_config": "configs": - "from": "2020-10-01" @@ -842,21 +830,22 @@ objects: "http_server_write_timeout": "10m" "log_level": "${LOKI_LOG_LEVEL}" "storage_config": + "aws": + "access_key_id": "${AWS_ACCESS_KEY_ID}" + "bucketnames": "${S3_BUCKETS}" + "region": "${S3_REGION}" + "secret_access_key": "${AWS_SECRET_ACCESS_KEY}" "boltdb_shipper": "active_index_directory": "/data/loki/index" "cache_location": "/data/loki/index_cache" - "cache_ttl": "24h" "index_gateway_client": - "server_address": "observatorium-loki-index-gateway-grpc.${NAMESPACE}.svc.cluster.local:9095" - "resync_interval": "5m" + "server_address": "observatorium-loki-index-gateway.${NAMESPACE}.svc.cluster.local:9095" "shared_store": "s3" "index_queries_cache_config": "memcached": "batch_size": 100 - "parallelism": 100 "memcached_client": "addresses": "dns+observatorium-loki-index-query-cache.${NAMESPACE}.svc.cluster.local:11211" - "consistent_hash": true "tracing": "enabled": true overrides.yaml: '{}' @@ -918,13 +907,18 @@ objects: - -target=distributor - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -1040,13 +1034,12 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-distributor-grpc + name: observatorium-loki-distributor-http spec: - clusterIP: None ports: - - name: grpc - port: 9095 - targetPort: 9095 + - name: metrics + port: 3100 + targetPort: 3100 selector: app.kubernetes.io/component: distributor app.kubernetes.io/instance: observatorium @@ -1061,12 +1054,18 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-distributor-http + name: observatorium-loki-distributor spec: ports: - - name: metrics + - name: distributor-http-metrics port: 3100 targetPort: 3100 + - name: distributor-grpc + port: 9095 + targetPort: 9095 + - name: distributor-gossip-ring + port: 7946 + targetPort: 7946 selector: app.kubernetes.io/component: distributor app.kubernetes.io/instance: observatorium @@ -1083,7 +1082,7 @@ objects: name: observatorium-loki-distributor spec: endpoints: - - port: metrics + - port: distributor-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -1105,7 +1104,7 @@ objects: spec: clusterIP: None ports: - - name: gossip + - name: gossip-ring port: 7946 protocol: TCP targetPort: 7946 @@ -1123,33 +1122,18 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-index-gateway-grpc - spec: - clusterIP: None - ports: - - name: grpc - port: 9095 - targetPort: 9095 - selector: - app.kubernetes.io/component: index-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: Service - metadata: - labels: - app.kubernetes.io/component: index-gateway - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-index-gateway-http + name: observatorium-loki-index-gateway spec: ports: - - name: metrics + - name: index-gateway-http-metrics port: 3100 targetPort: 3100 + - name: index-gateway-grpc + port: 9095 + targetPort: 9095 + - name: index-gateway-gossip-ring + port: 7946 + targetPort: 7946 selector: app.kubernetes.io/component: index-gateway app.kubernetes.io/instance: observatorium @@ -1166,7 +1150,7 @@ objects: name: observatorium-loki-index-gateway spec: endpoints: - - port: metrics + - port: index-gateway-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -1194,7 +1178,7 @@ objects: app.kubernetes.io/instance: observatorium app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium - serviceName: observatorium-loki-index-gateway-grpc + serviceName: observatorium-loki-index-gateway template: metadata: labels: @@ -1223,13 +1207,18 @@ objects: - -target=index-gateway - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -1355,33 +1344,18 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-ingester-grpc - spec: - clusterIP: None - ports: - - name: grpc - port: 9095 - targetPort: 9095 - selector: - app.kubernetes.io/component: ingester - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: Service - metadata: - labels: - app.kubernetes.io/component: ingester - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-ingester-http + name: observatorium-loki-ingester spec: ports: - - name: metrics + - name: ingester-http-metrics port: 3100 targetPort: 3100 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + - name: ingester-gossip-ring + port: 7946 + targetPort: 7946 selector: app.kubernetes.io/component: ingester app.kubernetes.io/instance: observatorium @@ -1398,7 +1372,7 @@ objects: name: observatorium-loki-ingester spec: endpoints: - - port: metrics + - port: ingester-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -1427,7 +1401,7 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium loki.grafana.com/gossip: "true" - serviceName: observatorium-loki-ingester-grpc + serviceName: observatorium-loki-ingester template: metadata: labels: @@ -1457,14 +1431,18 @@ objects: - -target=ingester - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} - - -ingester.wal-replay-memory-ceiling=${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -1631,13 +1609,18 @@ objects: - -target=querier - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -1753,13 +1736,12 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-querier-grpc + name: observatorium-loki-querier-http spec: - clusterIP: None ports: - - name: grpc - port: 9095 - targetPort: 9095 + - name: metrics + port: 3100 + targetPort: 3100 selector: app.kubernetes.io/component: querier app.kubernetes.io/instance: observatorium @@ -1774,12 +1756,15 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-querier-http + name: observatorium-loki-querier spec: ports: - - name: metrics + - name: querier-http-metrics port: 3100 targetPort: 3100 + - name: querier-grpc + port: 9095 + targetPort: 9095 selector: app.kubernetes.io/component: querier app.kubernetes.io/instance: observatorium @@ -1796,7 +1781,7 @@ objects: name: observatorium-loki-querier spec: endpoints: - - port: metrics + - port: querier-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -1852,13 +1837,18 @@ objects: - -target=query-frontend - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -1972,13 +1962,12 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-query-frontend-grpc + name: observatorium-loki-query-frontend-http spec: - clusterIP: None ports: - - name: grpc - port: 9095 - targetPort: 9095 + - name: metrics + port: 3100 + targetPort: 3100 selector: app.kubernetes.io/component: query-frontend app.kubernetes.io/instance: observatorium @@ -1993,12 +1982,15 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-query-frontend-http + name: observatorium-loki-query-frontend spec: ports: - - name: metrics + - name: http-metrics port: 3100 targetPort: 3100 + - name: grpclb + port: 9095 + targetPort: 9095 selector: app.kubernetes.io/component: query-frontend app.kubernetes.io/instance: observatorium @@ -2015,7 +2007,7 @@ objects: name: observatorium-loki-query-frontend spec: endpoints: - - port: metrics + - port: http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -2071,13 +2063,18 @@ objects: - -target=query-scheduler - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=${LOKI_REPLICATION_FACTOR} + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: @@ -2191,33 +2188,20 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-query-scheduler-grpc + name: observatorium-loki-query-scheduler-headless spec: clusterIP: None ports: - - name: grpc - port: 9095 - targetPort: 9095 - selector: - app.kubernetes.io/component: query-scheduler - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium -- apiVersion: v1 - kind: Service - metadata: - labels: - app.kubernetes.io/component: query-scheduler - app.kubernetes.io/instance: observatorium - app.kubernetes.io/name: loki - app.kubernetes.io/part-of: observatorium - app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-query-scheduler-http - spec: - ports: - - name: metrics + - name: http-metrics port: 3100 targetPort: 3100 + - name: grpclb + port: 9095 + targetPort: 9095 + - name: gossip-ring + port: 7946 + targetPort: 7946 + publishNotReadyAddresses: true selector: app.kubernetes.io/component: query-scheduler app.kubernetes.io/instance: observatorium @@ -2234,7 +2218,7 @@ objects: name: observatorium-loki-query-scheduler spec: endpoints: - - port: metrics + - port: http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -2253,13 +2237,12 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-ruler-grpc + name: observatorium-loki-ruler-http spec: - clusterIP: None ports: - - name: grpc - port: 9095 - targetPort: 9095 + - name: metrics + port: 3100 + targetPort: 3100 selector: app.kubernetes.io/component: ruler app.kubernetes.io/instance: observatorium @@ -2274,12 +2257,18 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium app.kubernetes.io/version: ${LOKI_IMAGE_TAG} - name: observatorium-loki-ruler-http + name: observatorium-loki-ruler spec: ports: - - name: metrics + - name: ruler-http-metrics port: 3100 targetPort: 3100 + - name: ruler-grpc + port: 9095 + targetPort: 9095 + - name: ruler-gossip-ring + port: 7946 + targetPort: 7946 selector: app.kubernetes.io/component: ruler app.kubernetes.io/instance: observatorium @@ -2296,7 +2285,7 @@ objects: name: observatorium-loki-ruler spec: endpoints: - - port: metrics + - port: ruler-http-metrics namespaceSelector: matchNames: - ${NAMESPACE} @@ -2325,7 +2314,7 @@ objects: app.kubernetes.io/name: loki app.kubernetes.io/part-of: observatorium loki.grafana.com/gossip: "true" - serviceName: observatorium-loki-ruler-grpc + serviceName: observatorium-loki-ruler template: metadata: labels: @@ -2355,18 +2344,18 @@ objects: - -target=ruler - -config.file=/etc/loki/config/config.yaml - -limits.per-user-override-config=/etc/loki/config/overrides.yaml - - -log.level=error - - -s3.buckets=$(S3_BUCKETS) - - -s3.region=$(S3_REGION) - - -s3.access-key-id=$(AWS_ACCESS_KEY_ID) - - -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY) - - -ruler.storage.s3.buckets=$(RULER_S3_BUCKETS) - - -ruler.storage.s3.region=$(RULER_S3_REGION) - - -ruler.storage.s3.access-key-id=$(RULER_AWS_ACCESS_KEY_ID) - - -ruler.storage.s3.secret-access-key=$(RULER_AWS_SECRET_ACCESS_KEY) - - -distributor.replication-factor=1 - - -ruler.external.url="${ALERTMANAGER_EXTERNAL_URL}" + - -config.expand-env=true env: + - name: LOKI_LOG_LEVEL + value: ${LOKI_LOG_LEVEL} + - name: LOKI_REPLICATION_FACTOR + value: ${LOKI_REPLICATION_FACTOR} + - name: LOKI_QUERIER_MAX_CONCURRENCY + value: ${LOKI_QUERIER_MAX_CONCURRENCY} + - name: LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING + value: ${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING} + - name: ALERTMANAGER_EXTERNAL_URL + value: ${ALERTMANAGER_EXTERNAL_URL} - name: S3_BUCKETS valueFrom: secretKeyRef: diff --git a/services/observatorium-logs-template-overwrites.libsonnet b/services/observatorium-logs-template-overwrites.libsonnet index 354fe20199..1af6b3a49f 100644 --- a/services/observatorium-logs-template-overwrites.libsonnet +++ b/services/observatorium-logs-template-overwrites.libsonnet @@ -91,52 +91,9 @@ local jaegerAgentSidecar = (import 'sidecars/jaeger-agent.libsonnet')({ }, }, } - else if m.kind == 'StatefulSet' && std.length(std.findSubstr('querier', name)) != 0 then - m { - spec+: { - template+: { - spec+: { - containers: [ - c { - args: std.filter(function(arg) - !std.member([ - '-distributor.replication-factor', - '-querier.max-concurrent', - '-querier.worker-match-max-concurrent', - ], std.split(arg, '=')[0]), super.args) - + [ - '-distributor.replication-factor=${LOKI_REPLICATION_FACTOR}', - '-querier.max-concurrent=${LOKI_QUERIER_MAX_CONCURRENCY}', - '-querier.worker-match-max-concurrent', - ], - } - for c in super.containers - ], - }, - }, - } + jaegerAgentSidecar.spec, - } else if m.kind == 'StatefulSet' && std.length(std.findSubstr('ingester', name)) != 0 then m { spec+: { - template+: { - spec+: { - containers: [ - c { - args: std.filter(function(arg) - !std.member([ - '-distributor.replication-factor', - '-ingester.wal-replay-memory-ceiling', - ], std.split(arg, '=')[0]), super.args) - + [ - '-distributor.replication-factor=${LOKI_REPLICATION_FACTOR}', - '-ingester.wal-replay-memory-ceiling=${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING}', - ], - } - for c in super.containers - ], - }, - }, volumeClaimTemplates: [ t { spec: { @@ -156,18 +113,6 @@ local jaegerAgentSidecar = (import 'sidecars/jaeger-agent.libsonnet')({ else if m.kind == 'StatefulSet' && std.length(std.findSubstr('ruler', name)) != 0 then m { spec+: { - template+: { - spec+: { - containers: [ - c { - args+: [ - '-ruler.external.url="${ALERTMANAGER_EXTERNAL_URL}"', - ], - } - for c in super.containers - ], - }, - }, volumeClaimTemplates: [ t { spec: { @@ -190,15 +135,7 @@ local jaegerAgentSidecar = (import 'sidecars/jaeger-agent.libsonnet')({ template+: { spec+: { containers: [ - c { - args: std.filter(function(arg) - !std.member([ - '-distributor.replication-factor', - ], std.split(arg, '=')[0]), super.args) - + [ - '-distributor.replication-factor=${LOKI_REPLICATION_FACTOR}', - ], - } + + c + if std.length(std.findSubstr('query-frontend', c.name)) != 0 then // The frontend will only return ready once a querier has connected to it. // Because the service used for connecting the querier to the frontend only lists ready diff --git a/services/observatorium-logs.libsonnet b/services/observatorium-logs.libsonnet index a2f9376594..0d6f7a94e8 100644 --- a/services/observatorium-logs.libsonnet +++ b/services/observatorium-logs.libsonnet @@ -71,8 +71,13 @@ local lokiCaches = (import 'components/loki-caches.libsonnet'); image: '%s:%s' % ['${LOKI_IMAGE}', cfg.version], commonLabels+: obs.config.commonLabels, query+: { - concurrency: 2, // overwritten in observatorium-logs-template-overwrites.libsonnet + concurrency: '${LOKI_QUERIER_MAX_CONCURRENCY}', }, + ruler: { + externalUrl: '${ALERTMANAGER_EXTERNAL_URL}', + }, + replicationFactor: '${LOKI_REPLICATION_FACTOR}', + logLevel: '${LOKI_LOG_LEVEL}', objectStorageConfig: { secretName: '${LOKI_S3_SECRET}', bucketsKey: 'bucket', @@ -92,7 +97,7 @@ local lokiCaches = (import 'components/loki-caches.libsonnet'); ringName: 'gossip-ring', }, wal: { - replayMemoryCeiling: '4GB', // overwritten in observatorium-logs-template-overwrites.libsonnet + replayMemoryCeiling: '${LOKI_INGESTER_WAL_REPLAY_MEMORY_CEILING}', }, replicas: { compactor: 1, // Loki supports only a single compactor instance. @@ -255,16 +260,12 @@ local lokiCaches = (import 'components/loki-caches.libsonnet'); }, ruler+: { enable_alertmanager_discovery: true, - enable_alertmanager_v2: false, - alertmanager_url: 'http://_http._tcp.observatorium-alertmanager.${ALERTMANAGER_NAMESPACE}.svc.cluster.local', + enable_alertmanager_v2::: false, + alertmanager_url::: 'http://_http._tcp.observatorium-alertmanager.${ALERTMANAGER_NAMESPACE}.svc.cluster.local', alertmanager_refresh_interval: '1m', }, - querier+: { - query_timeout:: '', - }, server+: { http_server_write_timeout: '10m', - log_level: '${LOKI_LOG_LEVEL}', }, tracing: { enabled: true,