Skip to content

Commit

Permalink
Update namespace count query from metrics profile examples
Browse files Browse the repository at this point in the history
Signed-off-by: Raul Sevilla <[email protected]>
  • Loading branch information
rsevilla87 committed Jul 5, 2021
1 parent 8945673 commit 909b98c
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/metrics-profiles/metrics-aggregated.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ metrics:
metricName: P99APIEtcdRequestLatency

# Cluster metrics
- query: count(kube_namespace_created)
- query: sum(kube_namespace_status_phase) by (phase) > 0
metricName: namespaceCount

- query: sum(kube_pod_status_phase{}) by (phase)
Expand Down
89 changes: 87 additions & 2 deletions examples/metrics-profiles/metrics.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,93 @@
metrics:
# API server
- query: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb!~"WATCH", subresource!="log"}[2m])) by (verb,resource,subresource,instance,le)) > 0
metricName: API99thLatency

- query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH",subresource!="log"}[2m])) by (verb,instance,resource,code) > 0
metricName: APIRequestRate

- query: sum(apiserver_current_inflight_requests{}) by (request_kind) > 0
metricName: APIInflightRequests

# Containers & pod metrics
- query: sum(irate(container_cpu_usage_seconds_total{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|monitoring|logging|image-registry)"}[2m]) * 100) by (pod, namespace, node)
metricName: podCPU

- query: sum(container_memory_rss{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|monitoring|logging|image-registry)"}) by (pod, namespace, node)
metricName: podMemory

- query: (sum(rate(container_fs_writes_bytes_total{container!="",device!~".+dm.+"}[5m])) by (device, container, node) and on (node) kube_node_role{role="master"}) > 0
metricName: containerDiskUsage

# Kubelet & CRI-O metrics
- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
metricName: kubeletCPU

- query: sum(process_resident_memory_bytes{service="kubelet",job="kubelet"}) by (node) and on (node) kube_node_role{role="worker"}
metricName: kubeletMemory

- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
metricName: crioCPU

- query: sum(process_resident_memory_bytes{service="kubelet",job="crio"}) by (node) and on (node) kube_node_role{role="worker"}
metricName: crioMemory

# Node metrics
- query: sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) > 0
metricName: nodeCPU

- query: avg(node_memory_MemAvailable_bytes) by (instance)
metricName: nodeMemoryAvailable

- query: avg(node_memory_Active_bytes) by (instance)
metricName: nodeMemoryActive

- query: avg(node_memory_Cached_bytes) by (instance) + avg(node_memory_Buffers_bytes) by (instance)
metricName: nodeMemoryCached+nodeMemoryBuffers

- query: irate(node_network_receive_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: rxNetworkBytes

- query: irate(node_network_transmit_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
metricName: txNetworkBytes

- query: rate(node_disk_written_bytes_total{device!~"^(dm|rb).*"}[2m])
metricName: nodeDiskWrittenBytes

- query: rate(node_disk_read_bytes_total{device!~"^(dm|rb).*"}[2m])
metricName: nodeDiskReadBytes

- query: sum(rate(etcd_server_leader_changes_seen_total[2m]))
metricName: etcdLeaderChangesRate

# Etcd metrics
- query: etcd_server_is_leader > 0
metricName: etcdServerIsLeader

- query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))
metricName: 99thEtcdDiskBackendCommitDurationSeconds

- query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))
metricName: 99thEtcdDiskWalFsyncDurationSeconds

- query: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
metricName: 99thEtcdRoundTripTimeSeconds

- query: etcd_mvcc_db_total_size_in_bytes
metricName: etcdDBPhysicalSizeBytes

- query: etcd_mvcc_db_total_size_in_use_in_bytes
metricName: etcdDBLogicalSizeBytes

- query: sum(rate(etcd_object_counts{}[5m])) by (resource) > 0
metricName: etcdObjectCount

- query: sum by (cluster_version)(etcd_cluster_version)
metricName: etcdVersion
instant: true

# Cluster metrics
- query: count(kube_namespace_created)
- query: sum(kube_namespace_status_phase) by (phase) > 0
metricName: namespaceCount

- query: sum(kube_pod_status_phase{}) by (phase)
Expand Down Expand Up @@ -29,4 +115,3 @@ metrics:
- query: cluster_version{type="completed"}
metricName: clusterVersion
instant: true

0 comments on commit 909b98c

Please sign in to comment.