-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathconfiguration.yaml
304 lines (300 loc) · 14.3 KB
/
configuration.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
##
## NOTE: Change settings here, in the corresponding
## Secret or ConfigMap entry. Ensure settings
## are uncommented here AND in deployment.yaml
## in order for them to take effect.
##
---
## Secret holding the credentials read by the circonus-kubernetes-agent.
apiVersion: v1
kind: Secret
metadata:
  # versioned, cadence independent of app version
  name: cka-secrets-v1
  labels:
    app.kubernetes.io/name: circonus-kubernetes-agent
stringData:
  ## Circonus API Key is REQUIRED
  circonus-api-key: ""
  ## For in-cluster operation, the service account token
  ## will be used. Only set this to use a DIFFERENT token
  ## than the kubernetes-bearer-token-file setting
  ## below. The file will always take precedence, ensure
  ## kubernetes-bearer-token-file is set to "" when using
  ## this setting.
  #kubernetes-bearer-token: ""
---
## ConfigMap holding the non-secret settings for the circonus-kubernetes-agent.
## Commented-out keys (single '#') show optional settings; uncomment to set them.
apiVersion: v1
kind: ConfigMap
metadata:
  # versioned, cadence independent of app version
  name: cka-config-v1
  labels:
    app.kubernetes.io/name: circonus-kubernetes-agent
data:
  #circonus-api-key-file: ""
  #circonus-api-app: "circonus-kubernetes-agent"
  #circonus-api-url: "https://api.circonus.com/v2"
  #circonus-api-ca-file: ""
  #circonus-api-debug: "false"
  ## broker to use when creating a new httptrap check
  #circonus-check-broker-cid: "/broker/35"
  #circonus-check-broker-ca-file: ""
  ## create a check, if one cannot be found using the target
  #circonus-check-create: "true"
  ## or, turn create off, and specify a check which has already been created
  #circonus-check-bundle-cid: ""
  ## comma delimited list of k:v tags to add to the check
  #circonus-check-tags: ""
  ## Use a static target to ensure that the agent can find the check
  ## the next time the pod starts. Otherwise, the pod's hostname will
  ## be used and a new check would be created each time the pod is
  ## created when create is enabled. The kubernetes-name will be
  ## used if check-target is not set.
  circonus-check-target: ""
  ## set a custom display title for the check when it is created
  #circonus-check-title: ""
  ## comma delimited list of k:v streamtags to add to every metric
  #circonus-default-streamtags: ""
  ##
  ## set a name identifying the cluster, to be used in the check
  ## title when it is created
  kubernetes-name: ""
  #kubernetes-api-url: "https://kubernetes.default.svc"
  #kubernetes-api-ca-file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
  #kubernetes-bearer-token-file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
  ## collect event metrics - default is enabled for dashboard
  kubernetes-enable-events: "true"
  ## collect metrics from kube-state-metrics if running - default is enabled for dashboard
  kubernetes-enable-kube-state-metrics: "true"
  ## kube-state-metrics fieldSelector query, default from https://github.com/kubernetes/kube-state-metrics/blob/master/examples/standard/service.yaml
  kubernetes-ksm-field-selector-query: "metadata.name=kube-state-metrics"
  ## kube-state-metrics metrics port, no default, service endpoint ports will be used if not set
  kubernetes-ksm-metrics-port: ""
  ## kube-state-metrics metrics port name, default from https://github.com/kubernetes/kube-state-metrics/blob/master/examples/standard/service.yaml
  ## if using helm or some other tool, look at the configuration to see if the port is named differently in the service endpoint...
  kubernetes-ksm-metrics-port-name: "http-metrics"
  ## collect metrics from api-server - default is enabled for dashboard
  kubernetes-enable-api-server: "true"
  ## collect node metrics - default is enabled for dashboard
  kubernetes-enable-nodes: "true"
  ## expression to use for node labelSelector - blank = all nodes
  kubernetes-node-selector: ""
  ## collect kubelet /stats/summary performance metrics (e.g. cpu, memory, fs) - default is enabled for dashboard (k8s <v1.18 only)
  kubernetes-enable-node-stats: "true"
  ## collect kubelet /metrics observation metrics - default is enabled for dashboard
  kubernetes-enable-node-metrics: "true"
  ## enable kubelet cadvisor metrics
  kubernetes-enable-cadvisor-metrics: "false"
  ## enable kubelet node resource metrics (k8s v1.18+)
  kubernetes-enable-node-resource-metrics: "true"
  ## enable kubelet node probe metrics (k8s v1.18+)
  kubernetes-enable-node-probe-metrics: "false"
  ## enable kube-dns/coredns metrics - default is enabled for dashboard
  kubernetes-enable-dns-metrics: "true"
  ## port to request `/metrics` from if scrape/port annotations not defined on kube-dns/coredns service (e.g. GKE)
  kubernetes-dns-metrics-port: "10054"
  ## include pod metrics, requires nodes to be enabled - default is enabled for dashboard
  kubernetes-include-pod-metrics: "true"
  ## include only pods with this label key, blank = all pods
  kubernetes-pod-label-key: ""
  ## include only pods with label key and value, blank = all pods with label key
  kubernetes-pod-label-val: ""
  ## include container metrics, requires nodes+pods to be enabled
  kubernetes-include-container-metrics: "false"
  ## collection interval, how often to collect metrics (note if a previous
  ## collection is still in progress another will NOT be started)
  #kubernetes-collection-interval: "1m"
  ## api request timelimit
  #kubernetes-api-timelimit: "10s"
  ##
  ## dynamic collectors (see readme in github repository)
  ##
  ## NOTE(review): nesting below was reconstructed from the key names;
  ## confirm against the dynamic collectors section of the readme.
  dynamic-collectors.yaml: |
    collectors:
      - name: ""
        disable: true
        type: ""
        schema: ""
        selectors:
          label: ""
          field: ""
        control:
          annotation: ""
          label: ""
          value: ""
        metric_port:
          annotation: ""
          label: ""
          value: ""
        metric_path:
          annotation: ""
          label: ""
          value: ""
  ##
  ## Metric filters control which metrics are passed on by the broker
  ## NOTE: This list is applied to the check every time the agent pod starts.
  ##       Updates through any other method will be overwritten by this list.
  ##
  ## NOTE: By default, all dynamically collected metrics will all be submitted.
  ##       To apply filters to them, remove the current 'NO_LOCAL_FILTER' rule, and
  ##       add specific rules to control the flow of metrics.
  metric-filters.json: |
    {
      "metric_filters": [
        ["allow", "^.+$", "tags", "and(collector:dynamic)", "NO_LOCAL_FILTER dynamically collected metrics"],
        ["allow", "^(Disk|Memory|PID)Pressure$", "node status"],
        ["allow", "^(container|node|pod)_.*$", "node metrics k8s v1.18+"],
        ["allow", "^(kube_)?pod_container_status_(running|terminated|waiting|ready)(_count)?$", "containers"],
        ["allow", "^(kube_)?pod_container_status_(terminated|waiting)_reason(_count)?$", "containers health"],
        ["allow", "^(kube_)?pod_init_container_status_(terminated|waiting)_reason(_count)?$", "init containers health"],
        ["allow", "^(kube_)?pod_status_(ready|scheduled)(_count)?$", "tags", "and(condition:true)", "pods"],
        ["allow", "^(kube_)?pod_status_phase(_count)?$", "tags", "and(or(phase:Running,phase:Pending,phase:Failed,phase:Succeeded))", "pods"],
        ["allow", "^(node|kubelet_running_pod_count|Ready)$", "nodes"],
        ["allow", "^(pod|node)_cpu_usage_seconds_total$", "utilization"],
        ["allow", "^(pod|node)_memory_working_set_bytes$", "utilization"],
        ["allow", "^(used|capacity)$", "tags", "and(or(units:bytes,units:percent),or(resource:memory,resource:fs,volume_name:*),not(container_name:*),not(sys_container:*))", "utilization"],
        ["allow", "^NetworkUnavailable$", "node status"],
        ["allow", "^[rt]x$", "tags", "and(resource:network,or(units:bytes,units:errors),not(container_name:*),not(sys_container:*))", "utilization"],
        ["allow", "^apiserver_request_total$", "tags", "and(or(code:5*,code:4*))", "api req errors"],
        ["allow", "^authenticated_user_requests$", "api auth"],
        ["allow", "^authentication_attempts$", "api auth health"],
        ["allow", "^cadvisor.*$", "cadvisor"],
        ["allow", "^capacity_.*$", "node capacity"],
        ["allow", "^collect_.*$", "agent collection stats"],
        ["allow", "^coredns*", "dns health"],
        ["allow", "^coredns_(dns|forward)_request_(count_total|duration_seconds_avg)$", "dns health"],
        ["allow", "^coredns_(dns|forward)_response_rcode_count_total$", "dns health"],
        ["allow", "^daemonset_scheduled_delta$", "health"],
        ["allow", "^deployment_generation_delta$", "health"],
        ["allow", "^events$", "events"],
        ["allow", "^kube_(service_labels|deployment_labels|pod_container_info|pod_deleted)$", "ksm inventory"],
        ["allow", "^kube_(service|deployment)_labels$", "ksm inventory"],
        ["allow", "^kube_daemonset_status_(current|desired)_number_scheduled$", "health"],
        ["allow", "^kube_deployment_(created|spec_replicas)$", "deployments"],
        ["allow", "^kube_deployment_(metadata|status_observed)_generation$", "health"],
        ["allow", "^kube_deployment_status_(replicas|replicas_updated|replicas_available|replicas_unavailable)$", "deployments"],
        ["allow", "^kube_deployment_status_replicas_unavailable$", "deployments"],
        ["allow", "^kube_hpa_(spec_max|status_current)_replicas$", "scale"],
        ["allow", "^kube_job_status_failed$", "health"],
        ["allow", "^kube_namespace_status_phase$", "namespaces"],
        ["allow", "^kube_namespace_status_phase$", "tags", "and(or(phase:Active,phase:Terminating))", "namespaces"],
        ["allow", "^kube_node_spec_unschedulable$", "node status"],
        ["allow", "^kube_node_status_allocatable$", "node status"],
        ["allow", "^kube_node_status_condition$", "node status health"],
        ["allow", "^kube_persistentvolume_status_phase$", "health"],
        ["allow", "^kube_pod_info$", "pods"],
        ["allow", "^kube_pod_start_time$", "pods"],
        ["allow", "^kube_pod_status_condition$", "pods"],
        ["allow", "^kube_statefulset_status_(replicas|replicas_ready)$", "health"],
        ["allow", "^kubedns*","dns health"],
        ["allow", "^kubelet_.*$", "node metrics k8s v1.18+"],
        ["allow", "^machine_.*$", "node metrics k8s v1.18+"],
        ["allow", "^pod_container_status$", "containers"],
        ["allow", "^pod_container_status_(terminated|waiting)_reason(_count)?$", "containers health"],
        ["allow", "^pod_init_container_status_(terminated|waiting)_reason(_count)?$", "init containers health"],
        ["allow", "^pod_status_(ready|scheduled)$", "pods"],
        ["allow", "^pod_status_phase$", "pods"],
        ["allow", "^prober_.*$", "node metrics/probes k8s v1.18+"],
        ["allow", "^resource_(request|limit)$", "resources"],
        ["allow", "^statefulset_replica_delta$", "health"],
        ["allow", "^usage(Milli|Nano)Cores$", "tags", "and(not(container_name:*),not(sys_container:*))", "utilization"],
        ["allow", "^utilization$", "utilization health"],
        ["deny", "^.+$", "all other metrics"]
      ]
    }
  ##
  ## alert configuration
  ##
  ## one of, contact.email OR contact.group_cid
  ##
  ## Note: if neither supplied, all alerting and some dashboard functionality will be disabled
  ##
  default-alerts.json: |
    {
      "contact": {
        "email": "",
        "group_cid": ""
      },
      "rule_settings": {
        "crashloops_container": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "crashloops_init_container": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "cpu_utilization": {
          "disabled": true,
          "threshold": "75",
          "window": 900
        },
        "disk_pressure": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "memory_pressure": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "pid_pressure": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "network_unavailable": {
          "disabled": true,
          "threshold": "0.99",
          "window": 300
        },
        "job_failures": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "persistent_volume_failures": {
          "disabled": true,
          "threshold": "0",
          "window": 300
        },
        "pod_pending_delays": {
          "disabled": true,
          "threshold": "0.99",
          "window": 900
        },
        "deployment_glitches": {
          "disabled": true,
          "min_threshold": "0",
          "min_window": 300,
          "max_threshold": "0",
          "max_window": 300
        },
        "daemonsets_not_ready": {
          "disabled": true,
          "min_threshold": "0",
          "min_window": 300,
          "max_threshold": "0",
          "max_window": 300
        },
        "statefulsets_not_ready": {
          "disabled": true,
          "min_threshold": "0",
          "min_window": 300,
          "max_threshold": "0",
          "max_window": 300
        }
      }
    }
  ##
  ## "rules" is an array of custom alert rule set objects (see https://login.circonus.com/resources/api/calls/rule_set
  ## for more information on object format; alternatively, use the UI to view the API Object of an existing rule set)
  ##
  custom-rules.json: |
    {
      "rules":[
      ]
    }