forked from criteo/consul-templaterb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetrics.erb
141 lines (127 loc) · 5.33 KB
/
metrics.erb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# A template for exporting metrics for prometheus using consul-templaterb
# This template can be configured with the following environment variables
# Environment variables to filter services/instances
# SERVICES_TAG_FILTER: basic tag filter for service (optional, no tag filter by default)
# INSTANCE_MUST_TAG: Second level of filtering (optional, default to SERVICES_TAG_FILTER)
# INSTANCE_EXCLUDE_TAG: Exclude instances having the given tag
# EXCLUDE_SERVICES: comma-separated services regexps to exclude (example: lbl7.*,netsvc-probe.*,consul-probed.*)
# PROMETHEUS_EXPORTED_SERVICE_META: comma-separated list of meta to export into metrics of service
<%
service_metas_to_export = (ENV['PROMETHEUS_EXPORTED_SERVICE_META'] || 'OWNERS,version,os,criteo_flavor,node_group').split(',')
service_tag_filter = ENV['SERVICES_TAG_FILTER'] || nil
instance_must_tag = ENV['INSTANCE_MUST_TAG'] || service_tag_filter
instance_exclude_tag = ENV['INSTANCE_EXCLUDE_TAG']
# Services to hide
services_blacklist_raw = (ENV['EXCLUDE_SERVICES'] || 'lbl7.*,netsvc-probe.*,consul-probed.*').split(',')
services_blacklist = services_blacklist_raw.map { |v| Regexp.new(v) }
backends = {}
all_stats = {}
service_count = 0
statuses = agent_members().map {|m| m.status}.reduce({}) do |sum, s|
v = sum[s] || 0
sum[s] = v + 1
sum
end
%>
# HELP consul_members_count A gauge of number of serf members with their count and their status('alive', 'leaving', 'left', 'failed')
# TYPE consul_members_count gauge
<%
statuses.each do |k, v|
%>
consul_members_count{serf="lan",status="<%= k %>"} <%= v %><%
end
# Walk the services returned for the tag filter and count instances per health
# state, grouped by service name plus the exported meta values.
services(tag: service_tag_filter).each do |service_name, tags|
  # Skip services matching one of the EXCLUDE_SERVICES patterns.
  next if services_blacklist.any? { |pattern| pattern.match(service_name) }
  # Second-level filter: the service must carry instance_must_tag when one is set.
  next unless instance_must_tag.nil? || tags.include?(instance_must_tag)

  service_count += 1
  srv = service(service_name)
  all_stats[service_name] = srv.endpoint.stats
  srv.each do |snode|
    # INSTANCE_EXCLUDE_TAG: ignore instances carrying the excluded tag; nothing
    # is filtered when the variable is unset. Array() tolerates a nil tag list.
    # NOTE(review): assumes snode['Service']['Tags'] holds the instance tags -- confirm.
    next if instance_exclude_tag && Array(snode['Service']['Tags']).include?(instance_exclude_tag)

    # The grouping key is the service name followed by every exported meta found.
    key_parts = [snode['Service']['Service']]
    metas = {}
    service_kind = snode['Service']['Kind']
    metas['__service_kind'] = service_kind if service_kind
    service_metas_to_export.each do |meta_name|
      meta_value = snode.service_or_node_meta_value(meta_name)
      next unless meta_value

      key_parts << "#{meta_name}=#{meta_value}"
      metas[meta_name] = meta_value
    end
    key = key_parts.join('::')

    # First instance seen for this key creates the entry with zeroed counters.
    back = (backends[key] ||= {
      metas: metas,
      service_name: service_name,
      state: {
        'passing' => 0,
        'warning' => 0,
        'critical' => 0,
      }
    })
    back[:state][snode.status] += 1
  end
end
%>
# HELP consul_datacenters_count A gauge of number of datacenters available
# TYPE consul_datacenters_count gauge
consul_datacenters_count <%= datacenters().count %>
# HELP consul_nodes_count A gauge of number of nodes in the cluster
# TYPE consul_nodes_count gauge
consul_nodes_count <%= nodes().count %>
# HELP consul_services_count A gauge of the number of services
# TYPE consul_services_count gauge
consul_services_count <%= service_count %>
<%
%i[success errors bytes_read changes network_bytes].each do |sym|
%>
# HELP consul_net_info_<%= sym.to_s %> consul-templaterb global stats for <%= sym.to_s %>
# TYPE consul_net_info_<%= sym.to_s %> counter
consul_net_info_<%= sym.to_s %> <%= @net_info[sym] %>
<%
end
%>
# HELP consul_service_count A gauge of number instances of service with their current state
# TYPE consul_service_count gauge
# HELP consul_service_stats_requests_total A counter of requests performed by consul-templaterb, shows activity on a service
# TYPE consul_service_stats_requests_total counter
# HELP consul_service_stats_requests_bytes A counter of bytes transferred from Consul Agent to consul-templaterb
# TYPE consul_service_stats_requests_bytes counter
# HELP consul_service_stats_requests_bytes_per_sec A gauge of bytes per second transferred from Consul Agent to consul-templaterb
# TYPE consul_service_stats_requests_bytes_per_sec gauge
# HELP consul_service_changes_total A counter of changes that occurred on a service
# TYPE consul_service_changes_total counter
<%
# Escapes a value so it can be placed inside a double-quoted Prometheus label value.
# Backslash, double quote and line feed are escaped as \\, \" and \n; every
# other character is passed through unchanged.
#
# @param val [#to_s] raw label value
# @return [String] the escaped value
def escape_meta(val)
  val.to_s.gsub(/[\\"\n]/) do |char|
    # A raw line feed would cut the sample line in two, so emit backslash + n instead.
    char == "\n" ? '\n' : "\\#{char}"
  end
end
json_backends = {}
now = Time.now.utc
backends.each_pair do |k, service_info|
service_name = service_info[:service_name]
meta_string = ""
service_info[:metas].each_pair do |k,v|
meta_string+=",#{k}=\"#{escape_meta(v)}\""
end
service_info[:state].each_pair do |state_name, state_count|
%>consul_service_count{service="<%= service_name %>",state="<%= state_name %>"<%= meta_string %>} <%= state_count %>
<%
end
end
# Global statistics
all_stats.each_pair do |service_name, current_stats|
%>consul_service_stats_requests_total{service="<%= escape_meta(service_name) %>",type="success"} <%= current_stats.successes %>
<%
if current_stats.errors > 0
%>consul_service_stats_requests_total{service="<%= escape_meta(service_name) %>",type="errors"} <%= current_stats.errors %>
<%
end
%>consul_service_stats_requests_bytes{service="<%= escape_meta(service_name) %>"} <%= current_stats.body_bytes %>
consul_service_stats_requests_bytes_per_sec{service="<%= escape_meta(service_name) %>"} <%= current_stats.bytes_per_sec(now) %>
consul_service_changes_total{service="<%= escape_meta(service_name) %>"} <%= current_stats.changes %>
<%
end
%>