Commit f2df285

feat(PrometheusPluginRework) Prometheus 3.0 Rework

Rework and Upgrade Prometheus Plugins Metrics and Labels

hfukada committed May 9, 2022
1 parent 23f50ea · commit f2df285

Showing 6 changed files with 145 additions and 101 deletions.
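
At a glance, the rework consolidates the two per-subsystem connection gauges into a single node-labelled gauge, folds the old http_status and http_consumer_status counters into one request_count counter with source and consumer labels, and splits the shared latency histogram into kong_latency, upstream_latency, and total_request_latency with purpose-built buckets. As a rough sketch of what a scrape returns after this change (label values here are invented; exposed names carry the exporter's kong_ prefix from init(shm, "kong_")):

  kong_nginx_current_connections{node_id="849373c5-45c1-4c1d-b595-fdeaea6daed8",subsystem="http",state="active"} 2
  kong_node_info{node_id="849373c5-45c1-4c1d-b595-fdeaea6daed8",version="3.0.0"} 1
  kong_request_count{service="example-service",route="example-route",code="200",source="upstream",consumer="alice"} 5
  kong_bandwidth{service="example-service",route="example-route",direction="egress",consumer="alice"} 1024
  kong_upstream_latency_bucket{service="example-service",route="example-route",le="250"} 3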
169 changes: 104 additions & 65 deletions kong/plugins/prometheus/exporter.lua
@@ -16,12 +16,13 @@ local stream_available, stream_api = pcall(require, "kong.tools.stream_api")
 
 local role = kong.configuration.role
 
-local DEFAULT_BUCKETS = { 1, 2, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 70,
-                          80, 90, 100, 200, 300, 400, 500, 1000,
-                          2000, 5000, 10000, 30000, 60000 }
+local KONG_LATENCY_BUCKETS = { 1, 2, 5, 7, 10, 15, 20, 30, 50, 75, 100, 200, 500, 750, 1000}
+local UPSTREAM_LATENCY_BUCKETS = {25, 50, 80, 100, 250, 400, 700, 1000, 2000, 5000, 10000, 30000, 60000 }
 
 local metrics = {}
 -- prometheus.lua instance
 local prometheus
+local node_id = kong.node.get_id()
 
 -- use the same counter library shipped with Kong
 package.loaded['prometheus_resty_counter'] = require("resty.counter")
@@ -39,21 +39,18 @@ local function init()
   prometheus = require("kong.plugins.prometheus.prometheus").init(shm, "kong_")
 
   -- global metrics
-  if kong_subsystem == "http" then
-    metrics.connections = prometheus:gauge("nginx_http_current_connections",
-                                           "Number of HTTP connections",
-                                           {"state"})
-  else
-    metrics.connections = prometheus:gauge("nginx_stream_current_connections",
-                                           "Number of Stream connections",
-                                           {"state"})
-  end
+  metrics.connections = prometheus:gauge("nginx_current_connections",
+                                         "Number of connections by subsystem",
+                                         {"node_id", "subsystem", "state"})
   metrics.timers = prometheus:gauge("nginx_timers",
                                     "Number of nginx timers",
                                     {"state"})
   metrics.db_reachable = prometheus:gauge("datastore_reachable",
                                           "Datastore reachable from Kong, " ..
                                           "0 is unreachable")
+  metrics.node_info = prometheus:gauge("node_info",
+                                       "Kong Node metadata information",
+                                       {"node_id", "version"})
   -- only export upstream health metrics in traditional mode and data plane
   if role ~= "control_plane" then
     metrics.upstream_target_health = prometheus:gauge("upstream_target_health",
@@ -66,44 +64,50 @@ local function init()
   local memory_stats = {}
   memory_stats.worker_vms = prometheus:gauge("memory_workers_lua_vms_bytes",
                                              "Allocated bytes in worker Lua VM",
-                                             {"pid", "kong_subsystem"})
+                                             {"node_id", "pid", "kong_subsystem"})
   memory_stats.shms = prometheus:gauge("memory_lua_shared_dict_bytes",
                                        "Allocated slabs in bytes in a shared_dict",
-                                       {"shared_dict", "kong_subsystem"})
+                                       {"node_id", "shared_dict", "kong_subsystem"})
   memory_stats.shm_capacity = prometheus:gauge("memory_lua_shared_dict_total_bytes",
                                                "Total capacity in bytes of a shared_dict",
-                                               {"shared_dict", "kong_subsystem"})
+                                               {"node_id", "shared_dict", "kong_subsystem"})
 
   local res = kong.node.get_memory_stats()
   for shm_name, value in pairs(res.lua_shared_dicts) do
-    memory_stats.shm_capacity:set(value.capacity, { shm_name, kong_subsystem })
+    memory_stats.shm_capacity:set(value.capacity, { node_id, shm_name, kong_subsystem })
   end
 
   metrics.memory_stats = memory_stats
 
   -- per service/route
   if kong_subsystem == "http" then
-    metrics.status = prometheus:counter("http_status",
-                                        "HTTP status codes per service/route in Kong",
-                                        {"service", "route", "code"})
+    metrics.status = prometheus:counter("request_count",
+                                        "HTTP status codes per consumer/service/route in Kong",
+                                        {"service", "route", "code", "source", "consumer"})
   else
     metrics.status = prometheus:counter("stream_status",
-                                        "Stream status codes per service/route in Kong",
-                                        {"service", "route", "code"})
+                                        "Stream status codes per consumer/service/route in Kong",
+                                        {"service", "route", "code", "source", "consumer"})
   end
-  metrics.latency = prometheus:histogram("latency",
-                                         "Latency added by Kong, total " ..
-                                         "request time and upstream latency " ..
-                                         "for each service/route in Kong",
-                                         {"service", "route", "type"},
-                                         DEFAULT_BUCKETS) -- TODO make this configurable
+  metrics.kong_latency = prometheus:histogram("kong_latency",
+                                              "Latency added by Kong and enabled plugins " ..
+                                              "for each service/route in Kong",
+                                              {"service", "route"},
+                                              KONG_LATENCY_BUCKETS)
+  metrics.upstream_latency = prometheus:histogram("upstream_latency",
+                                                  "Latency added by upstream response " ..
+                                                  "for each service/route in Kong",
+                                                  {"service", "route"},
+                                                  UPSTREAM_LATENCY_BUCKETS)
+  metrics.total_latency = prometheus:histogram("total_request_latency",
+                                               "Total latency incurred during requests " ..
+                                               "for each service/route in Kong",
+                                               {"service", "route"},
+                                               UPSTREAM_LATENCY_BUCKETS)
   metrics.bandwidth = prometheus:counter("bandwidth",
-                                         "Total bandwidth in bytes " ..
-                                         "consumed per service/route in Kong",
-                                         {"service", "route", "type"})
-  metrics.consumer_status = prometheus:counter("http_consumer_status",
-                                               "HTTP status codes for customer per service/route in Kong",
-                                               {"service", "route", "code", "consumer"})
+                                         "Total bandwidth (ingress/egress) " ..
+                                         "throughput in bytes",
+                                         {"service", "route", "direction", "consumer"})
 
   -- Hybrid mode status
   if role == "control_plane" then
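
Aside: the single DEFAULT_BUCKETS histogram above is replaced by purpose-specific bucket sets, so Kong-side overhead (small values) and upstream response times (large values) each get sensible resolution. A minimal Lua sketch, not part of the commit, of where one observation lands:

  -- Prometheus histogram buckets are cumulative: an observation increments
  -- every bucket whose upper bound ("le") is >= the observed value.
  local UPSTREAM_LATENCY_BUCKETS = { 25, 50, 80, 100, 250, 400, 700, 1000,
                                     2000, 5000, 10000, 30000, 60000 }

  -- Smallest bucket that counts a given latency (in milliseconds).
  local function smallest_bucket(ms)
    for _, le in ipairs(UPSTREAM_LATENCY_BUCKETS) do
      if ms <= le then return le end
    end
    return math.huge  -- the implicit +Inf bucket
  end

  print(smallest_bucket(120))    --> 250
  print(smallest_bucket(99999))  --> inf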
@@ -146,8 +150,9 @@ end
 
 -- Since in the prometheus library we create a new table for each diverged label
 -- so putting the "more dynamic" label at the end will save us some memory
-local labels_table = {0, 0, 0}
-local labels_table4 = {0, 0, 0, 0}
+local labels_table = {0, 0, 0, 0}
+local labels_table_status = {0, 0, 0, 0, 0}
+local latency_labels_table = {0, 0}
 local upstream_target_addr_health_table = {
   { value = 0, labels = { 0, 0, 0, "healthchecks_off", ngx.config.subsystem } },
   { value = 0, labels = { 0, 0, 0, "healthy", ngx.config.subsystem } },
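
Aside: the comment above is why these tables are preallocated at module level. A minimal Lua sketch, not part of the commit (function and label names are hypothetical), of the allocation the pattern avoids:

  -- Building a fresh labels table on every request creates garbage per hit:
  local function labels_fresh(service, route, direction, consumer)
    return { service, route, direction, consumer }
  end

  -- Reusing one preallocated table does the same job allocation-free:
  local labels_table = { 0, 0, 0, 0 }
  local function labels_reused(service, route, direction, consumer)
    labels_table[1], labels_table[2] = service, route
    labels_table[3], labels_table[4] = direction, consumer
    return labels_table  -- same table every call
  end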
@@ -190,10 +195,33 @@ if kong_subsystem == "http" then
       route_name = message.route.name or message.route.id
     end
 
+    local consumer = ""
+    if message and serialized.consumer ~= nil then
+      consumer = serialized.consumer
+    end
+
     labels_table[1] = service_name
     labels_table[2] = route_name
-    labels_table[3] = message.response.status
-    metrics.status:inc(1, labels_table)
+    labels_table[4] = consumer
+
+    labels_table_status[1] = service_name
+    labels_table_status[2] = route_name
+    labels_table_status[3] = message.response.status
+    labels_table_status[5] = consumer
+
+    latency_labels_table[1] = service_name
+    latency_labels_table[2] = route_name
+
+    -- If number of upstream requests is greater than 0, assume upstream is the status code originator
+    -- Else, assume kong is the originator.
+    if(#(message.tries) > 0) then
+      labels_table_status[4] = "upstream"
+    else
+      labels_table_status[4] = "kong"
+    end
+
+    metrics.status:inc(1, labels_table_status)
 
     local request_size = tonumber(message.request.size)
    if request_size and request_size > 0 then
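
Aside: the new source label above hinges on message.tries, the log serializer's list of balancer attempts; an empty list means Kong produced the response without contacting an upstream (for example, a plugin short-circuited the request). A minimal Lua sketch, not part of the commit, with hypothetical log entries:

  local message = { tries = {} }
  print(#message.tries > 0 and "upstream" or "kong")  --> kong

  message = { tries = { { ip = "10.0.0.1", port = 8000 } } }
  print(#message.tries > 0 and "upstream" or "kong")  --> upstream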
@@ -209,33 +237,22 @@
 
     local request_latency = message.latencies.request
     if request_latency and request_latency >= 0 then
-      labels_table[3] = "request"
-      metrics.latency:observe(request_latency, labels_table)
+      metrics.total_latency:observe(request_latency, latency_labels_table)
     end
 
     local upstream_latency = message.latencies.proxy
     if upstream_latency ~= nil and upstream_latency >= 0 then
-      labels_table[3] = "upstream"
-      metrics.latency:observe(upstream_latency, labels_table)
+      metrics.upstream_latency:observe(upstream_latency, latency_labels_table)
     end
 
     local kong_proxy_latency = message.latencies.kong
     if kong_proxy_latency ~= nil and kong_proxy_latency >= 0 then
-      labels_table[3] = "kong"
-      metrics.latency:observe(kong_proxy_latency, labels_table)
+      metrics.kong_latency:observe(kong_proxy_latency, latency_labels_table)
     end
-
-    if serialized.consumer ~= nil then
-      labels_table4[1] = labels_table[1]
-      labels_table4[2] = labels_table[2]
-      labels_table4[3] = message.response.status
-      labels_table4[4] = serialized.consumer
-      metrics.consumer_status:inc(1, labels_table4)
-    end
   end
 
 else
-  function log(message)
+  function log(message, serialized)
     if not metrics then
       kong.log.err("prometheus: can not log metrics because of an initialization "
                    .. "error, please make sure that you've declared "
@@ -256,10 +273,34 @@ else
       route_name = message.route.name or message.route.id
     end
 
+    local consumer = ""
+    if message and serialized.consumer ~= nil then
+      consumer = serialized.consumer
+    end
+
+
     labels_table[1] = service_name
     labels_table[2] = route_name
-    labels_table[3] = message.session.status
-    metrics.status:inc(1, labels_table)
+    labels_table[4] = consumer
+
+    latency_labels_table[1] = service_name
+    latency_labels_table[2] = route_name
+
+    labels_table_status[1] = service_name
+    labels_table_status[2] = route_name
+    labels_table_status[3] = message.session.status
+    labels_table_status[5] = consumer
+
+    -- If number of upstream requests is greater than 0, assume upstream is the status code originator
+    -- Else, assume kong is the originator.
+    if(#(message.tries) > 0) then
+      labels_table_status[4] = "upstream"
+    else
+      labels_table_status[4] = "kong"
+    end
+
+    metrics.status:inc(1, labels_table_status)
 
     local ingress_size = tonumber(message.session.received)
     if ingress_size and ingress_size > 0 then
@@ -275,14 +316,12 @@ else
 
     local session_latency = message.latencies.session
     if session_latency and session_latency >= 0 then
-      labels_table[3] = "request"
-      metrics.latency:observe(session_latency, labels_table)
+      metrics.total_latency:observe(session_latency, latency_labels_table)
     end
 
     local kong_proxy_latency = message.latencies.kong
     if kong_proxy_latency ~= nil and kong_proxy_latency >= 0 then
-      labels_table[3] = "kong"
-      metrics.latency:observe(kong_proxy_latency, labels_table)
+      metrics.kong_latency:observe(kong_proxy_latency, latency_labels_table)
     end
   end
 end
@@ -296,13 +335,13 @@ local function metric_data()
   end
 
   local nginx_statistics = kong.nginx.get_statistics()
-  metrics.connections:set(nginx_statistics['connections_accepted'], { "accepted" })
-  metrics.connections:set(nginx_statistics['connections_handled'], { "handled" })
-  metrics.connections:set(nginx_statistics['total_requests'], { "total" })
-  metrics.connections:set(nginx_statistics['connections_active'], { "active" })
-  metrics.connections:set(nginx_statistics['connections_reading'], { "reading" })
-  metrics.connections:set(nginx_statistics['connections_writing'], { "writing" })
-  metrics.connections:set(nginx_statistics['connections_waiting'], { "waiting" })
+  metrics.connections:set(nginx_statistics['connections_accepted'], { node_id, kong_subsystem, "accepted" })
+  metrics.connections:set(nginx_statistics['connections_handled'], { node_id, kong_subsystem, "handled" })
+  metrics.connections:set(nginx_statistics['total_requests'], { node_id, kong_subsystem, "total" })
+  metrics.connections:set(nginx_statistics['connections_active'], { node_id, kong_subsystem, "active" })
+  metrics.connections:set(nginx_statistics['connections_reading'], { node_id, kong_subsystem, "reading" })
+  metrics.connections:set(nginx_statistics['connections_writing'], { node_id, kong_subsystem, "writing" })
+  metrics.connections:set(nginx_statistics['connections_waiting'], { node_id, kong_subsystem,"waiting" })
 
   metrics.timers:set(ngx_timer_running_count(), {"running"})
   metrics.timers:set(ngx_timer_pending_count(), {"pending"})
@@ -357,11 +396,11 @@ local function metric_data()
   -- memory stats
   local res = kong.node.get_memory_stats()
   for shm_name, value in pairs(res.lua_shared_dicts) do
-    metrics.memory_stats.shms:set(value.allocated_slabs, { shm_name, kong_subsystem })
+    metrics.memory_stats.shms:set(value.allocated_slabs, { node_id, shm_name, kong_subsystem })
   end
   for i = 1, #res.workers_lua_vms do
     metrics.memory_stats.worker_vms:set(res.workers_lua_vms[i].http_allocated_gc,
-                                        { res.workers_lua_vms[i].pid, kong_subsystem })
+                                        { node_id, res.workers_lua_vms[i].pid, kong_subsystem })
   end
 
   -- Hybrid mode status
2 changes: 1 addition & 1 deletion kong/plugins/prometheus/handler.lua
@@ -7,7 +7,7 @@ prometheus.init()
 
 local PrometheusHandler = {
   PRIORITY = 13,
-  VERSION = "1.6.0",
+  VERSION = "3.0.0",
 }
 
 function PrometheusHandler.init_worker()
(4 additional changed files not shown)
