
feat(plugin) Adjust Prometheus to be the reference Impl for metrics #8712

Merged · 4 commits · Jun 1, 2022

Changes from all commits
170 changes: 105 additions & 65 deletions kong/plugins/prometheus/exporter.lua
@@ -16,12 +16,13 @@ local stream_available, stream_api = pcall(require, "kong.tools.stream_api")

local role = kong.configuration.role

local DEFAULT_BUCKETS = { 1, 2, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 70,
80, 90, 100, 200, 300, 400, 500, 1000,
2000, 5000, 10000, 30000, 60000 }
local KONG_LATENCY_BUCKETS = { 1, 2, 5, 7, 10, 15, 20, 30, 50, 75, 100, 200, 500, 750, 1000}
local UPSTREAM_LATENCY_BUCKETS = {25, 50, 80, 100, 250, 400, 700, 1000, 2000, 5000, 10000, 30000, 60000 }
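
Note: the single DEFAULT_BUCKETS list is split into per-histogram bucket sets, so Kong-side latency (typically small) and upstream latency (potentially much larger) each get appropriate resolution. Prometheus histogram buckets are cumulative: an observation increments every bucket whose upper bound it does not exceed, plus the implicit +Inf bucket. A minimal illustration, not plugin code:

-- Illustration only: which cumulative buckets a single observation lands in.
local function buckets_hit(buckets, value)
  local hit = {}
  for _, le in ipairs(buckets) do
    if value <= le then
      hit[#hit + 1] = le
    end
  end
  return hit
end

-- For KONG_LATENCY_BUCKETS and a 120 ms observation:
-- buckets_hit(KONG_LATENCY_BUCKETS, 120) --> { 200, 500, 750, 1000 } (plus +Inf)
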

local metrics = {}
-- prometheus.lua instance
local prometheus
local node_id = kong.node.get_id()

-- use the same counter library shipped with Kong
package.loaded['prometheus_resty_counter'] = require("resty.counter")
@@ -39,21 +40,20 @@ local function init()
prometheus = require("kong.plugins.prometheus.prometheus").init(shm, "kong_")

-- global metrics
if kong_subsystem == "http" then
metrics.connections = prometheus:gauge("nginx_http_current_connections",
"Number of HTTP connections",
{"state"})
else
metrics.connections = prometheus:gauge("nginx_stream_current_connections",
"Number of Stream connections",
{"state"})
end
metrics.connections = prometheus:gauge("nginx_connections_total",
"Number of connections by subsystem",
{"node_id", "subsystem", "state"})
metrics.nginx_requests_total = prometheus:gauge("nginx_requests_total",
"Number of requests total", {"node_id", "subsystem"})
metrics.timers = prometheus:gauge("nginx_timers",
"Number of nginx timers",
{"state"})
metrics.db_reachable = prometheus:gauge("datastore_reachable",
"Datastore reachable from Kong, " ..
"0 is unreachable")
metrics.node_info = prometheus:gauge("node_info",
"Kong Node metadata information",
{"node_id", "version"})
-- only export upstream health metrics in traditional mode and data plane
if role ~= "control_plane" then
metrics.upstream_target_health = prometheus:gauge("upstream_target_health",
@@ -66,44 +66,60 @@ local function init()
local memory_stats = {}
memory_stats.worker_vms = prometheus:gauge("memory_workers_lua_vms_bytes",
"Allocated bytes in worker Lua VM",
{"pid", "kong_subsystem"})
{"node_id", "pid", "kong_subsystem"})
memory_stats.shms = prometheus:gauge("memory_lua_shared_dict_bytes",
"Allocated slabs in bytes in a shared_dict",
{"shared_dict", "kong_subsystem"})
{"node_id", "shared_dict", "kong_subsystem"})
memory_stats.shm_capacity = prometheus:gauge("memory_lua_shared_dict_total_bytes",
"Total capacity in bytes of a shared_dict",
{"shared_dict", "kong_subsystem"})
{"node_id", "shared_dict", "kong_subsystem"})

local res = kong.node.get_memory_stats()
for shm_name, value in pairs(res.lua_shared_dicts) do
memory_stats.shm_capacity:set(value.capacity, { shm_name, kong_subsystem })
memory_stats.shm_capacity:set(value.capacity, { node_id, shm_name, kong_subsystem })
end

metrics.memory_stats = memory_stats
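
Note: shm_capacity is set once here in init() because a shared_dict's capacity is fixed for the lifetime of the process, while allocated slabs change over time and are refreshed on every scrape in metric_data() (see the loop near the end of this diff). Since prometheus.init() was given the "kong_" prefix, a dashboard could derive per-dict utilization, for example:

-- PromQL, illustrative:
-- kong_memory_lua_shared_dict_bytes / kong_memory_lua_shared_dict_total_bytes
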

-- per service/route
if kong_subsystem == "http" then
metrics.status = prometheus:counter("http_status",
"HTTP status codes per service/route in Kong",
{"service", "route", "code"})
metrics.status = prometheus:counter("http_requests_total",
"HTTP status codes per consumer/service/route in Kong",
{"service", "route", "code", "source", "consumer"})
else
metrics.status = prometheus:counter("stream_status",
"Stream status codes per service/route in Kong",
{"service", "route", "code"})
"Stream status codes per consumer/service/route in Kong",
{"service", "route", "code", "source"})
end
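
Note: the new "source" label separates responses produced by the upstream service from responses generated by Kong itself (for example, an auth plugin short-circuiting the request). The classification mirrors the logic applied later in this diff:

-- Illustration of the source classification used in log():
local source = kong.response.get_source() == "service" and "service" or "kong"

kong.response.get_source() can also return values such as "exit" or "error"; this PR buckets everything that is not "service" under "kong".
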
metrics.kong_latency = prometheus:histogram("kong_latency_ms",
"Latency added by Kong and enabled plugins " ..
"for each service/route in Kong",
{"service", "route"},
KONG_LATENCY_BUCKETS)
metrics.upstream_latency = prometheus:histogram("upstream_latency_ms",
"Latency added by upstream response " ..
"for each service/route in Kong",
{"service", "route"},
UPSTREAM_LATENCY_BUCKETS)


if kong_subsystem == "http" then
metrics.total_latency = prometheus:histogram("request_latency_ms",
"Total latency incurred during requests " ..
"for each service/route in Kong",
{"service", "route"},
UPSTREAM_LATENCY_BUCKETS)
else
metrics.total_latency = prometheus:histogram("session_duration_ms",
"latency incurred in stream session " ..
"for each service/route in Kong",
{"service", "route"},
UPSTREAM_LATENCY_BUCKETS)
end
metrics.latency = prometheus:histogram("latency",
"Latency added by Kong, total " ..
"request time and upstream latency " ..
"for each service/route in Kong",
{"service", "route", "type"},
DEFAULT_BUCKETS) -- TODO make this configurable
metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes " ..
"consumed per service/route in Kong",
{"service", "route", "type"})
metrics.consumer_status = prometheus:counter("http_consumer_status",
"HTTP status codes for customer per service/route in Kong",
{"service", "route", "code", "consumer"})
metrics.bandwidth = prometheus:counter("bandwidth_bytes",
"Total bandwidth (ingress/egress) " ..
"throughput in bytes",
{"service", "route", "direction", "consumer"})

-- Hybrid mode status
if role == "control_plane" then
@@ -146,8 +162,9 @@ end

-- Since in the prometheus library we create a new table for each diverged label
-- so putting the "more dynamic" label at the end will save us some memory
local labels_table = {0, 0, 0}
local labels_table4 = {0, 0, 0, 0}
local labels_table = {0, 0, 0, 0}
local labels_table_status = {0, 0, 0, 0, 0}
local latency_labels_table = {0, 0}
local upstream_target_addr_health_table = {
{ value = 0, labels = { 0, 0, 0, "healthchecks_off", ngx.config.subsystem } },
{ value = 0, labels = { 0, 0, 0, "healthy", ngx.config.subsystem } },
Expand Down Expand Up @@ -190,10 +207,32 @@ if kong_subsystem == "http" then
route_name = message.route.name or message.route.id
end

local consumer = ""
if message and serialized.consumer ~= nil then
consumer = serialized.consumer
end

labels_table[1] = service_name
labels_table[2] = route_name
labels_table[3] = message.response.status
metrics.status:inc(1, labels_table)
labels_table[4] = consumer

labels_table_status[1] = service_name
labels_table_status[2] = route_name
labels_table_status[3] = message.response.status

if kong.response.get_source() == "service" then
labels_table_status[4] = "service"
else
labels_table_status[4] = "kong"
end

labels_table_status[5] = consumer

latency_labels_table[1] = service_name
latency_labels_table[2] = route_name

metrics.status:inc(1, labels_table_status)

local request_size = tonumber(message.request.size)
if request_size and request_size > 0 then
@@ -209,33 +248,22 @@

local request_latency = message.latencies.request
if request_latency and request_latency >= 0 then
labels_table[3] = "request"
metrics.latency:observe(request_latency, labels_table)
metrics.total_latency:observe(request_latency, latency_labels_table)
end

local upstream_latency = message.latencies.proxy
if upstream_latency ~= nil and upstream_latency >= 0 then
labels_table[3] = "upstream"
metrics.latency:observe(upstream_latency, labels_table)
metrics.upstream_latency:observe(upstream_latency, latency_labels_table)
end

local kong_proxy_latency = message.latencies.kong
if kong_proxy_latency ~= nil and kong_proxy_latency >= 0 then
labels_table[3] = "kong"
metrics.latency:observe(kong_proxy_latency, labels_table)
metrics.kong_latency:observe(kong_proxy_latency, latency_labels_table)
end
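
Note: the >= 0 guards matter. Fields in message.latencies can be absent, or reported as negative when a phase was never measured (for example, latencies.proxy when no upstream was contacted), and a negative observation would distort the histogram. The three blocks above all follow the same shape:

-- Illustrative guard, same pattern as above:
local v = message.latencies.request
if v and v >= 0 then
  metrics.total_latency:observe(v, latency_labels_table)
end
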

if serialized.consumer ~= nil then
labels_table4[1] = labels_table[1]
labels_table4[2] = labels_table[2]
labels_table4[3] = message.response.status
labels_table4[4] = serialized.consumer
metrics.consumer_status:inc(1, labels_table4)
end
end

else
function log(message)
function log(message, serialized)
if not metrics then
kong.log.err("prometheus: can not log metrics because of an initialization "
.. "error, please make sure that you've declared "
@@ -259,6 +287,16 @@ else
labels_table[1] = service_name
labels_table[2] = route_name
labels_table[3] = message.session.status

if kong.response.get_source() == "service" then
labels_table[4] = "service"
else
labels_table[4] = "kong"
end

latency_labels_table[1] = service_name
latency_labels_table[2] = route_name

metrics.status:inc(1, labels_table)

local ingress_size = tonumber(message.session.received)
@@ -275,14 +313,12 @@

local session_latency = message.latencies.session
if session_latency and session_latency >= 0 then
labels_table[3] = "request"
metrics.latency:observe(session_latency, labels_table)
metrics.total_latency:observe(session_latency, latency_labels_table)
end

local kong_proxy_latency = message.latencies.kong
if kong_proxy_latency ~= nil and kong_proxy_latency >= 0 then
labels_table[3] = "kong"
metrics.latency:observe(kong_proxy_latency, labels_table)
metrics.kong_latency:observe(kong_proxy_latency, latency_labels_table)
end
end
end
@@ -296,13 +332,17 @@ local function metric_data()
end

local nginx_statistics = kong.nginx.get_statistics()
metrics.connections:set(nginx_statistics['connections_accepted'], { "accepted" })
metrics.connections:set(nginx_statistics['connections_handled'], { "handled" })
metrics.connections:set(nginx_statistics['total_requests'], { "total" })
metrics.connections:set(nginx_statistics['connections_active'], { "active" })
metrics.connections:set(nginx_statistics['connections_reading'], { "reading" })
metrics.connections:set(nginx_statistics['connections_writing'], { "writing" })
metrics.connections:set(nginx_statistics['connections_waiting'], { "waiting" })
metrics.connections:set(nginx_statistics['connections_accepted'], { node_id, kong_subsystem, "accepted" })
metrics.connections:set(nginx_statistics['connections_handled'], { node_id, kong_subsystem, "handled" })
metrics.connections:set(nginx_statistics['total_requests'], { node_id, kong_subsystem, "total" })
metrics.connections:set(nginx_statistics['connections_active'], { node_id, kong_subsystem, "active" })
metrics.connections:set(nginx_statistics['connections_reading'], { node_id, kong_subsystem, "reading" })
metrics.connections:set(nginx_statistics['connections_writing'], { node_id, kong_subsystem, "writing" })
metrics.connections:set(nginx_statistics['connections_waiting'], { node_id, kong_subsystem, "waiting" })

metrics.nginx_requests_total:set(nginx_statistics['total_requests'], { node_id, kong_subsystem })
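
Note: nginx_requests_total and the connection states above are declared as gauges but set on each scrape from nginx's own cumulative statistics, so series like "accepted", "handled", and "total" still increase monotonically the way counters do. A dashboard can therefore rate them, for example:

-- PromQL, illustrative:
-- rate(kong_nginx_requests_total{node_id="..."}[5m])
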

metrics.timers:set(ngx_timer_running_count(), {"running"})
metrics.timers:set(ngx_timer_pending_count(), {"pending"})
@@ -357,11 +397,11 @@ local function metric_data()
-- memory stats
local res = kong.node.get_memory_stats()
for shm_name, value in pairs(res.lua_shared_dicts) do
metrics.memory_stats.shms:set(value.allocated_slabs, { shm_name, kong_subsystem })
metrics.memory_stats.shms:set(value.allocated_slabs, { node_id, shm_name, kong_subsystem })
end
for i = 1, #res.workers_lua_vms do
metrics.memory_stats.worker_vms:set(res.workers_lua_vms[i].http_allocated_gc,
{ res.workers_lua_vms[i].pid, kong_subsystem })
{ node_id, res.workers_lua_vms[i].pid, kong_subsystem })
end

-- Hybrid mode status
2 changes: 1 addition & 1 deletion kong/plugins/prometheus/handler.lua
@@ -7,7 +7,7 @@ prometheus.init()

local PrometheusHandler = {
PRIORITY = 13,
VERSION = "1.6.0",
VERSION = "3.0.0",
}

function PrometheusHandler.init_worker()