Skip to content

Commit

Permalink
feat: #207 metrics ingestion endpoint v0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
bohdan-shulha committed Oct 5, 2024
1 parent ea97761 commit d94f130
Show file tree
Hide file tree
Showing 9 changed files with 523 additions and 7 deletions.
250 changes: 250 additions & 0 deletions api-nodes/Http/Controllers/MetricsController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
<?php

namespace ApiNodes\Http\Controllers;

use App\Models\DeploymentData\Process;
use App\Models\Node;
use App\Util\Promexport;
use Illuminate\Http\Request;
use Illuminate\Http\Response;
use Illuminate\Log\Logger;
use Illuminate\Support\Facades\Http;

/**
 * Ingestion endpoint for Prometheus text-format metrics pushed by a node.
 *
 * Every posted document is split into lines, each line is parsed with
 * Promexport::parseLine() and checked against an allowlist of metric names and
 * label values. Accepted lines are forwarded unchanged to the Prometheus import
 * endpoint, tagged with the swarm and node ids through `extra_label` query
 * parameters.
 */
class MetricsController
{
    /** Histogram bucket boundaries (`le` label) accepted for the Caddy histograms. */
    public const HISTOGRAM_LABELS = [
        '0.005',
        '0.01',
        '0.025',
        '0.05',
        '0.1',
        '0.25',
        '0.5',
        '1',
        '2.5',
        '5',
        '10',
        '+Inf',
    ];

    /** Values of the `path` label accepted for disk usage metrics. */
    public const DISK_USAGE_PATHS = [
        '/',
    ];

    /** Values of the `device` label accepted for disk I/O metrics. */
    public const DISK_IO_DEVICES = [
        'sda',
        'sdb',
        'sdc',
    ];

    /** Values of the `operation` label accepted for disk I/O metrics. */
    public const DISK_IO_OPERATIONS = [
        'reads',
        'writes',
    ];

    // TODO: use value from config (/env vars)
    private const IMPORT_URL = 'http://127.0.0.1:8080/api/v1/import/prometheus';

    /**
     * Filters the posted metrics documents and forwards the accepted lines.
     *
     * @param  Request  $request  Each top-level input value is treated as one metrics document;
     *                            values that are not strings are ignored.
     * @param  Logger  $log  Receives the service index, the accepted lines and the upstream response.
     * @param  Node  $node  The node the metrics are reported for.
     * @return Response Always a 204 response. Nothing is forwarded when the node has no swarm.
     */
    public function __invoke(Request $request, Logger $log, Node $node): Response
    {
        if ($node->swarm === null) {
            return new Response('{}', 204);
        }

        // TODO: cache this with the new Laravel cache system (stale-while-revalidate from the recent release)
        $interfaces = collect($node->data->host->networks)->pluck('if_name')->unique()->toArray();
        $services = $this->collectKnownServices($node);

        $log->info('Services:', [
            'services' => $services,
        ]);

        // The query string does not depend on the document, so it is built once.
        $query = '?extra_label=swarm_id='.$node->swarm->id.'&extra_label=node_id='.$node->id;

        foreach ($request->all() as $metricsDoc) {
            // Skips null entries as before, and also anything explode() could not handle.
            if (! is_string($metricsDoc)) {
                continue;
            }

            $ingestMetrics = $this->filterLines($metricsDoc, $services, $interfaces);

            $log->info('Metrics:', [
                'metrics' => $ingestMetrics,
            ]);

            // Nothing survived the allowlist: do not bother the storage with an empty import.
            if ($ingestMetrics === []) {
                continue;
            }

            $response = Http::withBody(implode("\n", $ingestMetrics), 'text/plain')->post(self::IMPORT_URL.$query);

            $log->info('VictoriaMetrics response:', [
                'status' => $response->status(),
                'body' => $response->body(),
            ]);
        }

        $response = new Response('{}', 204);
        $response->headers->set('Content-Type', 'application/json');

        return $response;
    }

    /**
     * Builds the lookup of rule ids and server names per service and process for the node's team.
     *
     * @return array<int|string, array<string, array{ruleIds: array, serversNames: array}>>
     *                                                                                      Keyed by service id, then by process name. Always contains
     *                                                                                      the synthetic `ptah_404` service.
     */
    private function collectKnownServices(Node $node): array
    {
        $services = $node->team->services->mapWithKeys(static function ($service) {
            // A service without a deployment contributes no processes instead of failing the request.
            $processes = collect($service->latestDeployment?->data->processes);

            $byProcess = $processes->mapWithKeys(static function (Process $process) {
                $ruleIds = collect($process->caddy)->pluck('id')
                    ->merge(collect($process->redirectRules)->pluck('id'))
                    ->merge(collect($process->rewriteRules)->pluck('id'))
                    ->toArray();

                $serversNames = collect($process->caddy)
                    ->pluck('publishedPort')
                    ->unique()
                    ->map(static fn ($port) => match ($port) {
                        80 => 'http',
                        443 => 'https',
                        default => 'listen_'.$port,
                    })
                    ->toArray();

                return [
                    $process->name => [
                        'ruleIds' => $ruleIds,
                        'serversNames' => $serversNames,
                    ],
                ];
            })->toArray();

            return [$service->id => $byProcess];
        })->toArray();

        $services['ptah_404'] = [
            'ptah_404' => [
                'ruleIds' => ['ptah_404'],
                'serversNames' => ['http', 'https'],
            ],
        ];

        return $services;
    }

    /**
     * Returns the lines of one metrics document that pass the allowlist, unchanged and in order.
     *
     * @param  array  $services  Lookup produced by collectKnownServices().
     * @param  array  $interfaces  Network interface names known for the node.
     * @return list<string>
     */
    private function filterLines(string $metricsDoc, array $services, array $interfaces): array
    {
        $accepted = [];

        foreach (explode("\n", $metricsDoc) as $line) {
            // Blank lines and comment lines (starting with '#') carry no samples.
            if (empty($line) || str_starts_with($line, '#')) {
                continue;
            }

            $metric = Promexport::parseLine($line);
            if (! $metric) {
                continue;
            }

            if ($this->isAllowed($metric['metric'], $metric['labels'] ?? [], $services, $interfaces)) {
                $accepted[] = $line;
            }
        }

        return $accepted;
    }

    /**
     * Decides whether a parsed sample may be forwarded. Unknown metric names are rejected.
     *
     * The Caddy checks are cumulative: bucket series need a known `le`, a valid status code and a
     * known route; count/sum series need a valid status code and a known route; the in-flight
     * gauge needs only a known route.
     */
    private function isAllowed(string $name, array $labels, array $services, array $interfaces): bool
    {
        return match ($name) {
            'ptah_caddy_http_requests_duration_bucket',
            'ptah_caddy_http_requests_ttfb_bucket' => $this->hasKnownBucket($labels)
                && $this->hasValidStatusCode($labels)
                && $this->matchesKnownRoute($labels, $services),

            'ptah_caddy_http_requests_count',
            'ptah_caddy_http_requests_duration_count',
            'ptah_caddy_http_requests_duration_sum',
            'ptah_caddy_http_requests_ttfb_count',
            'ptah_caddy_http_requests_ttfb_sum' => $this->hasValidStatusCode($labels)
                && $this->matchesKnownRoute($labels, $services),

            'ptah_caddy_http_requests_in_flight' => $this->matchesKnownRoute($labels, $services),

            'ptah_node_disk_io_ops_count' => ! empty($labels['device'])
                && ! empty($labels['operation'])
                && in_array($labels['device'], self::DISK_IO_DEVICES, true)
                && in_array($labels['operation'], self::DISK_IO_OPERATIONS, true),

            'ptah_node_network_rx_bytes',
            'ptah_node_network_tx_bytes' => ! empty($labels['interface'])
                && in_array($labels['interface'], $interfaces, true),

            'ptah_node_disk_usage_free',
            'ptah_node_disk_usage_total',
            'ptah_node_disk_usage_used' => ! empty($labels['path'])
                && in_array($labels['path'], self::DISK_USAGE_PATHS, true),

            'ptah_node_cpu_idle',
            'ptah_node_cpu_nice',
            'ptah_node_cpu_system',
            'ptah_node_cpu_total',
            'ptah_node_cpu_user',
            'ptah_node_load_avg_1',
            'ptah_node_load_avg_5',
            'ptah_node_load_avg_15',
            'ptah_node_memory_free',
            'ptah_node_memory_total',
            'ptah_node_memory_used',
            'ptah_node_uptime' => true,

            default => false,
        };
    }

    /** Whether the `le` label is one of the accepted histogram boundaries. */
    private function hasKnownBucket(array $labels): bool
    {
        // Deliberately a loose comparison, as before: numerically equal spellings
        // of a boundary (e.g. "1.0" for "1") are still accepted.
        return ! empty($labels['le']) && in_array($labels['le'], self::HISTOGRAM_LABELS);
    }

    /** Whether the `status_code` label is a plain integer in the 100..599 range. */
    private function hasValidStatusCode(array $labels): bool
    {
        $code = (string) ($labels['status_code'] ?? '');

        // Digits only: comparing a non-numeric string such as "2xx" with an int falls back
        // to string comparison in PHP 8 and would slip through a bare range check.
        if (! ctype_digit($code)) {
            return false;
        }

        return (int) $code >= 100 && (int) $code <= 599;
    }

    /**
     * Whether the service, process, server name and rule id labels all refer to an entry of
     * the service lookup.
     */
    private function matchesKnownRoute(array $labels, array $services): bool
    {
        if (empty($labels['service_id']) || empty($labels['process_id']) || empty($labels['server_name']) || empty($labels['rule_id'])) {
            return false;
        }

        $process = $services[$labels['service_id']][$labels['process_id']] ?? null;
        if ($process === null) {
            return false;
        }

        // Rule ids are compared loosely because their stored type is not visible here.
        return in_array($labels['server_name'], $process['serversNames'], true)
            && in_array($labels['rule_id'], $process['ruleIds']);
    }
}
4 changes: 3 additions & 1 deletion api-nodes/Http/Controllers/TaskController.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ public function complete(NodeTask $task, Request $request)
}

if ($task->is_pending) {
// TODO: change to ???, 409 (Conflict) should be used for completed tasks
return new Response(['error' => "Task didn't start yet."], 409);
}

$resultClass = $task->type->result();
var_dump($request->all());

$result = $resultClass::validateAndCreate($request->all());

$task->complete($result);
Expand All @@ -35,6 +36,7 @@ public function fail(NodeTask $task, Request $request)
}

if ($task->is_pending) {
// TODO: change to ???, 409 (Conflict) should be used for completed tasks
return new Response(['error' => "Task didn't start yet."], 409);
}

Expand Down
2 changes: 1 addition & 1 deletion app/Actions/Nodes/InitCluster.php
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private function getCaddyProcessConfig(Node $node): array
'placementNodeId' => $node->id,
'launchMode' => LaunchMode::Daemon->value,
'dockerRegistryId' => null,
'dockerImage' => 'caddy:2.8-alpine',
'dockerImage' => 'ghcr.io/ptah-sh/ptah-caddy:latest',
'releaseCommand' => [
'command' => null,
],
Expand Down
41 changes: 39 additions & 2 deletions app/Actions/Nodes/RebuildCaddy.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym

$handlers = [];

$handlers[] = [
'handler' => 'ptah_observer',
'service_id' => strval($deployment->service->id),
'process_id' => $process->name,
'rule_id' => $caddy->id,
];

$pathRegexps = [];
foreach ($process->rewriteRules as $rewriteRule) {
$pathRegexps[] = [
Expand Down Expand Up @@ -64,6 +71,7 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
];

$routes[] = [
'group' => $process->dockerName,
'match' => [
[
'host' => [$caddy->domain],
Expand All @@ -73,12 +81,14 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
'handle' => $handlers,
];

// FIXME: Here goes a big "OOPS": redirect rules are repeated for each caddy rule in the process
foreach ($process->redirectRules as $redirectRule) {
$regexpName = dockerize_name($redirectRule->id);

$pathTo = preg_replace("/\\$(\d+)/", "{http.regexp.$regexpName.$1}", $redirectRule->pathTo);

$routes[] = [
'group' => $process->dockerName,
'match' => [
[
'host' => [$redirectRule->domainFrom],
Expand All @@ -89,6 +99,12 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
],
],
'handle' => [
[
'handler' => 'ptah_observer',
'service_id' => strval($deployment->service->id),
'process_id' => $process->name,
'rule_id' => $redirectRule->id,
],
[
'handler' => 'static_response',
'status_code' => (string) $redirectRule->statusCode,
Expand All @@ -102,11 +118,17 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
];
}

$serverName = match ($caddy->publishedPort) {
80 => 'http',
443 => 'https',
default => "listen_{$caddy->publishedPort}",
};

return [
'apps' => [
'http' => [
'servers' => [
"listen_{$caddy->publishedPort}" => [
$serverName => [
'listen' => [
"0.0.0.0:{$caddy->publishedPort}",
],
Expand All @@ -125,7 +147,16 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
$caddy = [
'apps' => [
'http' => [
'servers' => (object) [],
'servers' => [
'http' => [
'listen' => ['0.0.0.0:80'],
'routes' => [],
],
'https' => [
'listen' => ['0.0.0.0:443'],
'routes' => [],
],
],
],
],
];
Expand All @@ -146,6 +177,12 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym
],
],
'handle' => [
[
'handler' => 'ptah_observer',
'service_id' => 'ptah_404',
'process_id' => 'ptah_404',
'rule_id' => 'ptah_404',
],
[
'handler' => 'static_response',
'status_code' => '404',
Expand Down
Loading

0 comments on commit d94f130

Please sign in to comment.