From d94f1302cce15c5abca1a205d5788c8ad478f2f4 Mon Sep 17 00:00:00 2001 From: Bohdan Shulha Date: Thu, 3 Oct 2024 23:45:56 +0200 Subject: [PATCH] feat: #207 metrics ingestion endpoint v0.1 --- .../Http/Controllers/MetricsController.php | 250 ++++++++++++++++++ api-nodes/Http/Controllers/TaskController.php | 4 +- app/Actions/Nodes/InitCluster.php | 2 +- app/Actions/Nodes/RebuildCaddy.php | 41 ++- app/Util/Promexport.php | 156 +++++++++++ config/billing.php | 2 +- resources/support/caddy/404.html | 5 +- routes/api.php | 3 + tests/Unit/Util/PromexportTest.php | 67 +++++ 9 files changed, 523 insertions(+), 7 deletions(-) create mode 100644 api-nodes/Http/Controllers/MetricsController.php create mode 100644 app/Util/Promexport.php create mode 100644 tests/Unit/Util/PromexportTest.php diff --git a/api-nodes/Http/Controllers/MetricsController.php b/api-nodes/Http/Controllers/MetricsController.php new file mode 100644 index 0000000..6fdde78 --- /dev/null +++ b/api-nodes/Http/Controllers/MetricsController.php @@ -0,0 +1,250 @@ +swarm === null) { + return new Response('{}', 204); + } + + // TODO: cache this with the new Laravel cache system (stale-while-revalidate from the recent release) + $interfaces = collect($node->data->host->networks)->pluck('if_name')->unique()->toArray(); + + $services = $node->team->services->mapWithKeys(function ($service) { + $processes = collect($service->latestDeployment->data->processes); + + $ruleIds = $processes->mapWithKeys(function (Process $process) { + $caddyIds = collect($process->caddy)->pluck('id'); + $redirectRuleIds = collect($process->redirectRules)->pluck('id'); + $rewriteRuleIds = collect($process->rewriteRules)->pluck('id'); + + $ruleIds = $caddyIds->merge($redirectRuleIds)->merge($rewriteRuleIds)->toArray(); + + $serversNames = collect($process->caddy)->pluck('publishedPort')->unique()->map(function ($port) { + return match ($port) { + 80 => 'http', + 443 => 'https', + default => 'listen_'.$port, + }; + })->toArray(); + + return [ + $process->name => [ + 'ruleIds' => $ruleIds, + 'serversNames' => $serversNames, + ], + ]; + })->toArray(); + + return [$service->id => $ruleIds]; + })->toArray(); + + $services['ptah_404'] = [ + 'ptah_404' => [ + 'ruleIds' => ['ptah_404'], + 'serversNames' => ['http', 'https'], + ], + ]; + + $log->info('Services:', [ + 'services' => $services, + ]); + + foreach ($request->all() as $metricsDoc) { + if ($metricsDoc === null) { + continue; + } + + $query = ''; + $query .= '?extra_label=swarm_id='.$node->swarm->id; + $query .= '&extra_label=node_id='.$node->id; + + $ingestMetrics = []; + + $lines = explode("\n", $metricsDoc); + foreach ($lines as $line) { + if (empty($line) || strpos($line, '#') === 0) { + continue; + } + + $metric = Promexport::parseLine($line); + if ($metric) { + $labels = $metric['labels']; + + switch ($metric['metric']) { + case 'ptah_caddy_http_requests_duration_bucket': + case 'ptah_caddy_http_requests_ttfb_bucket': + if (empty($labels['le'])) { + break; + } + + if (! in_array($labels['le'], self::HISTOGRAM_LABELS)) { + break; + } + + // no break, fall through + case 'ptah_caddy_http_requests_count': + case 'ptah_caddy_http_requests_duration_count': + case 'ptah_caddy_http_requests_duration_sum': + case 'ptah_caddy_http_requests_ttfb_count': + case 'ptah_caddy_http_requests_ttfb_sum': + if (empty($labels['status_code'])) { + break; + } + + if ($labels['status_code'] < 100 || $labels['status_code'] > 599) { + break; + } + + // no break, fall through + case 'ptah_caddy_http_requests_in_flight': + if (empty($labels['service_id']) || empty($labels['process_id']) || empty($labels['server_name']) || empty($labels['rule_id'])) { + break; + } + + if (! isset($services[$labels['service_id']]) && $labels['service_id'] !== 'ptah_404') { + break; + } + + $service = $services[$labels['service_id']]; + + if (! isset($service[$labels['process_id']])) { + break; + } + + $process = $service[$labels['process_id']]; + + if (! in_array($labels['server_name'], $process['serversNames'])) { + break; + } + + if (! in_array($labels['rule_id'], $process['ruleIds'])) { + break; + } + + $ingestMetrics[] = $line; + + break; + case 'ptah_node_disk_io_ops_count': + if (empty($labels['device']) || empty($labels['operation'])) { + break; + } + + if (! in_array($labels['device'], self::DISK_IO_DEVICES) || ! in_array($labels['operation'], self::DISK_IO_OPERATIONS)) { + break; + } + + $ingestMetrics[] = $line; + + break; + case 'ptah_node_network_rx_bytes': + case 'ptah_node_network_tx_bytes': + if (empty($labels['interface'])) { + break; + } + + if (! in_array($labels['interface'], $interfaces)) { + break; + } + + $ingestMetrics[] = $line; + + break; + case 'ptah_node_disk_usage_free': + case 'ptah_node_disk_usage_total': + case 'ptah_node_disk_usage_used': + if (empty($labels['path'])) { + break; + } + + if (! in_array($labels['path'], self::DISK_USAGE_PATHS)) { + break; + } + + $ingestMetrics[] = $line; + + break; + case 'ptah_node_cpu_idle': + case 'ptah_node_cpu_nice': + case 'ptah_node_cpu_system': + case 'ptah_node_cpu_total': + case 'ptah_node_cpu_user': + case 'ptah_node_load_avg_1': + case 'ptah_node_load_avg_5': + case 'ptah_node_load_avg_15': + case 'ptah_node_memory_free': + case 'ptah_node_memory_total': + case 'ptah_node_memory_used': + case 'ptah_node_uptime': + $ingestMetrics[] = $line; + + break; + default: + // unknown metric + + break; + } + } + } + + $log->info('Metrics:', [ + 'metrics' => $ingestMetrics, + ]); + + // TODO: use value from config (/env vars) + $response = Http::withBody(implode("\n", $ingestMetrics), 'text/plain')->post("http://127.0.0.1:8080/api/v1/import/prometheus$query"); + + $log->info('VictoriaMetrics response:', [ + 'status' => $response->status(), + 'body' => $response->body(), + ]); + } + + $response = new Response('{}', 204); + $response->headers->set('Content-Type', 'application/json'); + + return $response; + } +} diff --git a/api-nodes/Http/Controllers/TaskController.php b/api-nodes/Http/Controllers/TaskController.php index 45c859d..3c435a1 100644 --- a/api-nodes/Http/Controllers/TaskController.php +++ b/api-nodes/Http/Controllers/TaskController.php @@ -16,11 +16,12 @@ public function complete(NodeTask $task, Request $request) } if ($task->is_pending) { + // TODO: change to ???, 409 (Conflict) should be used for completed tasks return new Response(['error' => "Task didn't start yet."], 409); } $resultClass = $task->type->result(); - var_dump($request->all()); + $result = $resultClass::validateAndCreate($request->all()); $task->complete($result); @@ -35,6 +36,7 @@ public function fail(NodeTask $task, Request $request) } if ($task->is_pending) { + // TODO: change to ???, 409 (Conflict) should be used for completed tasks return new Response(['error' => "Task didn't start yet."], 409); } diff --git a/app/Actions/Nodes/InitCluster.php b/app/Actions/Nodes/InitCluster.php index 1c4a480..2caf437 100644 --- a/app/Actions/Nodes/InitCluster.php +++ b/app/Actions/Nodes/InitCluster.php @@ -197,7 +197,7 @@ private function getCaddyProcessConfig(Node $node): array 'placementNodeId' => $node->id, 'launchMode' => LaunchMode::Daemon->value, 'dockerRegistryId' => null, - 'dockerImage' => 'caddy:2.8-alpine', + 'dockerImage' => 'ghcr.io/ptah-sh/ptah-caddy:latest', 'releaseCommand' => [ 'command' => null, ], diff --git a/app/Actions/Nodes/RebuildCaddy.php b/app/Actions/Nodes/RebuildCaddy.php index ca2d2cb..d480d7a 100644 --- a/app/Actions/Nodes/RebuildCaddy.php +++ b/app/Actions/Nodes/RebuildCaddy.php @@ -27,6 +27,13 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym $handlers = []; + $handlers[] = [ + 'handler' => 'ptah_observer', + 'service_id' => strval($deployment->service->id), + 'process_id' => $process->name, + 'rule_id' => $caddy->id, + ]; + $pathRegexps = []; foreach ($process->rewriteRules as $rewriteRule) { $pathRegexps[] = [ @@ -64,6 +71,7 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym ]; $routes[] = [ + 'group' => $process->dockerName, 'match' => [ [ 'host' => [$caddy->domain], @@ -73,12 +81,14 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym 'handle' => $handlers, ]; + // FIXME: Here goes a big "OOPS": redirect rules are repeated for each caddy rule in the process foreach ($process->redirectRules as $redirectRule) { $regexpName = dockerize_name($redirectRule->id); $pathTo = preg_replace("/\\$(\d+)/", "{http.regexp.$regexpName.$1}", $redirectRule->pathTo); $routes[] = [ + 'group' => $process->dockerName, 'match' => [ [ 'host' => [$redirectRule->domainFrom], @@ -89,6 +99,12 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym ], ], 'handle' => [ + [ + 'handler' => 'ptah_observer', + 'service_id' => strval($deployment->service->id), + 'process_id' => $process->name, + 'rule_id' => $redirectRule->id, + ], [ 'handler' => 'static_response', 'status_code' => (string) $redirectRule->statusCode, @@ -102,11 +118,17 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym ]; } + $serverName = match ($caddy->publishedPort) { + 80 => 'http', + 443 => 'https', + default => "listen_{$caddy->publishedPort}", + }; + return [ 'apps' => [ 'http' => [ 'servers' => [ - "listen_{$caddy->publishedPort}" => [ + $serverName => [ 'listen' => [ "0.0.0.0:{$caddy->publishedPort}", ], @@ -125,7 +147,16 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym $caddy = [ 'apps' => [ 'http' => [ - 'servers' => (object) [], + 'servers' => [ + 'http' => [ + 'listen' => ['0.0.0.0:80'], + 'routes' => [], + ], + 'https' => [ + 'listen' => ['0.0.0.0:443'], + 'routes' => [], + ], + ], ], ], ]; @@ -146,6 +177,12 @@ public function handle(Team $team, NodeTaskGroup $taskGroup, Deployment $deploym ], ], 'handle' => [ + [ + 'handler' => 'ptah_observer', + 'service_id' => 'ptah_404', + 'process_id' => 'ptah_404', + 'rule_id' => 'ptah_404', + ], [ 'handler' => 'static_response', 'status_code' => '404', diff --git a/app/Util/Promexport.php b/app/Util/Promexport.php new file mode 100644 index 0000000..5f2a516 --- /dev/null +++ b/app/Util/Promexport.php @@ -0,0 +1,156 @@ + $metric, + 'labels' => $labels, + 'value' => $value, + 'timestamp' => $timestamp, + ]; + } +} diff --git a/config/billing.php b/config/billing.php index ea2600f..a29e80a 100644 --- a/config/billing.php +++ b/config/billing.php @@ -44,7 +44,7 @@ 'nodes' => ['limit' => 1, 'soft' => false, 'reset_period' => null], 'swarms' => ['limit' => 1, 'soft' => false, 'reset_period' => null], 'services' => ['limit' => 3, 'soft' => false, 'reset_period' => null], - 'deployments' => ['limit' => 20, 'soft' => false, 'reset_period' => 'daily'], + 'deployments' => ['limit' => 20000, 'soft' => false, 'reset_period' => 'daily'], ], ], 'selfHostedPlan' => [ diff --git a/resources/support/caddy/404.html b/resources/support/caddy/404.html index 4efa79b..8b6abec 100644 --- a/resources/support/caddy/404.html +++ b/resources/support/caddy/404.html @@ -15,6 +15,7 @@ .home-btn { display: inline-block; + visibility: hidden; background-color: #007bff; color: #fff; padding: 10px 20px; @@ -35,8 +36,8 @@ diff --git a/routes/api.php b/routes/api.php index 7cce316..a2980ce 100644 --- a/routes/api.php +++ b/routes/api.php @@ -1,6 +1,7 @@ '/v0', 'middleware' => ['auth:sanctum']], function () { diff --git a/tests/Unit/Util/PromexportTest.php b/tests/Unit/Util/PromexportTest.php new file mode 100644 index 0000000..3c203cc --- /dev/null +++ b/tests/Unit/Util/PromexportTest.php @@ -0,0 +1,67 @@ +toEqual([ + [ + 'metric' => 'metric', + 'timestamp' => '1714200000', + 'labels' => ['label' => 'value'], + 'value' => '1', + ], + ]); + }); + + test('parses metric line with multiple labels', function () { + $line = 'metric{label1="value1",label2="value2"} 1 1714200000'; + $result = Promexport::parseLine($line); + + expect($result)->toEqual([ + [ + 'metric' => 'metric', + 'labels' => ['label1' => 'value1', 'label2' => 'value2'], + 'value' => '1', + 'timestamp' => '1714200000', + ], + ]); + }); + + test('parses metric line with no labels', function () { + $line = 'metric 1 1714200000'; + $result = Promexport::parseLine($line); + + expect($result)->toEqual([ + [ + 'metric' => 'metric', + 'labels' => [], + 'value' => '1', + 'timestamp' => '1714200000', + ], + ]); + }); + + test('parses multiple metrics', function () { + $doc = "metric1{label1=\"value1\"} 1 1714200000\nmetric2{label2=\"value2\"} 2 1714200001"; + $result = Promexport::parse($doc); + + expect($result)->toEqual([ + [ + 'metric' => 'metric1', + 'labels' => ['label1' => 'value1'], + 'value' => '1', + 'timestamp' => '1714200000', + ], + [ + 'metric' => 'metric2', + 'labels' => ['label2' => 'value2'], + 'value' => '2', + 'timestamp' => '1714200001', + ], + ]); + }); +});