Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prometheus client and collect metrics for notification #388

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion packages/brain/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import {
BlockExplorerApi,
ValidatorApi,
DappnodeSignatureVerifier,
DappmanagerApi
DappmanagerApi,
PrometheusApi
} from "./modules/apiClients/index.js";
import { startUiServer, startLaunchpadApi } from "./modules/apiServers/index.js";
import * as dotenv from "dotenv";
Expand Down Expand Up @@ -57,6 +58,13 @@ logger.debug(
);

// Create API instances. Must preceed db initialization
export const prometheusApi = new PrometheusApi({
baseUrl: "http://prometheus.dms.dappnode:9090",
minGenesisTime,
secondsPerSlot,
slotsPerEpoch,
network
});
export const signerApi = new Web3SignerApi(
{
baseUrl: signerUrl,
Expand Down Expand Up @@ -114,6 +122,7 @@ export const trackValidatorsPerformanceCronTask = new CronJob(
beaconchainApi,
executionClient,
consensusClient,
prometheusApi,
dappmanagerApi,
sendNotification: true
});
Expand Down
1 change: 1 addition & 0 deletions packages/brain/src/modules/apiClients/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export { BlockExplorerApi } from "./blockExplorer/index.js";
export { DappmanagerApi } from "./dappmanager/index.js";
export { PrometheusApi } from "./prometheus/index.js";
export { BeaconchainApi } from "./beaconchain/index.js";
export { ValidatorApi } from "./validator/index.js";
export { StandardApi } from "./standard.js";
Expand Down
8 changes: 8 additions & 0 deletions packages/brain/src/modules/apiClients/prometheus/error.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { ApiError } from "../error.js";

export class PrometheusApiError extends ApiError {
constructor(message: string) {
super(message);
this.name = "PrometheusApiError";
}
}
168 changes: 168 additions & 0 deletions packages/brain/src/modules/apiClients/prometheus/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { Network } from "@stakingbrain/common";
import logger from "../../logger/index.js";
import { StandardApi } from "../standard.js";
import { AvgHostMetrics } from "./types.js";

export class PrometheusApi extends StandardApi {
private readonly minGenesisTime: number;
private readonly slotsPerEpoch: number;
private readonly secondsPerSlot: number;

constructor({
baseUrl,
minGenesisTime,
slotsPerEpoch,
secondsPerSlot,
network
}: {
baseUrl: string;
minGenesisTime: number;
slotsPerEpoch: number;
secondsPerSlot: number;
network: Network;
}) {
super({ baseUrl }, network);
this.minGenesisTime = minGenesisTime;
this.slotsPerEpoch = slotsPerEpoch;
this.secondsPerSlot = secondsPerSlot;
}

/**
* Get average host metrics for a given epoch:
* - avgCpuTemperature
* - avgCpuUsage
* - avgMemoryUsage
* - ioUtilizationPerDisk
*/
public async getPrometheusMetrics({ epoch }: { epoch: number }): Promise<AvgHostMetrics> {
try {
const { startTimestamp, endTimestamp } = this.calculateEpochTimestamps(epoch);

// Looks like query_range does not allow to request multiple queries in a single reques

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const ioDisk: { metric: { device: string }; values: any[] }[] = await this.getPrometheusDataResult({
query: `irate(node_disk_io_time_seconds_total{instance="node-exporter.dms.dappnode:9100", job="nodeexporter"}[${endTimestamp - startTimestamp}s])`,
startTimestamp,
endTimestamp
});

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const ioUtilizationPerDisk = ioDisk.reduce((acc: { [key: string]: any }, disk) => {
const device = disk.metric.device;
const utilization = Math.round(parseFloat(disk.values[0][1]) * 100);
acc[device] = utilization;
return acc;
}, {});

return {
startTimestamp,
endTimestamp,
avgCpuTemperature: await this.getPrometheusAvgMetric({
query: `avg_over_time(dappmanager_cpu_temperature_celsius{app="dappmanager-custom-metrics", instance="dappmanager.dappnode:80", job="manager_sd", package="dappmanager.dnp.dappnode.eth", service="dappmanager", type="current"}[${endTimestamp - startTimestamp}s])`,
startTimestamp,
endTimestamp
}),
avgCpuUsage: await this.getPrometheusAvgMetric({
query: `100 * (1 - avg(rate(node_cpu_seconds_total{instance="node-exporter.dms.dappnode:9100", job="nodeexporter", mode="idle"}[${endTimestamp - startTimestamp}s])) by (instance))`,
startTimestamp,
endTimestamp
}),
avgMemoryUsage: await this.getPrometheusAvgMetric({
query: `100 * (1 - avg(node_memory_MemAvailable_bytes{instance="node-exporter.dms.dappnode:9100", job="nodeexporter"} / node_memory_MemTotal_bytes{instance="node-exporter.dms.dappnode:9100", job="nodeexporter"}) by (instance))`,
startTimestamp,
endTimestamp
}),
ioUtilizationPerDisk
};
} catch (error) {
logger.error("Failed to get prometheus metrics", error);
throw error;
}
}

/**
* Query prometheus metric using the endpoint /api/v1/query_range.
* Used to get the data result for later processing.
*
* @see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
*/
private async getPrometheusDataResult({
query,
startTimestamp,
endTimestamp
}: {
query: string;
startTimestamp: number;
endTimestamp: number;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}): Promise<{ metric: { device: string }; values: any[] }[]> {
// Construct the request body
const requestBody = new URLSearchParams({
query,
start: startTimestamp.toString(),
end: endTimestamp.toString(),
step: `10m` // It should be higher than the time range so it returns only one value
}).toString();

return (
await this.request({
method: "POST",
endpoint: `/api/v1/query_range`,
headers: {
"Content-Type": "application/x-www-form-urlencoded"
},
body: requestBody
})
).data.result;
}

/**
* Query prometheus metric using the endpoint /api/v1/query_range.
* This method assumes there is only 1 metric in the reponse (in the array)
*
* @see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
*/
private async getPrometheusAvgMetric({
query,
startTimestamp,
endTimestamp
}: {
query: string;
startTimestamp: number;
endTimestamp: number;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}): Promise<number> {
// Construct the request body
const requestBody = new URLSearchParams({
query,
start: startTimestamp.toString(),
end: endTimestamp.toString(),
step: `10m` // It should be higher than the time range so it returns only one value
}).toString();

return Math.round(
parseFloat(
(
await this.request({
method: "POST",
endpoint: `/api/v1/query_range`,
headers: {
"Content-Type": "application/x-www-form-urlencoded"
},
body: requestBody
})
).data.result[0].values[0][1]
)
);
}

private calculateEpochTimestamps(epoch: number): { startTimestamp: number; endTimestamp: number } {
const startTimestamp = this.minGenesisTime + epoch * this.slotsPerEpoch * this.secondsPerSlot;
const endTimestamp = startTimestamp + this.slotsPerEpoch * this.secondsPerSlot - 1;
return {
startTimestamp,
endTimestamp
};
}
}
10 changes: 10 additions & 0 deletions packages/brain/src/modules/apiClients/prometheus/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
export interface AvgHostMetrics {
startTimestamp: number;
endTimestamp: number;
avgCpuTemperature: number;
avgCpuUsage: number;
avgMemoryUsage: number;
ioUtilizationPerDisk: {
[disk: string]: number;
};
}
4 changes: 3 additions & 1 deletion packages/brain/src/modules/apiClients/standard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,11 @@ export class StandardApi {
}
} else {
let errorMessage = "";

if (res.headers["content-type"] && res.headers["content-type"].includes("application/json")) {
try {
errorMessage = JSON.parse(Buffer.concat(data).toString())?.message;
// if its a error message in JSON we dont know the object format so print it in string format the whole error
errorMessage = Buffer.concat(data).toString();
} catch (e) {
logger.error(
`Error parsing response from ${this.requestOptions.hostname} ${endpoint} ${e.message}`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
import { BeaconchainApiError } from "../../apiClients/beaconchain/error.js";
import { BrainDbError } from "../../db/error.js";
import { ExecutionOfflineError, NodeSyncingError } from "./error.js";
import { DappmanagerApi } from "../../apiClients/index.js";
import { DappmanagerApi, PrometheusApi } from "../../apiClients/index.js";
import { sendValidatorsPerformanceNotifications } from "./sendValidatorsPerformanceNotifications.js";

let lastProcessedEpoch: number | undefined = undefined;
Expand All @@ -29,6 +29,7 @@ export async function fetchAndInsertValidatorsPerformanceData({
executionClient,
consensusClient,
currentEpoch,
prometheusApi,
dappmanagerApi,
sendNotification
}: {
Expand All @@ -38,6 +39,7 @@ export async function fetchAndInsertValidatorsPerformanceData({
executionClient: ExecutionClient;
consensusClient: ConsensusClient;
currentEpoch: number;
prometheusApi: PrometheusApi;
dappmanagerApi: DappmanagerApi;
sendNotification: boolean;
}): Promise<void> {
Expand Down Expand Up @@ -92,6 +94,7 @@ export async function fetchAndInsertValidatorsPerformanceData({
// Send notifications if the last epoch was processed without an error
if (sendNotification && !validatorPerformanceError)
await sendValidatorsPerformanceNotifications({
prometheusApi,
dappmanagerApi,
currentEpoch: currentEpoch.toString(),
validatorBlockStatusMap,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ExecutionClient, ConsensusClient } from "@stakingbrain/common";
import { PostgresClient, BeaconchainApi, DappmanagerApi } from "../../apiClients/index.js";
import { PostgresClient, BeaconchainApi, DappmanagerApi, PrometheusApi } from "../../apiClients/index.js";
import { BrainDataBase } from "../../db/index.js";
import logger from "../../logger/index.js";
import { fetchAndInsertValidatorsPerformanceData } from "./fetchAndInsertValidatorsPerformanceData.js";
Expand All @@ -11,6 +11,7 @@ export async function trackValidatorsPerformanceCron({
executionClient,
consensusClient,
dappmanagerApi,
prometheusApi,
sendNotification
}: {
brainDb: BrainDataBase;
Expand All @@ -19,6 +20,7 @@ export async function trackValidatorsPerformanceCron({
executionClient: ExecutionClient;
consensusClient: ConsensusClient;
dappmanagerApi: DappmanagerApi;
prometheusApi: PrometheusApi;
sendNotification: boolean;
}): Promise<void> {
try {
Expand All @@ -39,6 +41,7 @@ export async function trackValidatorsPerformanceCron({
executionClient,
consensusClient,
currentEpoch,
prometheusApi,
dappmanagerApi,
sendNotification
});
Expand Down
Loading