Skip to content

Commit

Permalink
Add queries for server and model performance metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
DaoDaoNoCode committed Jul 11, 2023
1 parent df06394 commit d94f04e
Show file tree
Hide file tree
Showing 16 changed files with 294 additions and 137 deletions.
24 changes: 17 additions & 7 deletions backend/src/routes/api/prometheus/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
PrometheusQueryResponse,
QueryType,
} from '../../../types';
import { callPrometheusThanos, callPrometheusServing } from '../../../utils/prometheusUtils';
import { callPrometheusThanos } from '../../../utils/prometheusUtils';
import { createCustomError } from '../../../utils/requestUtils';
import { logRequestDetails } from '../../../utils/fileUtils';

Expand Down Expand Up @@ -36,7 +36,9 @@ module.exports = async (fastify: KubeFastifyInstance) => {
): Promise<{ code: number; response: PrometheusQueryResponse }> => {
const { query } = request.body;

return callPrometheusThanos(fastify, request, query).catch(handleError);
return callPrometheusThanos<PrometheusQueryResponse>(fastify, request, query).catch(
handleError,
);
},
);

Expand All @@ -46,12 +48,15 @@ module.exports = async (fastify: KubeFastifyInstance) => {
request: OauthFastifyRequest<{
Body: { query: string };
}>,
): Promise<{ code: number; response: PrometheusQueryResponse }> => {
): Promise<{ code: number; response: PrometheusQueryRangeResponse }> => {
const { query } = request.body;

return callPrometheusThanos(fastify, request, query, QueryType.QUERY_RANGE).catch(
handleError,
);
return callPrometheusThanos<PrometheusQueryRangeResponse>(
fastify,
request,
query,
QueryType.QUERY_RANGE,
).catch(handleError);
},
);

Expand All @@ -65,7 +70,12 @@ module.exports = async (fastify: KubeFastifyInstance) => {
logRequestDetails(fastify, request);
const { query } = request.body;

return callPrometheusServing(fastify, request, query).catch(handleError);
return callPrometheusThanos<PrometheusQueryRangeResponse>(
fastify,
request,
query,
QueryType.QUERY_RANGE,
).catch(handleError);
},
);
};
7 changes: 3 additions & 4 deletions backend/src/utils/prometheusUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import {
KubeFastifyInstance,
OauthFastifyRequest,
PrometheusQueryRangeResponse,
PrometheusQueryResponse,
QueryType,
} from '../types';
import { DEV_MODE } from './constants';
Expand Down Expand Up @@ -84,13 +83,13 @@ const generatePrometheusHostURL = (
return `https://${instanceName}.${namespace}.svc.cluster.local:${port}`;
};

export const callPrometheusThanos = (
export const callPrometheusThanos = <T>(
fastify: KubeFastifyInstance,
request: OauthFastifyRequest,
query: string,
queryType: QueryType = QueryType.QUERY,
): Promise<{ code: number; response: PrometheusQueryResponse }> =>
callPrometheus(
): Promise<{ code: number; response: T }> =>
callPrometheus<T>(
fastify,
request,
query,
Expand Down
27 changes: 18 additions & 9 deletions frontend/src/api/prometheus/serving.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {
import useBiasMetricsEnabled from '~/concepts/explainability/useBiasMetricsEnabled';
import { ResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange';
import useRefreshInterval from '~/utilities/useRefreshInterval';
import { RefreshIntervalValue } from '~/pages/modelServing/screens/const';
import { QueryTimeframeStep, RefreshIntervalValue } from '~/pages/modelServing/screens/const';
import usePerformanceMetricsEnabled from '~/pages/modelServing/screens/metrics/usePerformanceMetricsEnabled';
import useQueryRangeResourceData from './useQueryRangeResourceData';

Expand Down Expand Up @@ -45,29 +45,33 @@ export const useModelServingMetrics = (

const trustyResponsePredicate = React.useCallback<
ResponsePredicate<PrometheusQueryRangeResponseDataResult>
>((data) => data.result, []);
>((data) => data.result || [], []);

const serverRequestCount = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.REQUEST_COUNT],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.REQUEST_COUNT],
defaultResponsePredicate,
);

const serverAverageResponseTime = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.AVG_RESPONSE_TIME],
end,
timeframe,
defaultResponsePredicate,
);
const serverAverageResponseTime =
useQueryRangeResourceData<PrometheusQueryRangeResponseDataResult>(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.AVG_RESPONSE_TIME],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.AVG_RESPONSE_TIME],
trustyResponsePredicate,
);

const serverCPUUtilization = useQueryRangeResourceData(
performanceMetricsEnabled && type === PerformanceMetricType.SERVER,
queries[ServerMetricType.CPU_UTILIZATION],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.CPU_UTILIZATION],
defaultResponsePredicate,
);

Expand All @@ -76,6 +80,7 @@ export const useModelServingMetrics = (
queries[ServerMetricType.MEMORY_UTILIZATION],
end,
timeframe,
QueryTimeframeStep[ServerMetricType.MEMORY_UTILIZATION],
defaultResponsePredicate,
);

Expand All @@ -84,6 +89,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.REQUEST_COUNT_SUCCESS],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_SUCCESS],
defaultResponsePredicate,
);

Expand All @@ -92,6 +98,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.REQUEST_COUNT_FAILED],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.REQUEST_COUNT_FAILED],
defaultResponsePredicate,
);

Expand All @@ -100,6 +107,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.TRUSTY_AI_SPD],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.TRUSTY_AI_SPD],
trustyResponsePredicate,
'/api/prometheus/bias',
);
Expand All @@ -109,6 +117,7 @@ export const useModelServingMetrics = (
queries[ModelMetricType.TRUSTY_AI_DIR],
end,
timeframe,
QueryTimeframeStep[ModelMetricType.TRUSTY_AI_DIR],
trustyResponsePredicate,
'/api/prometheus/bias',
);
Expand Down
7 changes: 4 additions & 3 deletions frontend/src/api/prometheus/useQueryRangeResourceData.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { TimeframeStep, TimeframeTimeRange } from '~/pages/modelServing/screens/const';
import { TimeframeTimeRange } from '~/pages/modelServing/screens/const';
import { ContextResourceData, PrometheusQueryRangeResultValue } from '~/types';
import useRestructureContextResourceData from '~/utilities/useRestructureContextResourceData';
import { TimeframeTitle } from '~/pages/modelServing/screens/types';
import { TimeframeStepType, TimeframeTitle } from '~/pages/modelServing/screens/types';
import usePrometheusQueryRange, { ResponsePredicate } from './usePrometheusQueryRange';

const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
Expand All @@ -10,6 +10,7 @@ const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
query: string,
end: number,
timeframe: TimeframeTitle,
timeframeStep: TimeframeStepType,
responsePredicate: ResponsePredicate<T>,
apiPath = '/api/prometheus/serving',
): ContextResourceData<T> =>
Expand All @@ -20,7 +21,7 @@ const useQueryRangeResourceData = <T = PrometheusQueryRangeResultValue>(
query,
TimeframeTimeRange[timeframe],
end,
TimeframeStep[timeframe],
timeframeStep[timeframe],
responsePredicate,
),
);
Expand Down
24 changes: 22 additions & 2 deletions frontend/src/pages/modelServing/screens/const.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { ModelMetricType, ServerMetricType } from './metrics/ModelServingMetricsContext';
import {
QueryTimeframeStepType,
RefreshIntervalTitle,
RefreshIntervalValueType,
ServingRuntimeSize,
Expand Down Expand Up @@ -72,7 +74,7 @@ export const TimeframeTimeRange: TimeframeTimeType = {
[TimeframeTitle.ONE_HOUR]: 60 * 60,
[TimeframeTitle.ONE_DAY]: 24 * 60 * 60,
[TimeframeTitle.ONE_WEEK]: 7 * 24 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 30 * 7 * 24 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 30 * 24 * 60 * 60,
// [TimeframeTitle.UNLIMITED]: 0,
};

Expand All @@ -84,14 +86,32 @@ export const TimeframeTimeRange: TimeframeTimeType = {
* 24h * 60m * 60s => 86,400 seconds of space
* 86,400 / (24 * 12) => 300 points of prometheus data
*/
export const TimeframeStep: TimeframeStepType = {
const TimeframeStep: TimeframeStepType = {
[TimeframeTitle.ONE_HOUR]: 12,
[TimeframeTitle.ONE_DAY]: 24 * 12,
[TimeframeTitle.ONE_WEEK]: 7 * 24 * 12,
[TimeframeTitle.ONE_MONTH]: 30 * 24 * 12,
// [TimeframeTitle.UNLIMITED]: 30 * 7 * 24 * 12, // TODO: determine if we "zoom out" more
};

/**
 * Per-timeframe step table used for the request-count and average-response-time
 * metrics (it is the table those metric types map to in `QueryTimeframeStep`).
 *
 * NOTE(review): the values read like durations in seconds
 * (5 min, 1 h, 12 h, 1 day) — confirm the unit against whatever consumes the step.
 */
const TimeframeStepForRequestCountAndAverageTime = {
[TimeframeTitle.ONE_HOUR]: 5 * 60,
[TimeframeTitle.ONE_DAY]: 60 * 60,
[TimeframeTitle.ONE_WEEK]: 12 * 60 * 60,
[TimeframeTitle.ONE_MONTH]: 24 * 60 * 60,
};

/**
 * Lookup from metric type to the per-timeframe step table to use when querying
 * that metric.
 *
 * Request-count style metrics (server request count, server average response
 * time, model success/failed request counts) share
 * `TimeframeStepForRequestCountAndAverageTime`; CPU, memory and the Trusty AI
 * metrics use the general `TimeframeStep` table.
 */
export const QueryTimeframeStep: QueryTimeframeStepType = {
[ServerMetricType.REQUEST_COUNT]: TimeframeStepForRequestCountAndAverageTime,
[ServerMetricType.AVG_RESPONSE_TIME]: TimeframeStepForRequestCountAndAverageTime,
[ServerMetricType.CPU_UTILIZATION]: TimeframeStep,
[ServerMetricType.MEMORY_UTILIZATION]: TimeframeStep,
[ModelMetricType.REQUEST_COUNT_FAILED]: TimeframeStepForRequestCountAndAverageTime,
[ModelMetricType.REQUEST_COUNT_SUCCESS]: TimeframeStepForRequestCountAndAverageTime,
[ModelMetricType.TRUSTY_AI_DIR]: TimeframeStep,
[ModelMetricType.TRUSTY_AI_SPD]: TimeframeStep,
};

export const RefreshIntervalValue: RefreshIntervalValueType = {
[RefreshIntervalTitle.FIFTEEN_SECONDS]: 15 * 1000,
[RefreshIntervalTitle.THIRTY_SECONDS]: 30 * 1000,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,27 @@ import { InferenceServiceKind } from '~/k8sTypes';
import ModelMetricsPathWrapper from './ModelMetricsPathWrapper';
import { ModelServingMetricsProvider } from './ModelServingMetricsContext';
import { getModelMetricsQueries } from './utils';
import useCurrentTimeframeBrowserStorage from './useCurrentTimeframeBrowserStorage';

/**
 * Pairs an inference service with a project name.
 *
 * NOTE(review): presumably the shape of the `context` object that
 * `GlobalModelMetricsWrapper` passes to `<Outlet>` (it passes
 * `{ model, projectName }`) — confirm against the consuming routes.
 */
export type GlobalModelMetricsOutletContextProps = {
model: InferenceServiceKind;
projectName: string;
};

const GlobalModelMetricsWrapper: React.FC = () => (
<ModelMetricsPathWrapper>
{(model, projectName) => {
const queries = getModelMetricsQueries(model);
return (
<ModelServingMetricsProvider queries={queries} type={PerformanceMetricType.MODEL}>
<Outlet context={{ model, projectName }} />
</ModelServingMetricsProvider>
);
}}
</ModelMetricsPathWrapper>
);
/**
 * Route wrapper for the global model metrics pages.
 *
 * Builds the metric queries for the model supplied by `ModelMetricsPathWrapper`
 * and the currently selected timeframe, provides them through
 * `ModelServingMetricsProvider` (metric type `MODEL`), and renders the nested
 * route via `<Outlet>` with `{ model, projectName }` as its context.
 */
const GlobalModelMetricsWrapper: React.FC = () => {
// Read the timeframe at component level (not inside the render callback below);
// only the first element of the returned tuple is needed here.
const [currentTimeframe] = useCurrentTimeframeBrowserStorage();
return (
<ModelMetricsPathWrapper>
{(model, projectName) => {
// The queries depend on both the model and the selected timeframe.
const queries = getModelMetricsQueries(model, currentTimeframe);
return (
<ModelServingMetricsProvider queries={queries} type={PerformanceMetricType.MODEL}>
<Outlet context={{ model, projectName }} />
</ModelServingMetricsProvider>
);
}}
</ModelMetricsPathWrapper>
);
};

export default GlobalModelMetricsWrapper;
Loading

0 comments on commit d94f04e

Please sign in to comment.