Skip to content

Commit

Permalink
[APM] Remove usage of internal client when fetching agent config etag…
Browse files Browse the repository at this point in the history
…s metrics (elastic#173001)

Closes: elastic#170031
Replaces: elastic/elasticsearch#101467

**Problem**
We need to know if an agent config has been applied at the edge (by APM
agents). This is determined by comparing the etag (hash) of the config
with the etag applied at the edge.

Previously the agent config itself contained this information
(`config.applied_by_agent`) but when running with fleet this will
instead be captured in `agent_config` metric documents.

Currently the internal kibana user retrieves the `agent_config` metric
documents from the APM metric index (`metrics-apm-*` by default). This
index is configurable by the end-user so can be changed to something the
internal user doesn't have access to. This is a problem.

**Solution**

This PR replaces the calls made by the internal client with calls made
by the authenticated end user (via `APMEventClient`). This approach
works for requests made from the browser/UI but doesn't work for
background tasks run by Fleet. To work around this, we query the metric
index only if the `APMEventClient` is available.
If `APMEventClient` is not available, `applied_by_agent` will be
`undefined`.

(cherry picked from commit 58c7958)
  • Loading branch information
klacabane authored and sorenlouv committed May 1, 2024
1 parent 583cc60 commit d4872eb
Show file tree
Hide file tree
Showing 25 changed files with 712 additions and 341 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import type { MinimalAPMRouteHandlerResources } from '../../routes/apm_routes/re

export type ApmAlertsClient = Awaited<ReturnType<typeof getApmAlertsClient>>;

export async function getApmAlertsClient({ plugins, request }: MinimalAPMRouteHandlerResources) {
export async function getApmAlertsClient({
plugins,
request,
}: Pick<MinimalAPMRouteHandlerResources, 'plugins' | 'request'>) {
const ruleRegistryPluginStart = await plugins.ruleRegistry.start();
const alertsClient = await ruleRegistryPluginStart.getRacClientWithRequest(request);
const apmAlertsIndices = await alertsClient.getAuthorizedAlertsIndices(['apm']);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ import { MinimalAPMRouteHandlerResources } from '../../routes/apm_routes/registe
export async function getApmEventClient({
context,
params,
config,
getApmIndices,
request,
}: Pick<
MinimalAPMRouteHandlerResources,
'context' | 'params' | 'config' | 'getApmIndices' | 'request'
'context' | 'params' | 'getApmIndices' | 'request'
>): Promise<APMEventClient> {
return withApmSpan('get_apm_event_client', async () => {
const coreContext = await context.core;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ export interface MlClient {
modules: MlModules;
}

export async function getMlClient({ plugins, context, request }: MinimalAPMRouteHandlerResources) {
export async function getMlClient({
plugins,
context,
request,
}: Pick<MinimalAPMRouteHandlerResources, 'plugins' | 'context' | 'request'>) {
const [coreContext, licensingContext] = await Promise.all([context.core, context.licensing]);

const mlplugin = plugins.ml;
Expand Down
7 changes: 6 additions & 1 deletion x-pack/plugins/observability_solution/apm/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import { createApmSourceMapIndexTemplate } from './routes/source_maps/create_apm
import { addApiKeysToEveryPackagePolicyIfMissing } from './routes/fleet/api_keys/add_api_keys_to_policies_if_missing';
import { apmTutorialCustomIntegration } from '../common/tutorial/tutorials';
import { registerAssistantFunctions } from './assistant_functions';
import { getAlertDetailsContextHandler } from './routes/assistant_functions/get_observability_alert_details_context/get_alert_details_context_handler';

export class APMPlugin
implements Plugin<APMPluginSetup, void, APMPluginSetupDependencies, APMPluginStartDependencies>
Expand All @@ -52,7 +53,7 @@ export class APMPlugin
}

public setup(core: CoreSetup<APMPluginStartDependencies>, plugins: APMPluginSetupDependencies) {
this.logger = this.initContext.logger.get();
const logger = (this.logger = this.initContext.logger.get());
const config$ = this.initContext.config.create<APMConfig>();

core.savedObjects.registerType(apmTelemetry);
Expand Down Expand Up @@ -219,6 +220,10 @@ export class APMPlugin
})
);

plugins.observability.alertDetailsContextualInsightsService.registerHandler(
getAlertDetailsContextHandler(resourcePlugins, logger)
);

return { config$ };
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import moment from 'moment';
import { LatencyAggregationType } from '../../../../common/latency_aggregation_types';
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import { ApmTimeseriesType, getApmTimeseries, TimeseriesChangePoint } from '../get_apm_timeseries';

/**
 * The change points detected for a single timeseries, as returned by
 * `getServiceChangePoints` and `getExitSpanChangePoints`.
 */
export interface ChangePointGrouping {
/** Title of the stat the timeseries was requested for (e.g. 'Latency'). */
title: string;
/** The `id` of the timeseries the change points belong to. */
grouping: string;
/** Change points found in the timeseries; the producers in this file only emit non-empty lists. */
changes: TimeseriesChangePoint[];
}

/**
 * Looks for change points in a service's transaction latency (p95), throughput,
 * failure rate and error event rate during the 12 hours leading up to the alert.
 *
 * @param apmEventClient - Client handed to `getApmTimeseries` to run the queries.
 * @param alertStartedAt - Alert start time; used as the end of the time range.
 * @param serviceName - Service to inspect. When falsy no query is made.
 * @param serviceEnvironment - Optional environment passed along with every stat.
 * @param transactionType - Optional transaction type for the transaction-based stats.
 * @param transactionName - Optional transaction name for the transaction-based stats.
 * @returns One grouping per timeseries that has at least one change point,
 * or an empty array when `serviceName` is falsy.
 */
export async function getServiceChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
  transactionType,
  transactionName,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
  transactionType: string | undefined;
  transactionName: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  // Fields repeated on every stat / every transaction-based timeseries.
  const serviceFields = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };
  const transactionFields = {
    'transaction.type': transactionType,
    'transaction.name': transactionName,
  };

  const rangeEnd = alertStartedAt;
  const rangeStart = moment(alertStartedAt).subtract(12, 'hours').toISOString();

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: rangeStart,
      end: rangeEnd,
      stats: [
        {
          title: 'Latency',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.transactionLatency,
            function: LatencyAggregationType.p95,
            ...transactionFields,
          },
        },
        {
          title: 'Throughput',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.transactionThroughput,
            ...transactionFields,
          },
        },
        {
          title: 'Failure rate',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.transactionFailureRate,
            ...transactionFields,
          },
        },
        {
          title: 'Error events',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.errorEventRate,
          },
        },
      ],
    },
  });

  // Keep only the timeseries in which at least one change point was found.
  const groupings: ChangePointGrouping[] = [];
  for (const { stat, id, changes } of timeseriesList) {
    if (changes.length > 0) {
      groupings.push({ title: stat.title, grouping: id, changes });
    }
  }
  return groupings;
}

/**
 * Looks for change points in a service's exit span latency and exit span
 * failure rate during the 30 minutes leading up to the alert.
 *
 * @param apmEventClient - Client handed to `getApmTimeseries` to run the queries.
 * @param alertStartedAt - Alert start time; used as the end of the time range.
 * @param serviceName - Service to inspect. When falsy no query is made.
 * @param serviceEnvironment - Optional environment passed along with every stat.
 * @returns One grouping per timeseries that has at least one change point,
 * or an empty array when `serviceName` is falsy.
 */
export async function getExitSpanChangePoints({
  apmEventClient,
  alertStartedAt,
  serviceName,
  serviceEnvironment,
}: {
  apmEventClient: APMEventClient;
  alertStartedAt: string;
  serviceName: string | undefined;
  serviceEnvironment: string | undefined;
}): Promise<ChangePointGrouping[]> {
  if (!serviceName) {
    return [];
  }

  // Fields repeated on every stat.
  const serviceFields = {
    'service.name': serviceName,
    'service.environment': serviceEnvironment,
  };

  const rangeEnd = alertStartedAt;
  const rangeStart = moment(alertStartedAt).subtract(30, 'minute').toISOString();

  const timeseriesList = await getApmTimeseries({
    apmEventClient,
    arguments: {
      start: rangeStart,
      end: rangeEnd,
      stats: [
        {
          title: 'Exit span latency',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.exitSpanLatency,
          },
        },
        {
          title: 'Exit span failure rate',
          ...serviceFields,
          timeseries: {
            name: ApmTimeseriesType.exitSpanFailureRate,
          },
        },
      ],
    },
  });

  // Keep only the timeseries in which at least one change point was found.
  const groupings: ChangePointGrouping[] = [];
  for (const { stat, id, changes } of timeseriesList) {
    if (changes.length > 0) {
      groupings.push({ title: stat.title, grouping: id, changes });
    }
  }
  return groupings;
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export async function getLogCategories({
arguments: args,
}: {
esClient: ElasticsearchClient;
coreContext: CoreRequestHandlerContext;
coreContext: Pick<CoreRequestHandlerContext, 'uiSettings'>;
arguments: {
start: string;
end: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { Logger } from '@kbn/core/server';
import {
AlertDetailsContextualInsightsHandlerQuery,
AlertDetailsContextualInsightsRequestContext,
} from '@kbn/observability-plugin/server/services';
import { getApmAlertsClient } from '../../../lib/helpers/get_apm_alerts_client';
import { getApmEventClient } from '../../../lib/helpers/get_apm_event_client';
import { getMlClient } from '../../../lib/helpers/get_ml_client';
import { getRandomSampler } from '../../../lib/helpers/get_random_sampler';
import { getObservabilityAlertDetailsContext } from '.';
import { APMRouteHandlerResources } from '../../apm_routes/register_apm_server_routes';

/**
 * Creates the handler that gathers APM context for an alert.
 *
 * The returned function assembles a route-resources-like object from the
 * request context, builds the clients scoped to that request, and delegates to
 * `getObservabilityAlertDetailsContext`.
 *
 * @param resourcePlugins - APM plugin dependencies used to build the clients.
 * @param logger - Logger forwarded to `getObservabilityAlertDetailsContext`.
 * @returns An async handler taking the request context and the handler query.
 */
export const getAlertDetailsContextHandler =
  (resourcePlugins: APMRouteHandlerResources['plugins'], logger: Logger) =>
  async (
    requestContext: AlertDetailsContextualInsightsRequestContext,
    query: AlertDetailsContextualInsightsHandlerQuery
  ) => {
    const { request } = requestContext;

    // Mirrors the parts of a route handler context that the client factories read.
    const context = {
      core: requestContext.core,
      licensing: requestContext.licensing,
      alerting: resourcePlugins.alerting!.start().then((alertingStart) => ({
        getRulesClient: () => alertingStart.getRulesClientWithRequest(request),
      })),
      rac: resourcePlugins.ruleRegistry.start().then((ruleRegistryStart) => ({
        getAlertsClient: () => ruleRegistryStart.getRacClientWithRequest(request),
      })),
    };

    // Resolves the APM indices using the saved objects client of the current request.
    const getApmIndices = async () => {
      const { savedObjects } = await requestContext.core;
      return resourcePlugins.apmDataAccess.setup.getApmIndices(savedObjects.client);
    };

    const resources = {
      getApmIndices,
      request,
      params: { query: { _inspect: false } },
      plugins: resourcePlugins,
      context,
    };

    // NOTE(review): the sixth entry (getRandomSampler) is awaited but its result
    // is never destructured or used below — confirm whether it is still needed.
    const [apmEventClient, annotationsClient, apmAlertsClient, coreContext, mlClient] =
      await Promise.all([
        getApmEventClient(resources),
        resourcePlugins.observability.setup.getScopedAnnotationsClient(resources.context, request),
        getApmAlertsClient(resources),
        requestContext.core,
        getMlClient(resources),
        getRandomSampler({
          security: resourcePlugins.security,
          probability: 1,
          request,
        }),
      ]);

    const esClient = coreContext.elasticsearch.client.asCurrentUser;

    return getObservabilityAlertDetailsContext({
      coreContext,
      apmEventClient,
      annotationsClient,
      apmAlertsClient,
      mlClient,
      esClient,
      query,
      logger,
    });
  };
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { isEmpty } from 'lodash';
import { AlertDetailsContextualInsight } from '@kbn/observability-plugin/server/services';
import { APMDownstreamDependency } from '../get_apm_downstream_dependencies';
import { ServiceSummary } from '../get_apm_service_summary';
import { LogCategories } from '../get_log_categories';
import { ApmAnomalies } from '../get_apm_service_summary/get_anomalies';
import { ChangePointGrouping } from '../get_changepoints';

/**
 * Turns the APM data collected for an alert into a list of contextual insights.
 *
 * Each piece of data becomes one insight (key, description, data). Pieces for
 * which lodash `isEmpty` returns true are left out. The insights keep a fixed
 * order: service summary, downstream dependencies, service change points,
 * exit span change points, log categories, anomalies.
 *
 * @param serviceName - Interpolated into several descriptions.
 * @param serviceEnvironment - Interpolated into the anomalies description.
 * @returns The non-empty insights, in the order listed above.
 */
export function getApmAlertDetailsContextPrompt({
  serviceName,
  serviceEnvironment,
  serviceSummary,
  downstreamDependencies,
  logCategories,
  serviceChangePoints,
  exitSpanChangePoints,
  anomalies,
}: {
  serviceName?: string;
  serviceEnvironment?: string;
  serviceSummary?: ServiceSummary;
  downstreamDependencies?: APMDownstreamDependency[];
  logCategories: LogCategories;
  serviceChangePoints?: ChangePointGrouping[];
  exitSpanChangePoints?: ChangePointGrouping[];
  anomalies?: ApmAnomalies;
}): AlertDetailsContextualInsight[] {
  // Every possible insight, in output order; empty ones are dropped below.
  const candidates: AlertDetailsContextualInsight[] = [
    {
      key: 'serviceSummary',
      description: 'Metadata for the service where the alert occurred',
      data: serviceSummary,
    },
    {
      key: 'downstreamDependencies',
      description: `Downstream dependencies from the service "${serviceName}". Problems in these services can negatively affect the performance of "${serviceName}"`,
      data: downstreamDependencies,
    },
    {
      key: 'serviceChangePoints',
      description: `Significant change points for "${serviceName}". Use this to spot dips and spikes in throughput, latency and failure rate`,
      data: serviceChangePoints,
    },
    {
      key: 'exitSpanChangePoints',
      description: `Significant change points for the dependencies of "${serviceName}". Use this to spot dips or spikes in throughput, latency and failure rate for downstream dependencies`,
      data: exitSpanChangePoints,
    },
    {
      key: 'logCategories',
      description: `Log events occurring around the time of the alert`,
      data: logCategories,
    },
    {
      key: 'anomalies',
      description: `Anomalies for services running in the environment "${serviceEnvironment}"`,
      data: anomalies,
    },
  ];

  return candidates.filter(({ data }) => !isEmpty(data));
}
Loading

0 comments on commit d4872eb

Please sign in to comment.