diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/task.test.ts b/x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.test.ts similarity index 63% rename from x-pack/plugins/security_solution/server/lib/telemetry/task.test.ts rename to x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.test.ts index 9b29df711663b..3008919fd2ced 100644 --- a/x-pack/plugins/security_solution/server/lib/telemetry/task.test.ts +++ b/x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.test.ts @@ -5,13 +5,10 @@ * 2.0. */ -import moment from 'moment'; import { loggingSystemMock } from 'src/core/server/mocks'; - import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { TaskStatus } from '../../../../task_manager/server'; - -import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './task'; +import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './diagnostic_task'; import { createMockTelemetryEventsSender, MockTelemetryDiagnosticTask } from './mocks'; describe('test', () => { @@ -22,7 +19,7 @@ describe('test', () => { }); describe('basic diagnostic alert telemetry sanity checks', () => { - test('task can register', () => { + test('diagnostic task can register', () => { const telemetryDiagTask = new TelemetryDiagTask( logger, taskManagerMock.createSetup(), @@ -40,7 +37,7 @@ describe('test', () => { expect(mockTaskManager.registerTaskDefinitions).toHaveBeenCalled(); }); - test('task should be scheduled', async () => { + test('diagnostic task should be scheduled', async () => { const mockTaskManagerSetup = taskManagerMock.createSetup(); const telemetryDiagTask = new TelemetryDiagTask( logger, @@ -53,7 +50,7 @@ describe('test', () => { expect(mockTaskManagerStart.ensureScheduled).toHaveBeenCalled(); }); - test('task should run', async () => { + test('diagnostic task should run', async () => { const mockContext = createMockTelemetryEventsSender(true); const mockTaskManager = taskManagerMock.createSetup(); const telemetryDiagTask = new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockContext); @@ -79,7 +76,7 @@ describe('test', () => { expect(telemetryDiagTask.runTask).toHaveBeenCalled(); }); - test('task should not query elastic if telemetry is not opted in', async () => { + test('diagnostic task should not query elastic if telemetry is not opted in', async () => { const mockSender = createMockTelemetryEventsSender(false); const mockTaskManager = taskManagerMock.createSetup(); new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockSender); @@ -104,48 +101,4 @@ describe('test', () => { await taskRunner.run(); expect(mockSender.fetchDiagnosticAlerts).not.toHaveBeenCalled(); }); - - test('test -5 mins is returned when there is no previous task run', async () => { - const telemetryDiagTask = new TelemetryDiagTask( - logger, - taskManagerMock.createSetup(), - createMockTelemetryEventsSender(true) - ); - - const executeTo = moment().utc().toISOString(); - const executeFrom = undefined; - const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom); - - expect(newExecuteFrom).toEqual(moment(executeTo).subtract(5, 'minutes').toISOString()); - }); - - test('test -6 mins is returned when there was a previous task run', async () => { - const telemetryDiagTask = new TelemetryDiagTask( - logger, - taskManagerMock.createSetup(), - createMockTelemetryEventsSender(true) - ); - - const executeTo = moment().utc().toISOString(); - const executeFrom = moment(executeTo).subtract(6, 'minutes').toISOString(); - const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom); - - expect(newExecuteFrom).toEqual(executeFrom); - }); - - // it's possible if Kibana is down for a prolonged period the stored lastRun would have drifted - // if that is the case we will just roll it back to a 10 min search window - test('test 10 mins is returned when previous task run took longer than 10 minutes', async () => { - const telemetryDiagTask = new TelemetryDiagTask( - logger, - taskManagerMock.createSetup(), - createMockTelemetryEventsSender(true) - ); - - const executeTo = moment().utc().toISOString(); - const executeFrom = moment(executeTo).subtract(142, 'minutes').toISOString(); - const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom); - - expect(newExecuteFrom).toEqual(moment(executeTo).subtract(10, 'minutes').toISOString()); - }); }); diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/task.ts b/x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.ts similarity index 85% rename from x-pack/plugins/security_solution/server/lib/telemetry/task.ts rename to x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.ts index 766a8f007aeff..05d7396031a5f 100644 --- a/x-pack/plugins/security_solution/server/lib/telemetry/task.ts +++ b/x-pack/plugins/security_solution/server/lib/telemetry/diagnostic_task.ts @@ -12,6 +12,7 @@ import { TaskManagerSetupContract, TaskManagerStartContract, } from '../../../../task_manager/server'; +import { getLastTaskExecutionTimestamp } from './helpers'; import { TelemetryEventsSender, TelemetryEvent } from './sender'; export const TelemetryDiagTaskConstants = { @@ -43,7 +44,7 @@ export class TelemetryDiagTask { return { run: async () => { const executeTo = moment().utc().toISOString(); - const executeFrom = this.getLastExecutionTimestamp( + const executeFrom = getLastTaskExecutionTimestamp( executeTo, taskInstance.state?.lastExecutionTimestamp ); @@ -64,20 +65,6 @@ export class TelemetryDiagTask { }); } - public getLastExecutionTimestamp(executeTo: string, lastExecutionTimestamp?: string) { - if (lastExecutionTimestamp === undefined) { - this.logger.debug(`No last execution timestamp defined`); - return moment(executeTo).subtract(5, 'minutes').toISOString(); - } - - if (moment(executeTo).diff(lastExecutionTimestamp, 'minutes') >= 10) { - this.logger.debug(`last execution timestamp was greater than 10 minutes`); - return moment(executeTo).subtract(10, 'minutes').toISOString(); - } - - return lastExecutionTimestamp; - } - public start = async (taskManager: TaskManagerStartContract) => { try { await taskManager.ensureScheduled({ diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.test.ts b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.test.ts new file mode 100644 index 0000000000000..a056ef783f6cf --- /dev/null +++ b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.test.ts @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { loggingSystemMock } from 'src/core/server/mocks'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; +import { TelemetryEndpointTask } from './endpoint_task'; +import { createMockTelemetryEventsSender } from './mocks'; + +describe('test', () => { + let logger: ReturnType; + + beforeEach(() => { + logger = loggingSystemMock.createLogger(); + }); + + describe('endpoint alert telemetry checks', () => { + test('the task can register', () => { + const telemetryEndpointTask = new TelemetryEndpointTask( + logger, + taskManagerMock.createSetup(), + createMockTelemetryEventsSender(true) + ); + + expect(telemetryEndpointTask).toBeInstanceOf(TelemetryEndpointTask); + }); + }); + + test('the endpoint task should be registered', () => { + const mockTaskManager = taskManagerMock.createSetup(); + new TelemetryEndpointTask(logger, mockTaskManager, createMockTelemetryEventsSender(true)); + + expect(mockTaskManager.registerTaskDefinitions).toHaveBeenCalled(); + }); + + test('the endpoint task should be scheduled', async () => { + const mockTaskManagerSetup = taskManagerMock.createSetup(); + const telemetryEndpointTask = new TelemetryEndpointTask( + logger, + mockTaskManagerSetup, + createMockTelemetryEventsSender(true) + ); + + const mockTaskManagerStart = taskManagerMock.createStart(); + await telemetryEndpointTask.start(mockTaskManagerStart); + expect(mockTaskManagerStart.ensureScheduled).toHaveBeenCalled(); + }); +}); diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.ts b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.ts new file mode 100644 index 0000000000000..cac92983b3878 --- /dev/null +++ b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.ts @@ -0,0 +1,220 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; +import { Logger } from 'src/core/server'; +import { + ConcreteTaskInstance, + TaskManagerSetupContract, + TaskManagerStartContract, +} from '../../../../task_manager/server'; +import { getLastTaskExecutionTimestamp } from './helpers'; +import { TelemetryEventsSender } from './sender'; +import { FullAgentPolicyInput } from '../../../../fleet/common/types/models/agent_policy'; +import { + EndpointMetricsAggregation, + EndpointPolicyResponseAggregation, + EndpointPolicyResponseDocument, + FleetAgentCacheItem, +} from './types'; + +export const TelemetryEndpointTaskConstants = { + TIMEOUT: '5m', + TYPE: 'security:endpoint-meta-telemetry', + INTERVAL: '24m', + VERSION: '1.0.0', +}; + +export class TelemetryEndpointTask { + private readonly logger: Logger; + private readonly sender: TelemetryEventsSender; + + constructor( + logger: Logger, + taskManager: TaskManagerSetupContract, + sender: TelemetryEventsSender + ) { + this.logger = logger; + this.sender = sender; + + taskManager.registerTaskDefinitions({ + [TelemetryEndpointTaskConstants.TYPE]: { + title: 'Security Solution Telemetry Endpoint Metrics and Info task', + timeout: TelemetryEndpointTaskConstants.TIMEOUT, + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => { + const { state } = taskInstance; + + return { + run: async () => { + const executeTo = moment().utc().toISOString(); + const lastExecutionTimestamp = getLastTaskExecutionTimestamp( + executeTo, + taskInstance.state?.lastExecutionTimestamp + ); + + const hits = await this.runTask(taskInstance.id); + + return { + state: { + lastExecutionTimestamp, + runs: (state.runs || 0) + 1, + hits, + }, + }; + }, + cancel: async () => {}, + }; + }, + }, + }); + } + + public start = async (taskManager: TaskManagerStartContract) => { + try { + await taskManager.ensureScheduled({ + id: this.getTaskId(), + taskType: TelemetryEndpointTaskConstants.TYPE, + scope: ['securitySolution'], + schedule: { + interval: TelemetryEndpointTaskConstants.INTERVAL, + }, + state: { runs: 0 }, + params: { version: TelemetryEndpointTaskConstants.VERSION }, + }); + } catch (e) { + this.logger.error(`Error scheduling task, received ${e.message}`); + } + }; + + private getTaskId = (): string => { + return `${TelemetryEndpointTaskConstants.TYPE}:${TelemetryEndpointTaskConstants.VERSION}`; + }; + + private async fetchEndpointData() { + const [epMetricsResponse, fleetAgentsResponse, policyResponse] = await Promise.allSettled([ + this.sender.fetchEndpointMetrics(), + this.sender.fetchFleetAgents(), + this.sender.fetchFailedEndpointPolicyResponses(), + ]); + + return { + endpointMetrics: + epMetricsResponse.status === 'fulfilled' ? epMetricsResponse.value : undefined, + fleetAgentsResponse: + fleetAgentsResponse.status === 'fulfilled' ? fleetAgentsResponse.value : undefined, + epPolicyResponse: policyResponse.status === 'fulfilled' ? policyResponse.value : undefined, + }; + } + + public runTask = async (taskId: string) => { + if (taskId !== this.getTaskId()) { + this.logger.debug(`Outdated task running: ${taskId}`); + return 0; + } + + const isOptedIn = await this.sender.isTelemetryOptedIn(); + if (!isOptedIn) { + this.logger.debug(`Telemetry is not opted-in.`); + return 0; + } + + const endpointData = await this.fetchEndpointData(); + + const { body: endpointMetricsResponse } = (endpointData.endpointMetrics as unknown) as { + body: EndpointMetricsAggregation; + }; + if (endpointMetricsResponse.aggregations === undefined) { + this.logger.debug(`No endpoint metrics`); + return 0; + } + + const endpointMetrics = endpointMetricsResponse.aggregations.endpoint_agents.buckets.map( + (epMetrics) => { + return { + endpoint_agent: epMetrics.latest_metrics.hits.hits[0]._source.agent.id, + endpoint_metrics: epMetrics.latest_metrics.hits.hits[0]._source, + }; + } + ); + + if (endpointMetrics.length === 0) { + this.logger.debug('no reported endpoint metrics'); + return 0; + } + + const agentsResponse = endpointData.fleetAgentsResponse; + if (agentsResponse === undefined) { + this.logger.debug('no agents to report'); + return 0; + } + + const fleetAgents = agentsResponse?.agents.reduce((cache, agent) => { + cache.set(agent.id, { policy_id: agent.policy_id, policy_version: agent.policy_revision }); + return cache; + }, new Map()); + + const endpointPolicyCache = new Map(); + for (const policyInfo of fleetAgents.values()) { + if ( + policyInfo.policy_id !== null && + policyInfo.policy_id !== undefined && + !endpointPolicyCache.has(policyInfo.policy_id) + ) { + const packagePolicies = await this.sender.fetchEndpointPolicyConfigs(policyInfo.policy_id); + packagePolicies?.inputs.forEach((input) => { + if (input.type === 'endpoint' && policyInfo.policy_id !== undefined) { + endpointPolicyCache.set(policyInfo.policy_id, input); + } + }); + } + } + + const { body: failedPolicyResponses } = (endpointData.epPolicyResponse as unknown) as { + body: EndpointPolicyResponseAggregation; + }; + const policyResponses = failedPolicyResponses.aggregations.policy_responses.buckets.reduce( + (cache, bucket) => { + const doc = bucket.latest_response.hits.hits[0]; + cache.set(bucket.key, doc); + return cache; + }, + new Map() + ); + + const telemetryPayloads = endpointMetrics.map((endpoint) => { + let policyConfig = null; + let failedPolicy = null; + + const fleetAgentId = endpoint.endpoint_metrics.elastic.agent.id; + const endpointAgentId = endpoint.endpoint_agent; + + const policyInformation = fleetAgents.get(fleetAgentId); + if (policyInformation?.policy_id) { + policyConfig = endpointPolicyCache.get(policyInformation?.policy_id); + if (policyConfig) { + failedPolicy = policyResponses.get(policyConfig?.id); + } + } + + return { + agent_id: fleetAgentId, + endpoint_id: endpointAgentId, + endpoint_metrics: { + os: endpoint.endpoint_metrics.host.os, + cpu: endpoint.endpoint_metrics.Endpoint.metrics.cpu, + memory: endpoint.endpoint_metrics.Endpoint.metrics.memory, + uptime: endpoint.endpoint_metrics.Endpoint.metrics.uptime, + }, + policy_config: policyConfig, + policy_failure: failedPolicy, + }; + }); + + this.sender.sendOnDemand('endpoint-metadata', telemetryPayloads); + return telemetryPayloads.length; + }; +} diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/helpers.task.ts b/x-pack/plugins/security_solution/server/lib/telemetry/helpers.task.ts new file mode 100644 index 0000000000000..ec81f3d0a5fa4 --- /dev/null +++ b/x-pack/plugins/security_solution/server/lib/telemetry/helpers.task.ts @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; +import { getLastTaskExecutionTimestamp } from './helpers'; + +describe('test scheduled task helpers', () => { + test('test -5 mins is returned when there is no previous task run', async () => { + const executeTo = moment().utc().toISOString(); + const executeFrom = undefined; + const newExecuteFrom = getLastTaskExecutionTimestamp(executeTo, executeFrom); + + expect(newExecuteFrom).toEqual(moment(executeTo).subtract(5, 'minutes').toISOString()); + }); + + test('test -6 mins is returned when there was a previous task run', async () => { + const executeTo = moment().utc().toISOString(); + const executeFrom = moment(executeTo).subtract(6, 'minutes').toISOString(); + const newExecuteFrom = getLastTaskExecutionTimestamp(executeTo, executeFrom); + + expect(newExecuteFrom).toEqual(executeFrom); + }); + + // it's possible if Kibana is down for a prolonged period the stored lastRun would have drifted + // if that is the case we will just roll it back to a 10 min search window + test('test 10 mins is returned when previous task run took longer than 10 minutes', async () => { + const executeTo = moment().utc().toISOString(); + const executeFrom = moment(executeTo).subtract(142, 'minutes').toISOString(); + const newExecuteFrom = getLastTaskExecutionTimestamp(executeTo, executeFrom); + + expect(newExecuteFrom).toEqual(moment(executeTo).subtract(10, 'minutes').toISOString()); + }); +}); diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/helpers.ts b/x-pack/plugins/security_solution/server/lib/telemetry/helpers.ts new file mode 100644 index 0000000000000..e820116462fa2 --- /dev/null +++ b/x-pack/plugins/security_solution/server/lib/telemetry/helpers.ts @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; + +export const getLastTaskExecutionTimestamp = ( + executeTo: string, + lastExecutionTimestamp?: string +) => { + if (lastExecutionTimestamp === undefined) { + return moment(executeTo).subtract(5, 'minutes').toISOString(); + } + + if (moment(executeTo).diff(lastExecutionTimestamp, 'minutes') >= 10) { + return moment(executeTo).subtract(10, 'minutes').toISOString(); + } + + return lastExecutionTimestamp; +}; diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/mocks.ts b/x-pack/plugins/security_solution/server/lib/telemetry/mocks.ts index e76fd606c2054..6738113da103d 100644 --- a/x-pack/plugins/security_solution/server/lib/telemetry/mocks.ts +++ b/x-pack/plugins/security_solution/server/lib/telemetry/mocks.ts @@ -5,8 +5,10 @@ * 2.0. */ +// eslint-disable-next-line max-classes-per-file import { TelemetryEventsSender } from './sender'; -import { TelemetryDiagTask } from './task'; +import { TelemetryDiagTask } from './diagnostic_task'; +import { TelemetryEndpointTask } from './endpoint_task'; /** * Creates a mocked Telemetry Events Sender @@ -37,3 +39,10 @@ export const createMockTelemetryEventsSender = ( export class MockTelemetryDiagnosticTask extends TelemetryDiagTask { public runTask = jest.fn(); } + +/** + * Creates a mocked Telemetry Endpoint Task + */ +export class MockTelemetryEndpointTask extends TelemetryEndpointTask { + public runTask = jest.fn(); +} diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/sender.ts b/x-pack/plugins/security_solution/server/lib/telemetry/sender.ts index 4f552b3edcda4..302f56802a5a4 100644 --- a/x-pack/plugins/security_solution/server/lib/telemetry/sender.ts +++ b/x-pack/plugins/security_solution/server/lib/telemetry/sender.ts @@ -7,19 +7,19 @@ import { cloneDeep } from 'lodash'; import axios from 'axios'; -import { LegacyAPICaller } from 'kibana/server'; +import { LegacyAPICaller, SavedObjectsClientContract } from 'kibana/server'; import { URL } from 'url'; -import { Logger, CoreStart } from '../../../../../../src/core/server'; +import { CoreStart, ElasticsearchClient, Logger } from 'src/core/server'; +import { TelemetryPluginStart, TelemetryPluginSetup } from 'src/plugins/telemetry/server'; import { transformDataToNdjson } from '../../utils/read_stream/create_stream_from_ndjson'; -import { - TelemetryPluginStart, - TelemetryPluginSetup, -} from '../../../../../../src/plugins/telemetry/server'; import { TaskManagerSetupContract, TaskManagerStartContract, } from '../../../../task_manager/server'; -import { TelemetryDiagTask } from './task'; +import { TelemetryDiagTask } from './diagnostic_task'; +import { TelemetryEndpointTask } from './endpoint_task'; +import { EndpointAppContextService } from '../../endpoint/endpoint_app_context_services'; +import { AgentService, AgentPolicyServiceInterface } from '../../../../fleet/server'; type BaseSearchTypes = string | number | boolean | object; export type SearchTypes = BaseSearchTypes | BaseSearchTypes[] | undefined; @@ -45,6 +45,7 @@ export interface TelemetryEvent { export class TelemetryEventsSender { private readonly initialCheckDelayMs = 10 * 1000; private readonly checkIntervalMs = 60 * 1000; + private readonly max_records = 10_000; private readonly logger: Logger; private core?: CoreStart; private maxQueueSize = 100; @@ -55,6 +56,11 @@ export class TelemetryEventsSender { private queue: TelemetryEvent[] = []; private isOptedIn?: boolean = true; // Assume true until the first check private diagTask?: TelemetryDiagTask; + private epMetricsTask?: TelemetryEndpointTask; + private agentService?: AgentService; + private agentPolicyService?: AgentPolicyServiceInterface; + private esClient?: ElasticsearchClient; + private savedObjectClient?: SavedObjectsClientContract; constructor(logger: Logger) { this.logger = logger.get('telemetry_events'); @@ -65,20 +71,27 @@ export class TelemetryEventsSender { if (taskManager) { this.diagTask = new TelemetryDiagTask(this.logger, taskManager, this); + this.epMetricsTask = new TelemetryEndpointTask(this.logger, taskManager, this); } } public start( core?: CoreStart, telemetryStart?: TelemetryPluginStart, - taskManager?: TaskManagerStartContract + taskManager?: TaskManagerStartContract, + endpointContextService?: EndpointAppContextService ) { this.telemetryStart = telemetryStart; this.core = core; + this.esClient = core?.elasticsearch.client.asInternalUser; + this.agentService = endpointContextService?.getAgentService(); + this.agentPolicyService = endpointContextService?.getAgentPolicyService(); + this.savedObjectClient = (core?.savedObjects.createInternalRepository() as unknown) as SavedObjectsClientContract; - if (taskManager && this.diagTask) { - this.logger.debug(`Starting diag task`); + if (taskManager && this.diagTask && this.epMetricsTask) { + this.logger.debug(`Starting diagnostic and endpoint telemetry tasks`); this.diagTask.start(taskManager); + this.epMetricsTask.start(taskManager); } this.logger.debug(`Starting local task`); @@ -126,6 +139,112 @@ export class TelemetryEventsSender { return callCluster('search', query); } + public async fetchEndpointMetrics() { + if (this.esClient === undefined) { + throw Error('could not fetch policy responses. es client is not available'); + } + + const query = { + expand_wildcards: 'open,hidden', + index: `.ds-metrics-endpoint.metrics*`, + ignore_unavailable: false, + size: 0, // no query results required - only aggregation quantity + body: { + aggs: { + endpoint_agents: { + terms: { + size: this.max_records, + field: 'agent.id.keyword', + }, + aggs: { + latest_metrics: { + top_hits: { + size: 1, + sort: [ + { + '@timestamp': { + order: 'desc', + }, + }, + ], + }, + }, + }, + }, + }, + }, + }; + + // @ts-expect-error The types of 'body.aggs' are incompatible between these types. + return this.esClient.search(query); + } + + public async fetchFleetAgents() { + if (this.esClient === undefined) { + throw Error('could not fetch policy responses. es client is not available'); + } + + return this.agentService?.listAgents(this.esClient, { + perPage: this.max_records, + showInactive: true, + sortField: 'enrolled_at', + sortOrder: 'desc', + }); + } + + public async fetchEndpointPolicyConfigs(id: string) { + if (this.savedObjectClient === undefined) { + throw Error('could not fetch endpoint policy configs. saved object client is not available'); + } + + return this.agentPolicyService?.getFullAgentPolicy(this.savedObjectClient, id); + } + + public async fetchFailedEndpointPolicyResponses() { + if (this.esClient === undefined) { + throw Error('could not fetch policy responses. es client is not available'); + } + + const query = { + expand_wildcards: 'open,hidden', + index: `.ds-metrics-endpoint.policy*`, + ignore_unavailable: false, + size: 0, // no query results required - only aggregation quantity + body: { + query: { + match: { + 'Endpoint.policy.applied.status': 'failure', + }, + }, + aggs: { + policy_responses: { + terms: { + size: this.max_records, + field: 'Endpoint.policy.applied.id.keyword', + }, + aggs: { + latest_response: { + top_hits: { + size: 1, + sort: [ + { + '@timestamp': { + order: 'desc', + }, + }, + ], + }, + }, + }, + }, + }, + }, + }; + + // @ts-expect-error The types of 'body.aggs' are incompatible between these types. + return this.esClient.search(query); + } + public queueTelemetryEvents(events: TelemetryEvent[]) { const qlength = this.queue.length; @@ -179,7 +298,7 @@ export class TelemetryEventsSender { } const [telemetryUrl, clusterInfo, licenseInfo] = await Promise.all([ - this.fetchTelemetryUrl(), + this.fetchTelemetryUrl('alerts-endpoint'), this.fetchClusterInfo(), this.fetchLicenseInfo(), ]); @@ -211,6 +330,39 @@ export class TelemetryEventsSender { this.isSending = false; } + /** + * This function sends events to the elastic telemetry channel. Caution is required + * because it does no allowlist filtering. The caller is responsible for making sure + * that there is no sensitive material or PII in the records that are sent upstream. + * + * @param channel the elastic telemetry channel + * @param toSend telemetry events + */ + public async sendOnDemand(channel: string, toSend: unknown[]) { + try { + const [telemetryUrl, clusterInfo, licenseInfo] = await Promise.all([ + this.fetchTelemetryUrl(channel), + this.fetchClusterInfo(), + this.fetchLicenseInfo(), + ]); + + this.logger.debug(`Telemetry URL: ${telemetryUrl}`); + this.logger.debug( + `cluster_uuid: ${clusterInfo?.cluster_uuid} cluster_name: ${clusterInfo?.cluster_name}` + ); + + await this.sendEvents( + toSend, + telemetryUrl, + clusterInfo.cluster_uuid, + clusterInfo.version?.number, + licenseInfo?.uid + ); + } catch (err) { + this.logger.warn(`Error sending telemetry events data: ${err}`); + } + } + private async fetchClusterInfo(): Promise { if (!this.core) { throw Error("Couldn't fetch cluster info because core is not available"); @@ -219,12 +371,12 @@ export class TelemetryEventsSender { return getClusterInfo(callCluster); } - private async fetchTelemetryUrl(): Promise { + private async fetchTelemetryUrl(channel: string): Promise { const telemetryUrl = await this.telemetrySetup?.getTelemetryUrl(); if (!telemetryUrl) { throw Error("Couldn't get telemetry URL"); } - return getV3UrlFromV2(telemetryUrl.toString(), 'alerts-endpoint'); + return getV3UrlFromV2(telemetryUrl.toString(), channel); } private async fetchLicenseInfo(): Promise { @@ -258,7 +410,6 @@ export class TelemetryEventsSender { clusterVersionNumber: string | undefined, licenseId: string | undefined ) { - // this.logger.debug(`Sending events: ${JSON.stringify(events, null, 2)}`); const ndjson = transformDataToNdjson(events); // this.logger.debug(`NDJSON: ${ndjson}`); diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/types.ts b/x-pack/plugins/security_solution/server/lib/telemetry/types.ts new file mode 100644 index 0000000000000..435f3cf49d1f1 --- /dev/null +++ b/x-pack/plugins/security_solution/server/lib/telemetry/types.ts @@ -0,0 +1,130 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +// Sec Sol Kbn telemetry instrumentation specific + +export interface FleetAgentCacheItem { + policy_id: string | undefined; + policy_version: number | undefined | null; +} + +// EP Policy Response + +export interface EndpointPolicyResponseAggregation { + hits: { + total: { value: number }; + }; + aggregations: { + policy_responses: { + buckets: Array<{ + key: string; + doc_count: number; + latest_response: EndpointPolicyResponseHits; + }>; + }; + }; +} + +interface EndpointPolicyResponseHits { + hits: { + total: { value: number }; + hits: EndpointPolicyResponseDocument[]; + }; +} + +export interface EndpointPolicyResponseDocument { + _source: { + '@timestamp': string; + agent: { + id: string; + }; + event: { + agent_id_status: string; + }; + Endpoint: {}; + }; +} + +// EP Metrics + +export interface EndpointMetricsAggregation { + hits: { + total: { value: number }; + }; + aggregations: { + endpoint_agents: { + buckets: Array<{ key: string; doc_count: number; latest_metrics: EndpointMetricHits }>; + }; + }; +} + +interface EndpointMetricHits { + hits: { + total: { value: number }; + hits: EndpointMetricDocument[]; + }; +} + +interface EndpointMetricDocument { + _source: { + '@timestamp': string; + agent: { + id: string; + }; + Endpoint: { + metrics: EndpointMetrics; + }; + elastic: { + agent: { + id: string; + }; + }; + host: { + os: EndpointMetricOS; + }; + event: { + agent_id_status: string; + }; + }; +} + +export interface EndpointMetrics { + memory: { + endpoint: { + private: { + mean: number; + latest: number; + }; + }; + }; + cpu: { + endpoint: { + histogram: { + counts: number[]; + values: number[]; + }; + mean: number; + latest: number; + }; + }; + uptime: { + endpoint: number; + system: number; + }; +} + +interface EndpointMetricOS { + Ext: { + variant: string; + }; + kernel: string; + name: string; + family: string; + version: string; + platform: string; + full: string; +} diff --git a/x-pack/plugins/security_solution/server/plugin.ts b/x-pack/plugins/security_solution/server/plugin.ts index 453e98b020cbe..cd923a4b0619f 100644 --- a/x-pack/plugins/security_solution/server/plugin.ts +++ b/x-pack/plugins/security_solution/server/plugin.ts @@ -506,7 +506,12 @@ export class Plugin implements IPlugin