elastic · pjhampton · Jun 30, 2021 · Jun 15, 2021 · Jun 15, 2021 · Jun 15, 2021
diff --git a/...olution/server/lib/telemetry/task.test.ts → ...ver/lib/telemetry/diagnostic_task.test.ts b/...olution/server/lib/telemetry/task.test.ts → ...ver/lib/telemetry/diagnostic_task.test.ts
@@ -5,13 +5,10 @@
  * 2.0.
  */
 
-import moment from 'moment';
 import { loggingSystemMock } from 'src/core/server/mocks';
-
 import { taskManagerMock } from '../../../../task_manager/server/mocks';
 import { TaskStatus } from '../../../../task_manager/server';
-
-import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './task';
+import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './diagnostic_task';
 import { createMockTelemetryEventsSender, MockTelemetryDiagnosticTask } from './mocks';
 
 describe('test', () => {
@@ -22,7 +19,7 @@ describe('test', () => {
   });
 
   describe('basic diagnostic alert telemetry sanity checks', () => {
-    test('task can register', () => {
+    test('diagnostic task can register', () => {
       const telemetryDiagTask = new TelemetryDiagTask(
         logger,
         taskManagerMock.createSetup(),
@@ -40,7 +37,7 @@ describe('test', () => {
     expect(mockTaskManager.registerTaskDefinitions).toHaveBeenCalled();
   });
 
-  test('task should be scheduled', async () => {
+  test('diagnostic task should be scheduled', async () => {
     const mockTaskManagerSetup = taskManagerMock.createSetup();
     const telemetryDiagTask = new TelemetryDiagTask(
       logger,
@@ -53,7 +50,7 @@ describe('test', () => {
     expect(mockTaskManagerStart.ensureScheduled).toHaveBeenCalled();
   });
 
-  test('task should run', async () => {
+  test('diagnostic task should run', async () => {
     const mockContext = createMockTelemetryEventsSender(true);
     const mockTaskManager = taskManagerMock.createSetup();
     const telemetryDiagTask = new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockContext);
@@ -79,7 +76,7 @@ describe('test', () => {
     expect(telemetryDiagTask.runTask).toHaveBeenCalled();
   });
 
-  test('task should not query elastic if telemetry is not opted in', async () => {
+  test('diagnostic task should not query elastic if telemetry is not opted in', async () => {
     const mockSender = createMockTelemetryEventsSender(false);
     const mockTaskManager = taskManagerMock.createSetup();
     new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockSender);
@@ -104,48 +101,4 @@ describe('test', () => {
     await taskRunner.run();
     expect(mockSender.fetchDiagnosticAlerts).not.toHaveBeenCalled();
   });
-
-  test('test -5 mins is returned when there is no previous task run', async () => {
-    const telemetryDiagTask = new TelemetryDiagTask(
-      logger,
-      taskManagerMock.createSetup(),
-      createMockTelemetryEventsSender(true)
-    );
-
-    const executeTo = moment().utc().toISOString();
-    const executeFrom = undefined;
-    const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom);
-
-    expect(newExecuteFrom).toEqual(moment(executeTo).subtract(5, 'minutes').toISOString());
-  });
-
-  test('test -6 mins is returned when there was a previous task run', async () => {
-    const telemetryDiagTask = new TelemetryDiagTask(
-      logger,
-      taskManagerMock.createSetup(),
-      createMockTelemetryEventsSender(true)
-    );
-
-    const executeTo = moment().utc().toISOString();
-    const executeFrom = moment(executeTo).subtract(6, 'minutes').toISOString();
-    const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom);
-
-    expect(newExecuteFrom).toEqual(executeFrom);
-  });
-
-  // it's possible if Kibana is down for a prolonged period the stored lastRun would have drifted
-  // if that is the case we will just roll it back to a 10 min search window
-  test('test 10 mins is returned when previous task run took longer than 10 minutes', async () => {
-    const telemetryDiagTask = new TelemetryDiagTask(
-      logger,
-      taskManagerMock.createSetup(),
-      createMockTelemetryEventsSender(true)
-    );
-
-    const executeTo = moment().utc().toISOString();
-    const executeFrom = moment(executeTo).subtract(142, 'minutes').toISOString();
-    const newExecuteFrom = telemetryDiagTask.getLastExecutionTimestamp(executeTo, executeFrom);
-
-    expect(newExecuteFrom).toEqual(moment(executeTo).subtract(10, 'minutes').toISOString());
-  });
 });
diff --git a/...ity_solution/server/lib/telemetry/task.ts → ...n/server/lib/telemetry/diagnostic_task.ts b/...ity_solution/server/lib/telemetry/task.ts → ...n/server/lib/telemetry/diagnostic_task.ts
@@ -12,6 +12,7 @@ import {
   TaskManagerSetupContract,
   TaskManagerStartContract,
 } from '../../../../task_manager/server';
+import { getLastTaskExecutionTimestamp } from './helpers';
 import { TelemetryEventsSender, TelemetryEvent } from './sender';
 
 export const TelemetryDiagTaskConstants = {
@@ -43,7 +44,7 @@ export class TelemetryDiagTask {
           return {
             run: async () => {
               const executeTo = moment().utc().toISOString();
-              const executeFrom = this.getLastExecutionTimestamp(
+              const executeFrom = getLastTaskExecutionTimestamp(
                 executeTo,
                 taskInstance.state?.lastExecutionTimestamp
               );
@@ -64,20 +65,6 @@ export class TelemetryDiagTask {
     });
   }
 
-  public getLastExecutionTimestamp(executeTo: string, lastExecutionTimestamp?: string) {
-    if (lastExecutionTimestamp === undefined) {
-      this.logger.debug(`No last execution timestamp defined`);
-      return moment(executeTo).subtract(5, 'minutes').toISOString();
-    }
-
-    if (moment(executeTo).diff(lastExecutionTimestamp, 'minutes') >= 10) {
-      this.logger.debug(`last execution timestamp was greater than 10 minutes`);
-      return moment(executeTo).subtract(10, 'minutes').toISOString();
-    }
-
-    return lastExecutionTimestamp;
-  }
-
   public start = async (taskManager: TaskManagerStartContract) => {
     try {
       await taskManager.ensureScheduled({

diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.test.ts b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.test.ts
@@ -0,0 +1,51 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { loggingSystemMock } from 'src/core/server/mocks';
+import { taskManagerMock } from '../../../../task_manager/server/mocks';
+import { TelemetryEndpointTask } from './endpoint_task';
+import { createMockTelemetryEventsSender } from './mocks';
+
+describe('test', () => {
+  let logger: ReturnType<typeof loggingSystemMock.createLogger>;
+
+  /**
+   * Builds the task under test against the current mock logger and a mock events sender
+   * created with `true` (presumably "telemetry opted in" — confirm against ./mocks).
+   * The task manager setup mock can be supplied so a test can assert on it afterwards.
+   */
+  const buildEndpointTask = (taskManagerSetup = taskManagerMock.createSetup()) =>
+    new TelemetryEndpointTask(logger, taskManagerSetup, createMockTelemetryEventsSender(true));
+
+  // A fresh logger per test keeps recorded calls from leaking between cases.
+  beforeEach(() => {
+    logger = loggingSystemMock.createLogger();
+  });
+
+  describe('endpoint alert telemetry checks', () => {
+    // Construction against mocked dependencies must produce a task instance.
+    test('the task can register', () => {
+      const endpointTask = buildEndpointTask();
+
+      expect(endpointTask).toBeInstanceOf(TelemetryEndpointTask);
+    });
+  });
+
+  // The constructor is expected to hand a task definition to the task manager.
+  test('the endpoint task should be registered', () => {
+    const taskManagerSetup = taskManagerMock.createSetup();
+    buildEndpointTask(taskManagerSetup);
+
+    expect(taskManagerSetup.registerTaskDefinitions).toHaveBeenCalled();
+  });
+
+  // Starting the task is expected to schedule it through the task manager start contract.
+  test('the endpoint task should be scheduled', async () => {
+    const endpointTask = buildEndpointTask(taskManagerMock.createSetup());
+    const taskManagerStart = taskManagerMock.createStart();
+
+    await endpointTask.start(taskManagerStart);
+
+    expect(taskManagerStart.ensureScheduled).toHaveBeenCalled();
+  });
+});
diff --git a/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.ts b/x-pack/plugins/security_solution/server/lib/telemetry/endpoint_task.ts
@@ -0,0 +1,202 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import moment from 'moment';
+import { Logger } from 'src/core/server';
+import {
+  ConcreteTaskInstance,
+  TaskManagerSetupContract,
+  TaskManagerStartContract,
+} from '../../../../task_manager/server';
+import { getLastTaskExecutionTimestamp } from './helpers';
+import { TelemetryEventsSender } from './sender';
+import { FullAgentPolicyInput } from '../../../../fleet/common/types/models/agent_policy';
+import {
+  EndpointMetricsAggregation,
+  EndpointPolicyResponseAggregation,
+  EndpointPolicyResponseDocument,
+  FleetAgentCacheItem,
+} from './types';
+
+/**
+ * Static settings for the endpoint telemetry task: they parameterise both the task
+ * definition registered with Task Manager and the scheduled task instance.
+ */
+export const TelemetryEndpointTaskConstants = {
+  // Used as the `timeout` of the registered task definition.
+  TIMEOUT: '5m',
+  // Key of the task definition, `taskType` of the scheduled task, and prefix of the task id.
+  TYPE: 'security:endpoint-meta-telemetry',
+  // Used as the `schedule.interval` when the task is scheduled.
+  INTERVAL: '24h',
+  // Sent as `params.version` and used as the suffix of the task id.
+  VERSION: '1.0.0',
+};
+
+/**
+ * Task Manager task that gathers endpoint metrics, the Fleet agent -> policy mapping,
+ * endpoint policy configurations and failed policy responses through the telemetry
+ * sender, and forwards one combined payload per endpoint on the `endpoint-metadata`
+ * channel.
+ */
+export class TelemetryEndpointTask {
+  private readonly logger: Logger;
+  private readonly sender: TelemetryEventsSender;
+
+  /**
+   * Stores the collaborators and registers the task definition with Task Manager.
+   *
+   * @param logger used for debug and error output
+   * @param taskManager setup contract the task definition is registered on
+   * @param sender source of all queried data and sink for the resulting payloads
+   */
+  constructor(
+    logger: Logger,
+    taskManager: TaskManagerSetupContract,
+    sender: TelemetryEventsSender
+  ) {
+    this.logger = logger;
+    this.sender = sender;
+
+    taskManager.registerTaskDefinitions({
+      [TelemetryEndpointTaskConstants.TYPE]: {
+        title: 'Security Solution Telemetry Endpoint Metrics and Info task',
+        timeout: TelemetryEndpointTaskConstants.TIMEOUT,
+        createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => {
+          const { state } = taskInstance;
+
+          return {
+            run: async () => {
+              const executeTo = moment().utc().toISOString();
+              // NOTE(review): the value persisted below is whatever the helper returns for
+              // the previous timestamp, not `executeTo`, and `runTask` does not consume it.
+              // Confirm this is the intended bookkeeping.
+              const lastExecutionTimestamp = getLastTaskExecutionTimestamp(
+                executeTo,
+                taskInstance.state?.lastExecutionTimestamp
+              );
+
+              const hits = await this.runTask(taskInstance.id);
+
+              return {
+                state: {
+                  lastExecutionTimestamp,
+                  runs: (state.runs || 0) + 1,
+                  hits,
+                },
+              };
+            },
+            cancel: async () => {},
+          };
+        },
+      },
+    });
+  }
+
+  /**
+   * Ensures the task is scheduled. Scheduling errors are logged, not rethrown.
+   *
+   * @param taskManager start contract used to schedule the task
+   */
+  public start = async (taskManager: TaskManagerStartContract) => {
+    try {
+      await taskManager.ensureScheduled({
+        id: this.getTaskId(),
+        taskType: TelemetryEndpointTaskConstants.TYPE,
+        scope: ['securitySolution'],
+        schedule: {
+          interval: TelemetryEndpointTaskConstants.INTERVAL,
+        },
+        state: { runs: 0 },
+        params: { version: TelemetryEndpointTaskConstants.VERSION },
+      });
+    } catch (e) {
+      this.logger.error(`Error scheduling task, received ${e.message}`);
+    }
+  };
+
+  /** Task id in the form `<TYPE>:<VERSION>`. */
+  private getTaskId = (): string => {
+    return `${TelemetryEndpointTaskConstants.TYPE}:${TelemetryEndpointTaskConstants.VERSION}`;
+  };
+
+  /**
+   * Collects the endpoint telemetry and sends it.
+   *
+   * @param taskId id of the running task instance; instances whose id does not match the
+   *   current `<TYPE>:<VERSION>` id are treated as outdated and do nothing
+   * @returns the number of payloads handed to the sender, or 0 when nothing was sent
+   */
+  public runTask = async (taskId: string) => {
+    if (taskId !== this.getTaskId()) {
+      this.logger.debug(`Outdated task running: ${taskId}`);
+      return 0;
+    }
+
+    const isOptedIn = await this.sender.isTelemetryOptedIn();
+    if (!isOptedIn) {
+      this.logger.debug(`Telemetry is not opted-in.`);
+      return 0;
+    }
+
+    const {
+      body: endpointMetricsResponse,
+    } = ((await this.sender.fetchEndpointMetrics()) as unknown) as {
+      body: EndpointMetricsAggregation;
+    };
+    // The response shape is only asserted by the cast above, so guard against a missing
+    // `aggregations` section instead of failing with a TypeError.
+    if (endpointMetricsResponse.aggregations === undefined) {
+      this.logger.debug('no reported endpoint metrics');
+      return 0;
+    }
+
+    // Take the first hit of every agent bucket; buckets without a hit are skipped so a
+    // single empty bucket cannot abort the whole run.
+    const endpointMetrics = endpointMetricsResponse.aggregations.endpoint_agents.buckets
+      .map((epMetrics) => epMetrics.latest_metrics.hits.hits[0])
+      .filter((latestHit) => latestHit !== undefined)
+      .map((latestHit) => ({
+        endpoint_agent: latestHit._source.agent.id,
+        endpoint_metrics: latestHit._source,
+      }));
+
+    if (endpointMetrics.length === 0) {
+      this.logger.debug('no reported endpoint metrics');
+      return 0;
+    }
+
+    const agentsResponse = await this.sender.fetchFleetAgents();
+    if (agentsResponse === undefined) {
+      this.logger.debug('no agents to report');
+      return 0;
+    }
+
+    // Fleet agent id -> policy id / revision.
+    const fleetAgents = agentsResponse.agents.reduce((cache, agent) => {
+      cache.set(agent.id, { policy_id: agent.policy_id, policy_version: agent.policy_revision });
+      return cache;
+    }, new Map<string, FleetAgentCacheItem>());
+
+    // Collect the distinct policy ids first so each policy is fetched exactly once, even
+    // when it turns out to have no endpoint input and therefore never enters the cache.
+    const policyIds = new Set<string>();
+    for (const policyInfo of fleetAgents.values()) {
+      if (policyInfo.policy_id !== null && policyInfo.policy_id !== undefined) {
+        policyIds.add(policyInfo.policy_id);
+      }
+    }
+
+    // Policy id -> endpoint input of that policy (the last one wins if there are several).
+    const endpointPolicyCache = new Map<string, FullAgentPolicyInput>();
+    for (const policyId of policyIds) {
+      const packagePolicies = await this.sender.fetchEndpointPolicyConfigs(policyId);
+      packagePolicies?.inputs.forEach((input) => {
+        if (input.type === 'endpoint') {
+          endpointPolicyCache.set(policyId, input);
+        }
+      });
+    }
+
+    const {
+      body: failedPolicyResponses,
+    } = ((await this.sender.fetchFailedEndpointPolicyResponses()) as unknown) as {
+      body: EndpointPolicyResponseAggregation;
+    };
+    // Bucket key -> first policy response hit; a missing `aggregations` section or an
+    // empty bucket simply contributes nothing.
+    const policyResponses = new Map<string, EndpointPolicyResponseDocument>();
+    if (failedPolicyResponses.aggregations !== undefined) {
+      for (const bucket of failedPolicyResponses.aggregations.policy_responses.buckets) {
+        const doc = bucket.latest_response.hits.hits[0];
+        if (doc !== undefined) {
+          policyResponses.set(bucket.key, doc);
+        }
+      }
+    }
+
+    const telemetryPayloads = endpointMetrics.map((endpoint) => {
+      const fleetAgentId = endpoint.endpoint_metrics.elastic.agent.id;
+      const endpointAgentId = endpoint.endpoint_agent;
+
+      // Unknown values are reported as `null` consistently, never as `undefined`.
+      const policyId = fleetAgents.get(fleetAgentId)?.policy_id;
+      const policyConfig = policyId ? (endpointPolicyCache.get(policyId) ?? null) : null;
+      const failedPolicy = policyConfig ? (policyResponses.get(policyConfig.id) ?? null) : null;
+
+      return {
+        agent_id: fleetAgentId,
+        endpoint_id: endpointAgentId,
+        endpoint_metrics: {
+          os: endpoint.endpoint_metrics.host.os,
+          cpu: endpoint.endpoint_metrics.Endpoint.metrics.cpu,
+          memory: endpoint.endpoint_metrics.Endpoint.metrics.memory,
+          uptime: endpoint.endpoint_metrics.Endpoint.metrics.uptime,
+        },
+        policy_config: policyConfig,
+        policy_failure: failedPolicy,
+      };
+    });
+
+    // NOTE(review): an earlier comment here said a feature flag disables the channel send;
+    // no such flag is visible in this file — presumably it lives in the sender. Confirm.
+    // Awaited so a failed send surfaces as a failed run instead of a floating promise.
+    await this.sender.sendOnDemand('endpoint-metadata', telemetryPayloads);
+    return telemetryPayloads.length;
+  };
+}