Skip to content

Commit

Permalink
Endpoint Telemetry: Agents Metrics + Policy Config / Response (elasti…
Browse files Browse the repository at this point in the history
…c#102171) (elastic#103851)

* [PH] Initial setup for endpoint task telemetry.

* Refactor / Add daily task for collecting fleet detail / policy resp / EP metrics

* [PH CD] Code walkthrough. Start fetching fleet policy configs.

* [PH] Pass in fleet agent service rather than homebrew queries.

* [PH] prepare to move away from legacy es client. Get fleet ep agents.

* Fetch agent policy configs.

* Stub ep policy responses.

* Fix CI + Types. Fix dep injection. Reimagine SO client creation.

* Create SO client properly

* Fetch EP Policy responses.

* Fetch EP Policy responses.

* Remove unused import

* Fetch failed policy responses from EP data stream.

* Remove unused imports.

* Combine failed policy responses with policy configs.

* Attach fleet agent + ep agent ids

* Add dedicated channel sender. Temp disable with feature flag.

* Remove ublock from the failed policy response.

* Fetch endpoint metrics.

* Fix bad merge commit.

* Get EP telemetry.

* Record last execution time of endpoint task

* Remove send on demand feature flag.

* Simplify cache conditional.

* Refactor into Promise.allSettled

* Fix type error.

* Bail if there are no endpoint metrics

* Bump interval to 24h.

Co-authored-by: Kibana Machine <[email protected]>

Co-authored-by: Pete Hampton <[email protected]>
  • Loading branch information
kibanamachine and pjhampton authored Jun 30, 2021
1 parent fe30b1b commit 3365f1b
Show file tree
Hide file tree
Showing 10 changed files with 649 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@
* 2.0.
*/

import moment from 'moment';
import { loggingSystemMock } from 'src/core/server/mocks';

import { taskManagerMock } from '../../../../task_manager/server/mocks';
import { TaskStatus } from '../../../../task_manager/server';

import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './task';
import { TelemetryDiagTask, TelemetryDiagTaskConstants } from './diagnostic_task';
import { createMockTelemetryEventsSender, MockTelemetryDiagnosticTask } from './mocks';

describe('test', () => {
Expand All @@ -22,7 +19,7 @@ describe('test', () => {
});

describe('basic diagnostic alert telemetry sanity checks', () => {
test('task can register', () => {
test('diagnostic task can register', () => {
const telemetryDiagTask = new TelemetryDiagTask(
logger,
taskManagerMock.createSetup(),
Expand All @@ -40,7 +37,7 @@ describe('test', () => {
expect(mockTaskManager.registerTaskDefinitions).toHaveBeenCalled();
});

test('task should be scheduled', async () => {
test('diagnostic task should be scheduled', async () => {
const mockTaskManagerSetup = taskManagerMock.createSetup();
const telemetryDiagTask = new TelemetryDiagTask(
logger,
Expand All @@ -53,7 +50,7 @@ describe('test', () => {
expect(mockTaskManagerStart.ensureScheduled).toHaveBeenCalled();
});

test('task should run', async () => {
test('diagnostic task should run', async () => {
const mockContext = createMockTelemetryEventsSender(true);
const mockTaskManager = taskManagerMock.createSetup();
const telemetryDiagTask = new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockContext);
Expand All @@ -79,7 +76,7 @@ describe('test', () => {
expect(telemetryDiagTask.runTask).toHaveBeenCalled();
});

test('task should not query elastic if telemetry is not opted in', async () => {
test('diagnostic task should not query elastic if telemetry is not opted in', async () => {
const mockSender = createMockTelemetryEventsSender(false);
const mockTaskManager = taskManagerMock.createSetup();
new MockTelemetryDiagnosticTask(logger, mockTaskManager, mockSender);
Expand All @@ -104,48 +101,4 @@ describe('test', () => {
await taskRunner.run();
expect(mockSender.fetchDiagnosticAlerts).not.toHaveBeenCalled();
});

// With no stored timestamp, the window start is expected to be 5 minutes before `executeTo`.
test('test -5 mins is returned when there is no previous task run', async () => {
  const windowEnd = moment().utc().toISOString();
  const expectedStart = moment(windowEnd).subtract(5, 'minutes').toISOString();

  const diagTask = new TelemetryDiagTask(
    logger,
    taskManagerMock.createSetup(),
    createMockTelemetryEventsSender(true)
  );

  expect(diagTask.getLastExecutionTimestamp(windowEnd, undefined)).toEqual(expectedStart);
});

// A stored timestamp 6 minutes before `executeTo` is expected to be returned unchanged.
test('test -6 mins is returned when there was a previous task run', async () => {
  const windowEnd = moment().utc().toISOString();
  const previousRun = moment(windowEnd).subtract(6, 'minutes').toISOString();

  const diagTask = new TelemetryDiagTask(
    logger,
    taskManagerMock.createSetup(),
    createMockTelemetryEventsSender(true)
  );

  expect(diagTask.getLastExecutionTimestamp(windowEnd, previousRun)).toEqual(previousRun);
});

// If Kibana has been down for a prolonged period, the stored lastRun may have drifted far
// into the past. In that case the search window is rolled back to 10 minutes.
test('test 10 mins is returned when previous task run took longer than 10 minutes', async () => {
  const windowEnd = moment().utc().toISOString();
  const stalePreviousRun = moment(windowEnd).subtract(142, 'minutes').toISOString();
  const expectedStart = moment(windowEnd).subtract(10, 'minutes').toISOString();

  const diagTask = new TelemetryDiagTask(
    logger,
    taskManagerMock.createSetup(),
    createMockTelemetryEventsSender(true)
  );

  expect(diagTask.getLastExecutionTimestamp(windowEnd, stalePreviousRun)).toEqual(expectedStart);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
TaskManagerSetupContract,
TaskManagerStartContract,
} from '../../../../task_manager/server';
import { getLastTaskExecutionTimestamp } from './helpers';
import { TelemetryEventsSender, TelemetryEvent } from './sender';

export const TelemetryDiagTaskConstants = {
Expand Down Expand Up @@ -43,7 +44,7 @@ export class TelemetryDiagTask {
return {
run: async () => {
const executeTo = moment().utc().toISOString();
const executeFrom = this.getLastExecutionTimestamp(
const executeFrom = getLastTaskExecutionTimestamp(
executeTo,
taskInstance.state?.lastExecutionTimestamp
);
Expand All @@ -64,20 +65,6 @@ export class TelemetryDiagTask {
});
}

/**
 * Picks the start of the search window for the current task run.
 *
 * @param executeTo ISO timestamp marking the end of the window.
 * @param lastExecutionTimestamp timestamp recorded by the previous run, if any.
 * @returns `executeTo` minus 5 minutes when no previous timestamp exists,
 * `executeTo` minus 10 minutes when the previous timestamp is 10 or more
 * minutes behind `executeTo`, and the previous timestamp itself otherwise.
 */
public getLastExecutionTimestamp(executeTo: string, lastExecutionTimestamp?: string) {
  // A fresh moment is built on every call because `subtract` mutates its receiver.
  const minutesBefore = (amount: number) =>
    moment(executeTo).subtract(amount, 'minutes').toISOString();

  if (lastExecutionTimestamp === undefined) {
    this.logger.debug(`No last execution timestamp defined`);
    return minutesBefore(5);
  }

  const elapsedMinutes = moment(executeTo).diff(lastExecutionTimestamp, 'minutes');
  const isStale = elapsedMinutes >= 10;
  if (!isStale) {
    return lastExecutionTimestamp;
  }

  this.logger.debug(`last execution timestamp was greater than 10 minutes`);
  return minutesBefore(10);
}

public start = async (taskManager: TaskManagerStartContract) => {
try {
await taskManager.ensureScheduled({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { loggingSystemMock } from 'src/core/server/mocks';
import { taskManagerMock } from '../../../../task_manager/server/mocks';
import { TelemetryEndpointTask } from './endpoint_task';
import { createMockTelemetryEventsSender } from './mocks';

// Smoke tests for TelemetryEndpointTask: construction, task definition registration and scheduling.
describe('test', () => {
  let logger: ReturnType<typeof loggingSystemMock.createLogger>;

  // Builds a task around the given task manager setup mock (a fresh one by default),
  // with a mock sender created via createMockTelemetryEventsSender(true).
  const buildEndpointTask = (taskManagerSetup = taskManagerMock.createSetup()) =>
    new TelemetryEndpointTask(logger, taskManagerSetup, createMockTelemetryEventsSender(true));

  beforeEach(() => {
    logger = loggingSystemMock.createLogger();
  });

  describe('endpoint alert telemetry checks', () => {
    test('the task can register', () => {
      expect(buildEndpointTask()).toBeInstanceOf(TelemetryEndpointTask);
    });
  });

  test('the endpoint task should be registered', () => {
    const taskManagerSetup = taskManagerMock.createSetup();
    buildEndpointTask(taskManagerSetup);

    expect(taskManagerSetup.registerTaskDefinitions).toHaveBeenCalled();
  });

  test('the endpoint task should be scheduled', async () => {
    const endpointTask = buildEndpointTask();
    const taskManagerStart = taskManagerMock.createStart();

    await endpointTask.start(taskManagerStart);

    expect(taskManagerStart.ensureScheduled).toHaveBeenCalled();
  });
});
Loading

0 comments on commit 3365f1b

Please sign in to comment.