Skip to content

Commit

Permalink
[RAM][Maintenance Window][8.8]Fix window maintenance workflow (#156427)
Browse files Browse the repository at this point in the history
## Summary

The way that we canceled every notification for our alert life cycle
during an active maintenance window was not close enough to what our
customers were expecting. For our persisted security solution alerts, we
did not have to change the logic because it will always be a new alert.
Therefore, @shanisagiv1, @mdefazio, @JiaweiWu, and @XavierM had a
discussion about this problem and we decided this:

To summarize, we will only keep the notification during a maintenance
window if an alert has been created/active outside of window
maintenance. We created three different scenarios to explain the new
logic and we will make the assumption that our alert has an action per
status change. For you to understand the different scenarios, I created
this legend below:
<img width="223" alt="image"
src="https://user-images.githubusercontent.com/189600/236045974-f4fa379b-db5e-41f8-91a8-2689b9f24dab.png">

### Scenario I
If an alert is active/created before a maintenance window and recovered
inside of the maintenance window then we will send notifications
<img width="463" alt="image"
src="https://user-images.githubusercontent.com/189600/236046473-d04df836-d3e6-42d8-97be-8b4f1544cc1a.png">

### Scenario II
If an alert is active/created and recovered inside of window maintenance
then we will NOT send notifications
<img width="407" alt="image"
src="https://user-images.githubusercontent.com/189600/236046913-c2f77131-9ff1-4864-9dab-89c4c429152e.png">

### Scenario III
if an alert is active/created in a maintenance window and recovered
outside of the maintenance window then we will not send notifications
<img width="496" alt="image"
src="https://user-images.githubusercontent.com/189600/236047613-e63efe52-87fa-419e-9e0e-965b1d10ae18.png">


### Checklist
- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

---------

Co-authored-by: Xavier Mouligneau <[email protected]>
Co-authored-by: Kibana Machine <[email protected]>
  • Loading branch information
3 people authored May 5, 2023
1 parent a83ab21 commit ea40798
Show file tree
Hide file tree
Showing 26 changed files with 542 additions and 103 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const metaSchema = t.partial({
flappingHistory: t.array(t.boolean),
// flapping flag that indicates whether the alert is flapping
flapping: t.boolean,
maintenanceWindowIds: t.array(t.string),
pendingRecoveredCount: t.number,
uuid: t.string,
});
Expand Down
8 changes: 8 additions & 0 deletions x-pack/plugins/alerting/server/alert/alert.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ describe('updateLastScheduledActions()', () => {
group: 'default',
},
flappingHistory: [],
maintenanceWindowIds: [],
},
});
});
Expand All @@ -357,6 +358,7 @@ describe('updateLastScheduledActions()', () => {
state: {},
meta: {
flappingHistory: [],
maintenanceWindowIds: [],
uuid: expect.any(String),
lastScheduledActions: {
date: new Date().toISOString(),
Expand All @@ -373,6 +375,7 @@ describe('updateLastScheduledActions()', () => {
const alert = new Alert<AlertInstanceState, AlertInstanceContext, DefaultActionGroupId>('1', {
meta: {
flappingHistory: [],
maintenanceWindowIds: [],
lastScheduledActions: {
date: new Date(),
group: 'default',
Expand All @@ -387,6 +390,7 @@ describe('updateLastScheduledActions()', () => {
state: {},
meta: {
flappingHistory: [],
maintenanceWindowIds: [],
uuid: expect.any(String),
lastScheduledActions: {
date: new Date().toISOString(),
Expand Down Expand Up @@ -484,6 +488,7 @@ describe('toJSON', () => {
group: 'default',
},
flappingHistory: [false, true],
maintenanceWindowIds: [],
flapping: false,
pendingRecoveredCount: 2,
},
Expand Down Expand Up @@ -548,6 +553,7 @@ describe('toRaw', () => {
meta: {
flappingHistory: [false, true, true],
flapping: false,
maintenanceWindowIds: [],
uuid: expect.any(String),
},
});
Expand All @@ -570,6 +576,7 @@ describe('setFlappingHistory', () => {
"flappingHistory": Array [
false,
],
"maintenanceWindowIds": Array [],
"uuid": Any<String>,
},
"state": Object {},
Expand Down Expand Up @@ -602,6 +609,7 @@ describe('setFlapping', () => {
"meta": Object {
"flapping": false,
"flappingHistory": Array [],
"maintenanceWindowIds": Array [],
"uuid": Any<String>,
},
"state": Object {},
Expand Down
11 changes: 10 additions & 1 deletion x-pack/plugins/alerting/server/alert/alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export class Alert<
this.context = {} as Context;
this.meta = meta;
this.meta.uuid = meta.uuid ?? uuidV4();

this.meta.maintenanceWindowIds = meta.maintenanceWindowIds ?? [];
if (!this.meta.flappingHistory) {
this.meta.flappingHistory = [];
}
Expand Down Expand Up @@ -229,6 +229,7 @@ export class Alert<
// for a recovered alert, we only care to track the flappingHistory,
// the flapping flag, and the UUID
meta: {
maintenanceWindowIds: this.meta.maintenanceWindowIds,
flappingHistory: this.meta.flappingHistory,
flapping: this.meta.flapping,
uuid: this.meta.uuid,
Expand Down Expand Up @@ -296,4 +297,12 @@ export class Alert<
get(alert, ALERT_UUID) === this.getId() || get(alert, ALERT_UUID) === this.getUuid()
);
}

setMaintenanceWindowIds(maintenanceWindowIds: string[] = []) {
this.meta.maintenanceWindowIds = maintenanceWindowIds;
}

getMaintenanceWindowIds() {
return this.meta.maintenanceWindowIds ?? [];
}
}
20 changes: 20 additions & 0 deletions x-pack/plugins/alerting/server/alert/create_alert_factory.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand Down Expand Up @@ -58,6 +59,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand All @@ -82,6 +84,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
alertFactory.create('1');
expect(alerts).toMatchObject({
Expand All @@ -103,6 +106,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 3,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});

expect(alertFactory.hasReachedAlertLimit()).toBe(false);
Expand All @@ -123,6 +127,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand Down Expand Up @@ -166,6 +171,7 @@ describe('createAlertFactory()', () => {
canSetRecoveryContext: true,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: ['test-id-1'],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand All @@ -184,6 +190,11 @@ describe('createAlertFactory()', () => {
const recoveredAlerts = getRecoveredAlertsFn!();
expect(Array.isArray(recoveredAlerts)).toBe(true);
expect(recoveredAlerts.length).toEqual(2);
expect(processAlerts).toHaveBeenLastCalledWith(
expect.objectContaining({
maintenanceWindowIds: ['test-id-1'],
})
);
});

test('returns empty array if no recovered alerts', () => {
Expand All @@ -194,6 +205,7 @@ describe('createAlertFactory()', () => {
maxAlerts: 1000,
canSetRecoveryContext: true,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand Down Expand Up @@ -221,6 +233,7 @@ describe('createAlertFactory()', () => {
maxAlerts: 1000,
canSetRecoveryContext: true,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand All @@ -247,6 +260,7 @@ describe('createAlertFactory()', () => {
maxAlerts: 1000,
canSetRecoveryContext: false,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toMatchObject({
Expand Down Expand Up @@ -275,6 +289,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});

const limit = alertFactory.alertLimit.getValue();
Expand All @@ -293,6 +308,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});

const limit = alertFactory.alertLimit.getValue();
Expand All @@ -308,6 +324,7 @@ describe('createAlertFactory()', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});

const limit = alertFactory.alertLimit.getValue();
Expand All @@ -324,11 +341,13 @@ describe('createAlertFactory()', () => {
maxAlerts: 1000,
canSetRecoveryContext: true,
autoRecoverAlerts: false,
maintenanceWindowIds: [],
});
const result = alertFactory.create('1');
expect(result).toEqual({
meta: {
flappingHistory: [],
maintenanceWindowIds: [],
uuid: expect.any(String),
},
state: {},
Expand All @@ -354,6 +373,7 @@ describe('getPublicAlertFactory', () => {
logger,
maxAlerts: 1000,
autoRecoverAlerts: true,
maintenanceWindowIds: [],
});

expect(alertFactory.create).toBeDefined();
Expand Down
3 changes: 3 additions & 0 deletions x-pack/plugins/alerting/server/alert/create_alert_factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export interface CreateAlertFactoryOpts<
logger: Logger;
maxAlerts: number;
autoRecoverAlerts: boolean;
maintenanceWindowIds: string[];
canSetRecoveryContext?: boolean;
}

Expand All @@ -66,6 +67,7 @@ export function createAlertFactory<
logger,
maxAlerts,
autoRecoverAlerts,
maintenanceWindowIds,
canSetRecoveryContext = false,
}: CreateAlertFactoryOpts<State, Context>): AlertFactory<State, Context, ActionGroupIds> {
// Keep track of which alerts we started with so we can determine which have recovered
Expand Down Expand Up @@ -152,6 +154,7 @@ export function createAlertFactory<
autoRecoverAlerts,
// flappingSettings.enabled is false, as we only want to use this function to get the recovered alerts
flappingSettings: DISABLE_FLAPPING_SETTINGS,
maintenanceWindowIds,
});
return Object.keys(currentRecoveredAlerts ?? {}).map(
(alertId: string) => currentRecoveredAlerts[alertId]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ describe('Legacy Alerts Client', () => {
'1': testAlert1,
'2': testAlert2,
},
{}
{},
['test-id-1']
);

expect(createAlertFactory).toHaveBeenCalledWith({
Expand All @@ -136,6 +137,7 @@ describe('Legacy Alerts Client', () => {
maxAlerts: 1000,
canSetRecoveryContext: false,
autoRecoverAlerts: true,
maintenanceWindowIds: ['test-id-1'],
});
});

Expand All @@ -151,7 +153,8 @@ describe('Legacy Alerts Client', () => {
'1': testAlert1,
'2': testAlert2,
},
{}
{},
[]
);

alertsClient.getExecutorServices();
Expand All @@ -170,7 +173,8 @@ describe('Legacy Alerts Client', () => {
'1': testAlert1,
'2': testAlert2,
},
{}
{},
[]
);

alertsClient.checkLimitUsage();
Expand All @@ -189,7 +193,8 @@ describe('Legacy Alerts Client', () => {
'1': testAlert1,
'2': testAlert2,
},
{}
{},
[]
);

alertsClient.hasReachedAlertLimit();
Expand Down Expand Up @@ -230,7 +235,8 @@ describe('Legacy Alerts Client', () => {
'1': testAlert1,
'2': testAlert2,
},
{}
{},
[]
);

alertsClient.processAndLogAlerts({
Expand All @@ -257,6 +263,7 @@ describe('Legacy Alerts Client', () => {
alertLimit: 1000,
autoRecoverAlerts: true,
flappingSettings: DEFAULT_FLAPPING_SETTINGS,
maintenanceWindowIds: ['window-id1', 'window-id2'],
});

expect(getAlertsForNotification).toHaveBeenCalledWith(
Expand Down Expand Up @@ -289,7 +296,6 @@ describe('Legacy Alerts Client', () => {
ruleRunMetricsStore,
canSetRecoveryContext: false,
shouldPersistAlerts: true,
maintenanceWindowIds: ['window-id1', 'window-id2'],
});

expect(alertsClient.getProcessedAlerts('active')).toEqual({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ export class LegacyAlertsClient<

public initialize(
activeAlertsFromState: Record<string, RawAlertInstance>,
recoveredAlertsFromState: Record<string, RawAlertInstance>
recoveredAlertsFromState: Record<string, RawAlertInstance>,
maintenanceWindowIds: string[]
) {
for (const id in activeAlertsFromState) {
if (activeAlertsFromState.hasOwnProperty(id)) {
Expand Down Expand Up @@ -107,6 +108,7 @@ export class LegacyAlertsClient<
maxAlerts: this.options.maxAlerts,
autoRecoverAlerts: this.options.ruleType.autoRecoverAlerts ?? true,
canSetRecoveryContext: this.options.ruleType.doesSetRecoveryContext ?? false,
maintenanceWindowIds,
});
}

Expand All @@ -125,7 +127,7 @@ export class LegacyAlertsClient<
ruleRunMetricsStore: RuleRunMetricsStore;
flappingSettings: RulesSettingsFlappingProperties;
notifyWhen: RuleNotifyWhenType | null;
maintenanceWindowIds?: string[];
maintenanceWindowIds: string[];
}) {
const {
newAlerts: processedAlertsNew,
Expand All @@ -143,6 +145,7 @@ export class LegacyAlertsClient<
? this.options.ruleType.autoRecoverAlerts
: true,
flappingSettings,
maintenanceWindowIds,
});

const { trimmedAlertsRecovered, earlyRecoveredAlerts } = trimRecoveredAlerts(
Expand Down Expand Up @@ -178,7 +181,6 @@ export class LegacyAlertsClient<
ruleRunMetricsStore,
canSetRecoveryContext: this.options.ruleType.doesSetRecoveryContext ?? false,
shouldPersistAlerts: shouldLogAndScheduleActionsForAlerts,
maintenanceWindowIds,
});
}

Expand Down
Loading

0 comments on commit ea40798

Please sign in to comment.