Skip to content

Commit

Permalink
[Security Solution] Add retryIfConflict util for 409 conflicts in…
Browse files Browse the repository at this point in the history
… Integration tests (#174185)

## Summary

Fixes: #171428

**NOTE: the test where this was reported wasn't skipped, so this PR does
not unskip any tests.** However, the Flaky Test Runs help us determine
that the issue is no longer reproducible.

The `deleteAllPrebuiltRuleAssets` utility reported a `409 Conflict`,
presumably from `security-rule` assets that were attempted to be deleted
while they were being updated by a parallel process.

This PR wraps the `es.deleteByQuery` calls in the utils
`deleteAllPrebuiltRuleAssets` and `deleteAllTimelines` with a new
`retryIfConflict` helper, that will retry the operation if the ES
request fails with a `409`.

## Flaky test run

`bundled_prebuilt_rules_package` - **ESS** and **Serverless**:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4790

`large_prebuilt_rules_package` - **ESS** and **Serverless**:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4791

`update_prebuilt_rules_package` - **ESS** and **Serverless**:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4792

`management` - **ESS** and **Serverless**:
https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/4793

### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
  • Loading branch information
jpdjere authored Jan 11, 2024
1 parent 81d6478 commit b8c7306
Show file tree
Hide file tree
Showing 13 changed files with 105 additions and 42 deletions.
2 changes: 1 addition & 1 deletion x-pack/test/security_solution_api_integration/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"prebuilt_rules_update_prebuilt_rules_package:runner:serverless": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package serverless serverlessEnv",
"prebuilt_rules_update_prebuilt_rules_package:qa:serverless": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package serverless qaEnv",
"prebuilt_rules_update_prebuilt_rules_package:server:ess": "npm run initialize-server:dr:default prebuilt_rules/update_prebuilt_rules_package ess",
"prebuilt_rules_update_prebuilt_rules_package:runner:ess": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package ess essEnvs",
"prebuilt_rules_update_prebuilt_rules_package:runner:ess": "npm run run-tests:dr:default prebuilt_rules/update_prebuilt_rules_package ess essEnv",

"rule_execution_logic:server:serverless": "npm run initialize-server:dr:default rule_execution_logic serverless",
"rule_execution_logic:runner:serverless": "npm run run-tests:dr:default rule_execution_logic serverless serverlessEnv",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install_bundled_prebuilt_rules', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});

it('should list `security_detection_engine` as a bundled fleet package in the `fleet_package.json` file', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA prerelease_packages', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deletePrebuiltRulesFleetPackage(supertest);
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install_large_prebuilt_rules_package', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});

afterEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});

it('should install a package containing 15000 prebuilt rules without crashing', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ export default ({ getService }: FtrProviderContext): void => {
beforeEach(async () => {
await deletePrebuiltRulesFleetPackage(supertest);
await deleteAllRules(supertest, log);
await deleteAllTimelines(es);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllTimelines(es, log);
await deleteAllPrebuiltRuleAssets(es, log);
});

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA Prebuilt Rules status', () => {
describe('get_prebuilt_rules_status', () => {
beforeEach(async () => {
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deleteAllRules(supertest, log);
});

Expand Down Expand Up @@ -110,7 +110,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
Expand All @@ -130,7 +130,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
Expand All @@ -152,7 +152,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Recreate the rules without bumping any versions
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);

Expand Down Expand Up @@ -238,7 +238,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);

// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
Expand All @@ -261,7 +261,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);

// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
Expand All @@ -286,7 +286,7 @@ export default ({ getService }: FtrProviderContext): void => {

describe('get_prebuilt_rules_status - legacy', () => {
beforeEach(async () => {
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await deleteAllRules(supertest, log);
});

Expand Down Expand Up @@ -367,7 +367,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
Expand All @@ -387,7 +387,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Recreate the rules without bumping any versions
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);

Expand Down Expand Up @@ -473,7 +473,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);

// Delete the previous versions of rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);

// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ import {
export default ({ getService }: FtrProviderContext): void => {
const supertest = getService('supertest');
const es = getService('es');
const log = getService('log');

describe('@ess @serverless @skipInQA get_prebuilt_timelines_status', () => {
beforeEach(async () => {
await deleteAllTimelines(es);
await deleteAllTimelines(es, log);
});

it('should return the number of timeline templates available to install', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ export default ({ getService }: FtrProviderContext): void => {
describe('@ess @serverless @skipInQA install and upgrade prebuilt rules with mock rule assets', () => {
beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllTimelines(es);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllTimelines(es, log);
await deleteAllPrebuiltRuleAssets(es, log);
});

describe(`rule package without historical versions`, () => {
Expand Down Expand Up @@ -96,7 +96,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
Expand Down Expand Up @@ -177,7 +177,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
// Increment the version of one of the installed rules and create the new rule assets
ruleAssetSavedObjects[0]['security-rule'].version += 1;
await createPrebuiltRuleAssetSavedObjects(es, ruleAssetSavedObjects);
Expand Down Expand Up @@ -315,7 +315,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRulesAndTimelines(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);

// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
Expand Down Expand Up @@ -423,7 +423,7 @@ export default ({ getService }: FtrProviderContext): void => {
await installPrebuiltRules(es, supertest);

// Clear previous rule assets
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);

// Add a new rule version
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ export default ({ getService }: FtrProviderContext): void => {

beforeEach(async () => {
await deleteAllRules(supertest, log);
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
});

it('should allow user to install prebuilt rules from scratch, then install new rules and upgrade existing rules from the new package', async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export default ({ getService }: FtrProviderContext): void => {

describe('when there are installed prebuilt rules', () => {
beforeEach(async () => {
await deleteAllPrebuiltRuleAssets(es);
await deleteAllPrebuiltRuleAssets(es, log);
await installMockPrebuiltRules(supertest, es);
});

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { DeleteByQueryResponse } from '@elastic/elasticsearch/lib/api/types';
import { ToolingLog } from '@kbn/tooling-log';

// Number of times to retry when conflicts occur
const RETRY_ATTEMPTS = 2;

// Delay between retries when conflicts occur
const RETRY_DELAY = 200;

/*
* Retry an Elasticsearch deleteByQuery operation if it runs into 409 Conflicts,
* up to a maximum number of attempts.
*/
export async function retryIfDeleteByQueryConflicts<T>(
logger: ToolingLog,
name: string,
operation: () => Promise<DeleteByQueryResponse>,
retries: number = RETRY_ATTEMPTS,
retryDelay: number = RETRY_DELAY
): Promise<DeleteByQueryResponse> {
const operationResult = await operation();
if (!operationResult.failures || operationResult.failures?.length === 0) {
return operationResult;
}

for (const failure of operationResult.failures) {
if (failure.status === 409) {
// if no retries left, throw it
if (retries <= 0) {
logger.error(`${name} conflict, exceeded retries`);
throw new Error(`${name} conflict, exceeded retries`);
}

// Otherwise, delay a bit before retrying
logger.debug(`${name} conflict, retrying ...`);
await waitBeforeNextRetry(retryDelay);
return await retryIfDeleteByQueryConflicts(logger, name, operation, retries - 1);
}
}

return operationResult;
}

async function waitBeforeNextRetry(retryDelay: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, retryDelay));
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,26 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ToolingLog } from '@kbn/tooling-log';
import type { Client } from '@elastic/elasticsearch';
import { SECURITY_SOLUTION_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { retryIfDeleteByQueryConflicts } from '../../retry_delete_by_query_conflicts';

/**
* Remove all prebuilt rule assets from the security solution savedObjects index
* @param es The ElasticSearch handle
*/
export const deleteAllPrebuiltRuleAssets = async (es: Client): Promise<void> => {
await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:security-rule',
wait_for_completion: true,
refresh: true,
body: {},
export const deleteAllPrebuiltRuleAssets = async (
es: Client,
logger: ToolingLog
): Promise<void> => {
await retryIfDeleteByQueryConflicts(logger, deleteAllPrebuiltRuleAssets.name, async () => {
return await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:security-rule',
wait_for_completion: true,
refresh: true,
body: {},
});
});
};
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,23 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ToolingLog } from '@kbn/tooling-log';
import type { Client } from '@elastic/elasticsearch';
import { SECURITY_SOLUTION_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server';
import { retryIfDeleteByQueryConflicts } from '../../retry_delete_by_query_conflicts';

/**
* Remove all timelines from the security solution savedObjects index
* @param es The ElasticSearch handle
*/
export const deleteAllTimelines = async (es: Client): Promise<void> => {
await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:siem-ui-timeline',
wait_for_completion: true,
refresh: true,
body: {},
export const deleteAllTimelines = async (es: Client, logger: ToolingLog): Promise<void> => {
await retryIfDeleteByQueryConflicts(logger, deleteAllTimelines.name, async () => {
return await es.deleteByQuery({
index: SECURITY_SOLUTION_SAVED_OBJECT_INDEX,
q: 'type:siem-ui-timeline',
wait_for_completion: true,
refresh: true,
body: {},
});
});
};

0 comments on commit b8c7306

Please sign in to comment.