From ef9f37300890654f4228e4edf186dc822c1f64e7 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Sat, 19 Oct 2024 03:25:33 +1100 Subject: [PATCH] [8.x] [EEM] Replace hashed ID with human readable ID (#193652) (#196902) # Backport This will backport the following commits from `main` to `8.x`: - [[EEM] Replace hashed ID with human readable ID (#193652)](https://github.com/elastic/kibana/pull/193652) ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) Co-authored-by: Milton Hultgren --- .../check_registered_types.test.ts | 2 +- .../kbn-entities-schema/src/schema/common.ts | 2 +- .../entity_manager/docs/entity_definitions.md | 16 +-- .../fixtures/builtin_entity_definition.ts | 4 +- .../helpers/fixtures/entity_definition.ts | 4 +- .../generate_latest_processors.test.ts.snap | 108 ++---------------- .../generate_latest_processors.ts | 53 ++------- .../generate_latest_transform.test.ts.snap | 7 -- .../transform/generate_latest_transform.ts | 12 +- .../server/saved_objects/entity_definition.ts | 28 ++++- .../apis/entity_manager/definitions.ts | 1 + 11 files changed, 59 insertions(+), 178 deletions(-) diff --git a/src/core/server/integration_tests/ci_checks/saved_objects/check_registered_types.test.ts b/src/core/server/integration_tests/ci_checks/saved_objects/check_registered_types.test.ts index 2eaa14bf24eda..fa2af23224dc6 100644 --- a/src/core/server/integration_tests/ci_checks/saved_objects/check_registered_types.test.ts +++ b/src/core/server/integration_tests/ci_checks/saved_objects/check_registered_types.test.ts @@ -91,7 +91,7 @@ describe('checking migration metadata changes on all registered SO types', () => "endpoint:unified-user-artifact-manifest": "71c7fcb52c658b21ea2800a6b6a76972ae1c776e", "endpoint:user-artifact-manifest": "1c3533161811a58772e30cdc77bac4631da3ef2b", "enterprise_search_telemetry": "9ac912e1417fc8681e0cd383775382117c9e3d3d", - "entity-definition": 
"e3811fd5fbb878d170067c0d6897a2e63010af36", + "entity-definition": "1c6bff35c423d5dc5650bc806cf2899e4706a0bc", "entity-discovery-api-key": "c267a65c69171d1804362155c1378365f5acef88", "entity-engine-status": "8cb7dcb13f5e2ea8f2e08dd4af72c110e2051120", "epm-packages": "8042d4a1522f6c4e6f5486e791b3ffe3a22f88fd", diff --git a/x-pack/packages/kbn-entities-schema/src/schema/common.ts b/x-pack/packages/kbn-entities-schema/src/schema/common.ts index caecf48d88aac..3383cfaf7debb 100644 --- a/x-pack/packages/kbn-entities-schema/src/schema/common.ts +++ b/x-pack/packages/kbn-entities-schema/src/schema/common.ts @@ -145,7 +145,7 @@ export type MetadataField = z.infer; export const identityFieldsSchema = z .object({ field: z.string(), - optional: z.boolean(), + optional: z.literal(false), }) .or(z.string().transform((value) => ({ field: value, optional: false }))); diff --git a/x-pack/plugins/entity_manager/docs/entity_definitions.md b/x-pack/plugins/entity_manager/docs/entity_definitions.md index da02bc7f69c3f..fd08f1effd5a8 100644 --- a/x-pack/plugins/entity_manager/docs/entity_definitions.md +++ b/x-pack/plugins/entity_manager/docs/entity_definitions.md @@ -7,14 +7,11 @@ Entity definitions are a core concept of the entity model. They define the way t > [!NOTE] > Entity definitions are based on transform and as such a subset of the configuration is tightly coupled to transform settings. While we provide defaults for these settings, one can still update properties such as `frequency`, `sync.time.delay` and `sync.time.field` (see [transform documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-transform.html)). 
-When creating a definition (see [entity definition schema](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L21)), entity manager will create two transforms to collect entities based on the configured [identityFields](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L29): -- the history transform creates a snapshot of entities over time, reading documents from the configured source indices and grouping them by the identity fields and a date histogram. For a given entity the transform creates at most one document per interval (configured by the `history.settings.interval` setting), with its associated metrics and metadata fields aggregated over that interval. While metrics support [multiple aggregations](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/common.ts#L13), metadata use a `terms` aggregation (to be expanded by https://github.com/elastic/elastic-entity-model/issues/130). To limit the amount of data processed when created, history transform accepts a `history.settings.lookbackPeriod` that defaults to 1h. -- the summary transform creates one document per entity, reading documents from the history transform output indices. Each entity document gets overwritten over time, updating metadata and metrics with the following rules: metrics get the value of the most recent history document while metadata are aggregated over a computed period that attempts to limit the amount of data it looks at. +When creating a definition (see [entity definition schema](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L21)), entity manager will create a transform to collect entities based on the configured [identityFields](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L29). 
+The transform creates one document per entity, reading documents from the configured source indices and grouping them by the identity fields. Each entity document gets overwritten each time the transform runs. -The definition allows defining an optional backfill transform. This works on the principle that transforms only capture an immutable snapshot of the data at the time they execute. If data is ingested with delay and falls in a bucket that was already covered by a previous [transform checkpoint](https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html), the data will never be transformed in the output. Ideally one would sync the transform on the [event.ingested field](https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html#sync-field-ingest-timestamp) to work with delayed data, when that is not possible or desirable the backfill transform can be a fallback. Backfill transform will output its data to the same history indice, because transform uses deterministic ids for the generated document, it will not create duplicate but instead upsert documents from the initial history transform pass. -To enable the backfill transform set a value to `history.settings.backfillSyncDelay` higher than the `history.settings.syncDelay`. The backfill lookback and frequency can also be configured. - -History and summary transforms will output their data to indices where history writes to time-based (monthly) indices (`.entities.v1.history..`) and summary writes to a unique indice (`.entities.v1.latest.`). For convenience we create type-based aliases on top on these indices, where the type is extracted from the `entityDefinition.type` property. For a definition of `type: service`, the data can be read through the `entities-service-history` and `entities-service-latest` aliases. +The transform outputs the data to a unique index (`.entities.v1.latest.`). 
+For convenience we create type-based aliases on top of these indices, where the type is extracted from the `entityDefinition.type` property. For a definition of `type: service`, the data can be read through the `entities-service-history` and `entities-service-latest` aliases. #### Iterating on a definition @@ -48,7 +45,7 @@ Let's look at the most basic example, one that only discovers entities. ``` This definition will look inside the `logs-*` index pattern for documents that container the field `host.name` and group them based on that value to create the entities. It will run the discovery every 2 minutes. -The documents will be of type "host" so they can be queried via `entities-host-history` or `entities-host-latest`. Beyond the basic `entity` fields, each entity document will also contain all the identify fields at the root of the document, this it is easy to find your hosts by filtering by `host.name`. Note that it is not necessary to add the `identifyFields` as metadata as these will be automatically collected in the output documents, and that it is possible to set `identityFields` as optional. +The documents will be of type "host" so they can be queried via `entities-host-history` or `entities-host-latest`. Beyond the basic `entity` fields, each entity document will also contain all the identity fields at the root of the document, so it is easy to find your hosts by filtering by `host.name`. Note that it is not necessary to add the `identityFields` as metadata as these will be automatically collected in the output documents. An entity document for this definition will look like below. 
@@ -213,8 +210,7 @@ __service_from_logs definition__ "indexPatterns": ["logs-*"], /** the field/combination of fields identifying an entity **/ "identityFields": [ - "service.name", // == { "field": "service.name", "optional": false } - { "field": "service.environment", "optional": true } + "service.name", ], "displayNameTemplate": "{{service.name}}{{#service.environment}}:{{.}}{{/service.environment}}", // mustache template /** diff --git a/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/builtin_entity_definition.ts b/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/builtin_entity_definition.ts index b1e506150fb60..4d2327d5a3c12 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/builtin_entity_definition.ts +++ b/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/builtin_entity_definition.ts @@ -16,8 +16,8 @@ export const builtInEntityDefinition = entityDefinitionSchema.parse({ latest: { timestampField: '@timestamp', }, - identityFields: ['log.logger', { field: 'event.category', optional: true }], - displayNameTemplate: '{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}', + identityFields: ['log.logger'], + displayNameTemplate: '{{log.logger}}', metadata: ['tags', 'host.name', 'host.os.name', { source: '_index', destination: 'sourceIndex' }], metrics: [], }); diff --git a/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/entity_definition.ts b/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/entity_definition.ts index 00ab9ac7759af..6eec77eca3548 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/entity_definition.ts +++ b/x-pack/plugins/entity_manager/server/lib/entities/helpers/fixtures/entity_definition.ts @@ -20,8 +20,8 @@ export const rawEntityDefinition = { syncDelay: '10s', }, }, - identityFields: ['log.logger', { field: 'event.category', optional: true }], - displayNameTemplate: 
'{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}', + identityFields: ['log.logger'], + displayNameTemplate: '{{log.logger}}', metadata: ['tags', 'host.name', 'host.os.name', { source: '_index', destination: 'sourceIndex' }], metrics: [ { diff --git a/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/__snapshots__/generate_latest_processors.test.ts.snap b/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/__snapshots__/generate_latest_processors.test.ts.snap index 218deda422fe2..37f600f3a271f 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/__snapshots__/generate_latest_processors.test.ts.snap +++ b/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/__snapshots__/generate_latest_processors.test.ts.snap @@ -37,52 +37,13 @@ Array [ "field": "entity.identityFields", "value": Array [ "log.logger", - "event.category", ], }, }, Object { - "script": Object { - "description": "Generated the entity.id field", - "source": "// This function will recursively collect all the values of a HashMap of HashMaps -Collection collectValues(HashMap subject) { - Collection values = new ArrayList(); - // Iterate through the values - for(Object value: subject.values()) { - // If the value is a HashMap, recurse - if (value instanceof HashMap) { - values.addAll(collectValues((HashMap) value)); - } else { - values.add(String.valueOf(value)); - } - } - return values; -} -// Create the string builder -StringBuilder entityId = new StringBuilder(); -if (ctx[\\"entity\\"][\\"identity\\"] != null) { - // Get the values as a collection - Collection values = collectValues(ctx[\\"entity\\"][\\"identity\\"]); - // Convert to a list and sort - List sortedValues = new ArrayList(values); - Collections.sort(sortedValues); - // Create comma delimited string - for(String instanceValue: sortedValues) { - entityId.append(instanceValue); - entityId.append(\\":\\"); - } - // Assign the entity.id - ctx[\\"entity\\"][\\"id\\"] = 
entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : \\"unknown\\"; -}", - }, - }, - Object { - "fingerprint": Object { - "fields": Array [ - "entity.id", - ], - "method": "MurmurHash3", - "target_field": "entity.id", + "set": Object { + "field": "entity.id", + "value": "{{{entity.identity.log.logger}}}", }, }, Object { @@ -123,13 +84,6 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) { "value": "{{entity.identity.log.logger}}", }, }, - Object { - "set": Object { - "field": "event.category", - "if": "ctx.entity?.identity?.event?.category != null", - "value": "{{entity.identity.event.category}}", - }, - }, Object { "remove": Object { "field": "entity.identity", @@ -139,7 +93,7 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) { Object { "set": Object { "field": "entity.displayName", - "value": "{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}", + "value": "{{log.logger}}", }, }, Object { @@ -188,52 +142,13 @@ Array [ "field": "entity.identityFields", "value": Array [ "log.logger", - "event.category", ], }, }, Object { - "script": Object { - "description": "Generated the entity.id field", - "source": "// This function will recursively collect all the values of a HashMap of HashMaps -Collection collectValues(HashMap subject) { - Collection values = new ArrayList(); - // Iterate through the values - for(Object value: subject.values()) { - // If the value is a HashMap, recurse - if (value instanceof HashMap) { - values.addAll(collectValues((HashMap) value)); - } else { - values.add(String.valueOf(value)); - } - } - return values; -} -// Create the string builder -StringBuilder entityId = new StringBuilder(); -if (ctx[\\"entity\\"][\\"identity\\"] != null) { - // Get the values as a collection - Collection values = collectValues(ctx[\\"entity\\"][\\"identity\\"]); - // Convert to a list and sort - List sortedValues = new ArrayList(values); - Collections.sort(sortedValues); - // Create comma delimited string - for(String 
instanceValue: sortedValues) { - entityId.append(instanceValue); - entityId.append(\\":\\"); - } - // Assign the entity.id - ctx[\\"entity\\"][\\"id\\"] = entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : \\"unknown\\"; -}", - }, - }, - Object { - "fingerprint": Object { - "fields": Array [ - "entity.id", - ], - "method": "MurmurHash3", - "target_field": "entity.id", + "set": Object { + "field": "entity.id", + "value": "{{{entity.identity.log.logger}}}", }, }, Object { @@ -274,13 +189,6 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) { "value": "{{entity.identity.log.logger}}", }, }, - Object { - "set": Object { - "field": "event.category", - "if": "ctx.entity?.identity?.event?.category != null", - "value": "{{entity.identity.event.category}}", - }, - }, Object { "remove": Object { "field": "entity.identity", @@ -290,7 +198,7 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) { Object { "set": Object { "field": "entity.displayName", - "value": "{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}", + "value": "{{log.logger}}", }, }, Object { diff --git a/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/generate_latest_processors.ts b/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/generate_latest_processors.ts index 0e3812de2e320..787633246dede 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/generate_latest_processors.ts +++ b/x-pack/plugins/entity_manager/server/lib/entities/ingest_pipeline/generate_latest_processors.ts @@ -140,52 +140,13 @@ export function generateLatestProcessors(definition: EntityDefinition) { }, }, { - script: { - description: 'Generated the entity.id field', - source: cleanScript(` - // This function will recursively collect all the values of a HashMap of HashMaps - Collection collectValues(HashMap subject) { - Collection values = new ArrayList(); - // Iterate through the values - for(Object value: subject.values()) { - // If the value is 
a HashMap, recurse - if (value instanceof HashMap) { - values.addAll(collectValues((HashMap) value)); - } else { - values.add(String.valueOf(value)); - } - } - return values; - } - - // Create the string builder - StringBuilder entityId = new StringBuilder(); - - if (ctx["entity"]["identity"] != null) { - // Get the values as a collection - Collection values = collectValues(ctx["entity"]["identity"]); - - // Convert to a list and sort - List sortedValues = new ArrayList(values); - Collections.sort(sortedValues); - - // Create comma delimited string - for(String instanceValue: sortedValues) { - entityId.append(instanceValue); - entityId.append(":"); - } - - // Assign the entity.id - ctx["entity"]["id"] = entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : "unknown"; - } - `), - }, - }, - { - fingerprint: { - fields: ['entity.id'], - target_field: 'entity.id', - method: 'MurmurHash3', + set: { + field: 'entity.id', + value: definition.identityFields + .map((identityField) => identityField.field) + .sort() + .map((identityField) => `{{{entity.identity.${identityField}}}}`) + .join('-'), }, }, ...(definition.staticFields != null diff --git a/x-pack/plugins/entity_manager/server/lib/entities/transform/__snapshots__/generate_latest_transform.test.ts.snap b/x-pack/plugins/entity_manager/server/lib/entities/transform/__snapshots__/generate_latest_transform.test.ts.snap index 49f8ff4536120..94303584c45dc 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/transform/__snapshots__/generate_latest_transform.test.ts.snap +++ b/x-pack/plugins/entity_manager/server/lib/entities/transform/__snapshots__/generate_latest_transform.test.ts.snap @@ -139,16 +139,9 @@ Object { }, }, "group_by": Object { - "entity.identity.event.category": Object { - "terms": Object { - "field": "event.category", - "missing_bucket": true, - }, - }, "entity.identity.log.logger": Object { "terms": Object { "field": "log.logger", - "missing_bucket": false, }, }, }, diff --git 
a/x-pack/plugins/entity_manager/server/lib/entities/transform/generate_latest_transform.ts b/x-pack/plugins/entity_manager/server/lib/entities/transform/generate_latest_transform.ts index 573bb2225f183..c273469e3d3e3 100644 --- a/x-pack/plugins/entity_manager/server/lib/entities/transform/generate_latest_transform.ts +++ b/x-pack/plugins/entity_manager/server/lib/entities/transform/generate_latest_transform.ts @@ -32,13 +32,9 @@ export function generateLatestTransform( filter.push(getElasticsearchQueryOrThrow(definition.filter)); } - if (definition.identityFields.some(({ optional }) => !optional)) { - definition.identityFields - .filter(({ optional }) => !optional) - .forEach(({ field }) => { - filter.push({ exists: { field } }); - }); - } + definition.identityFields.forEach(({ field }) => { + filter.push({ exists: { field } }); + }); filter.push({ range: { @@ -108,7 +104,7 @@ const generateTransformPutRequest = ({ (acc, id) => ({ ...acc, [`entity.identity.${id.field}`]: { - terms: { field: id.field, missing_bucket: id.optional }, + terms: { field: id.field }, }, }), {} diff --git a/x-pack/plugins/entity_manager/server/saved_objects/entity_definition.ts b/x-pack/plugins/entity_manager/server/saved_objects/entity_definition.ts index bdea2b71e4141..f95e236e93f6d 100644 --- a/x-pack/plugins/entity_manager/server/saved_objects/entity_definition.ts +++ b/x-pack/plugins/entity_manager/server/saved_objects/entity_definition.ts @@ -5,7 +5,11 @@ * 2.0. 
*/ -import { SavedObjectModelDataBackfillFn } from '@kbn/core-saved-objects-server'; +import { + SavedObjectModelDataBackfillFn, + SavedObjectModelTransformationDoc, + SavedObjectModelUnsafeTransformFn, +} from '@kbn/core-saved-objects-server'; import { SavedObject, SavedObjectsType } from '@kbn/core/server'; import { EntityDefinition } from '@kbn/entities-schema'; import { @@ -35,6 +39,20 @@ export const backfillInstalledComponents: SavedObjectModelDataBackfillFn< return savedObject; }; +const removeOptionalIdentityFields: SavedObjectModelUnsafeTransformFn< + EntityDefinition, + EntityDefinition +> = (savedObject) => { + // Doing only this may break displayNameTemplates + savedObject.attributes.identityFields = savedObject.attributes.identityFields.filter( + (identityField) => identityField.optional === false + ); + + return { + document: savedObject as SavedObjectModelTransformationDoc, + }; +}; + export const entityDefinition: SavedObjectsType = { name: SO_ENTITY_DEFINITION_TYPE, hidden: false, @@ -97,5 +115,13 @@ export const entityDefinition: SavedObjectsType = { }, ], }, + '4': { + changes: [ + { + type: 'unsafe_transform', + transformFn: removeOptionalIdentityFields, + }, + ], + }, }, }; diff --git a/x-pack/test/api_integration/apis/entity_manager/definitions.ts b/x-pack/test/api_integration/apis/entity_manager/definitions.ts index a1fdab08ff42a..468e53767b4e8 100644 --- a/x-pack/test/api_integration/apis/entity_manager/definitions.ts +++ b/x-pack/test/api_integration/apis/entity_manager/definitions.ts @@ -169,6 +169,7 @@ export default function ({ getService }: FtrProviderContext) { const parsedSample = entityLatestSchema.safeParse(sample.hits.hits[0]._source); expect(parsedSample.success).to.be(true); + expect(parsedSample.data?.entity.id).to.be('admin-console'); }); it('should delete entities data when specified', async () => {