diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md
index df4c45c070..be7e4dec2c 100644
--- a/CHANGELOG.next.md
+++ b/CHANGELOG.next.md
@@ -43,6 +43,8 @@ Thanks, you're awesome :-) -->
   had `reusable.top_level:false`. This PR affects `ecs_flat.yml`, the csv file
   and the sample Elasticsearch templates. #495, #813
 * Removed the `order` attribute from the `ecs_nested.yml` and `ecs_flat.yml` files. #811
+* In `ecs_nested.yml`, the array of strings that used to be in `reusable.expected`
+  has been replaced by an array of objects with three keys: `as`, `at`, and `full`. #864
 
 #### Bugfixes
 
@@ -63,6 +65,20 @@
 * Allow shorthand notation for including all subfields in subsets. #805
 * Add `ref` option to generator allowing schemas to be built for a specific ECS version. #851
 * Add `template-settings` and `mapping-settings` options to allow override of defaults in generated ES templates. #856
+* When overriding ECS field sets via the `--include` flag, it's no longer necessary
+  to duplicate the field set's mandatory attributes. The customizations are merged
+  before validation. #864
+* Add ability to nest field sets under a different name. #864
+* Add ability to nest field sets within themselves (e.g. `process` => `process.parent`). #864
+* New attribute `reused_here` is added in `ecs_nested.yml`. It supersedes the
+  previous attribute `nestings`, and fully captures the details of the other
+  field sets reused under this one. #864
+* When chained reuses are needed (e.g. `group` => `user`, then `user` => many places),
+  it's now necessary to force the order with the new attribute `reusable.order`. This
+  attribute is otherwise optional; it's currently only needed for `group`. #864
+* There's a new representation of ECS at `generated/ecs/ecs.yml`: a deeply nested
+  view of the fields. This file is not tracked in git, as it's only meant for
+  developers working on the ECS tools. #864
 
 #### Deprecated
 
diff --git a/Makefile b/Makefile
index 823e0980cb..e2826b46bc 100644
--- a/Makefile
+++ b/Makefile
@@ -102,7 +102,7 @@ setup: ve
 # Run the ECS tests
 .PHONY: test
 test: ve
-	$(PYTHON) -m unittest discover --start-directory scripts/tests
+	$(PYTHON) -m unittest discover -v --start-directory scripts/tests
 
 # Create a virtualenv to run Python.
 .PHONY: ve
diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc
index ffe4744f88..69afa74407 100644
--- a/docs/field-details.asciidoc
+++ b/docs/field-details.asciidoc
@@ -453,12 +453,6 @@ example: `co.uk`
 
 // ===============================================================
 
-| <>
-| User's group relevant to the event.
-
-// ===============================================================
-
-
 |=====
 
 [[ecs-cloud]]
@@ -1012,12 +1006,6 @@ example: `co.uk`
 
 // ===============================================================
 
-| <>
-| User's group relevant to the event.
-
-// ===============================================================
-
-
 |=====
 
 [[ecs-dll]]
@@ -2755,12 +2743,6 @@ example: `1325`
 
 // ===============================================================
 
-| <>
-| User's group relevant to the event.
-
-// ===============================================================
-
-
 |=====
 
 [[ecs-http]]
@@ -5269,12 +5251,6 @@ example: `co.uk`
 
 // ===============================================================
 
-| <>
-| User's group relevant to the event.
- -// =============================================================== - - |===== [[ecs-service]] @@ -5610,12 +5586,6 @@ example: `co.uk` // =============================================================== -| <> -| User's group relevant to the event. - -// =============================================================== - - |===== [[ecs-threat]] @@ -6728,7 +6698,7 @@ example: `outside` ==== Field Reuse -The `vlan` fields are expected to be nested at: `network.vlan`, `network.inner.vlan`, `observer.egress.vlan`, `observer.ingress.vlan`. +The `vlan` fields are expected to be nested at: `network.inner.vlan`, `network.vlan`, `observer.egress.vlan`, `observer.ingress.vlan`. Note also that the `vlan` fields are not expected to be used directly at the top level. diff --git a/generated/ecs/.gitignore b/generated/ecs/.gitignore new file mode 100644 index 0000000000..9a834320d8 --- /dev/null +++ b/generated/ecs/.gitignore @@ -0,0 +1 @@ +ecs.yml diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml index 14cc581f24..8584349a0a 100644 --- a/generated/ecs/ecs_nested.yml +++ b/generated/ecs/ecs_nested.yml @@ -7,7 +7,7 @@ agent: be populated with details of the agent running on the host or observer where the event happened or the measurement was taken.' fields: - build.original: + agent.build.original: dashed_name: agent-build-original description: 'Extended build information for the agent. @@ -22,7 +22,7 @@ agent: normalize: [] short: Extended build information for the agent. type: keyword - ephemeral_id: + agent.ephemeral_id: dashed_name: agent-ephemeral-id description: 'Ephemeral identifier of this agent (if one exists). @@ -35,7 +35,7 @@ agent: normalize: [] short: Ephemeral identifier of this agent. type: keyword - id: + agent.id: dashed_name: agent-id description: 'Unique identifier of this agent (if one exists). @@ -48,7 +48,7 @@ agent: normalize: [] short: Unique identifier of this agent. type: keyword - name: + agent.name: dashed_name: agent-name description: 'Custom name of the agent. @@ -65,7 +65,7 @@ agent: normalize: [] short: Custom name of the agent. type: keyword - type: + agent.type: dashed_name: agent-type description: 'Type of the agent. @@ -80,7 +80,7 @@ agent: normalize: [] short: Type of the agent. type: keyword - version: + agent.version: dashed_name: agent-version description: Version of the agent. example: 6.0.0-rc2 @@ -107,7 +107,7 @@ as: of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet. fields: - number: + as.number: dashed_name: as-number description: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. @@ -119,7 +119,7 @@ as: short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long - organization.name: + as.organization.name: dashed_name: as-organization-name description: Organization name. example: Google LLC @@ -140,10 +140,18 @@ as: prefix: as. reusable: expected: - - client - - destination - - server - - source + - as: as + at: client + full: client.as + - as: as + at: destination + full: destination.as + - as: as + at: server + full: server.as + - as: as + at: source + full: source.as top_level: false short: Fields describing an Autonomous System (Internet routing prefix). 
title: Autonomous System @@ -240,7 +248,7 @@ client: in that category, you should still ensure that source and destination are filled appropriately.' fields: - address: + client.address: dashed_name: client-address description: 'Some event client addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store @@ -255,7 +263,7 @@ client: normalize: [] short: Client network address. type: keyword - as.number: + client.as.number: dashed_name: client-as-number description: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. @@ -268,7 +276,7 @@ client: short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long - as.organization.name: + client.as.organization.name: dashed_name: client-as-organization-name description: Organization name. example: Google LLC @@ -285,7 +293,7 @@ client: original_fieldset: as short: Organization name. type: keyword - bytes: + client.bytes: dashed_name: client-bytes description: Bytes sent from the client to the server. example: 184 @@ -296,7 +304,7 @@ client: normalize: [] short: Bytes sent from the client to the server. type: long - domain: + client.domain: dashed_name: client-domain description: Client domain. flat_name: client.domain @@ -306,7 +314,7 @@ client: normalize: [] short: Client domain. type: keyword - geo.city_name: + client.geo.city_name: dashed_name: client-geo-city-name description: City name. example: Montreal @@ -318,7 +326,7 @@ client: original_fieldset: geo short: City name. type: keyword - geo.continent_name: + client.geo.continent_name: dashed_name: client-geo-continent-name description: Name of the continent. example: North America @@ -330,7 +338,7 @@ client: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + client.geo.country_iso_code: dashed_name: client-geo-country-iso-code description: Country ISO code. example: CA @@ -342,7 +350,7 @@ client: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + client.geo.country_name: dashed_name: client-geo-country-name description: Country name. example: Canada @@ -354,7 +362,7 @@ client: original_fieldset: geo short: Country name. type: keyword - geo.location: + client.geo.location: dashed_name: client-geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -365,7 +373,7 @@ client: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + client.geo.name: dashed_name: client-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -383,7 +391,7 @@ client: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + client.geo.region_iso_code: dashed_name: client-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -395,7 +403,7 @@ client: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + client.geo.region_name: dashed_name: client-geo-region-name description: Region name. example: Quebec @@ -407,7 +415,7 @@ client: original_fieldset: geo short: Region name. type: keyword - ip: + client.ip: dashed_name: client-ip description: IP address of the client (IPv4 or IPv6). flat_name: client.ip @@ -416,7 +424,7 @@ client: normalize: [] short: IP address of the client. 
type: ip - mac: + client.mac: dashed_name: client-mac description: MAC address of the client. flat_name: client.mac @@ -426,7 +434,7 @@ client: normalize: [] short: MAC address of the client. type: keyword - nat.ip: + client.nat.ip: dashed_name: client-nat-ip description: 'Translated IP of source based NAT sessions (e.g. internal client to internet). @@ -438,7 +446,7 @@ client: normalize: [] short: Client NAT ip address type: ip - nat.port: + client.nat.port: dashed_name: client-nat-port description: 'Translated port of source based NAT sessions (e.g. internal client to internet). @@ -451,7 +459,7 @@ client: normalize: [] short: Client NAT port type: long - packets: + client.packets: dashed_name: client-packets description: Packets sent from the client to the server. example: 12 @@ -461,7 +469,7 @@ client: normalize: [] short: Packets sent from the client to the server. type: long - port: + client.port: dashed_name: client-port description: Port of the client. flat_name: client.port @@ -471,7 +479,7 @@ client: normalize: [] short: Port of the client. type: long - registered_domain: + client.registered_domain: dashed_name: client-registered-domain description: 'The highest registered client domain, stripped of the subdomain. @@ -488,7 +496,7 @@ client: normalize: [] short: The highest registered client domain, stripped of the subdomain. type: keyword - top_level_domain: + client.top_level_domain: dashed_name: client-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. For example, the top level domain @@ -505,7 +513,7 @@ client: normalize: [] short: The effective top level domain (com, org, net, co.uk). type: keyword - user.domain: + client.user.domain: dashed_name: client-user-domain description: 'Name of the directory the user is a member of. @@ -518,7 +526,7 @@ client: original_fieldset: user short: Name of the directory the user is a member of. type: keyword - user.email: + client.user.email: dashed_name: client-user-email description: User email address. flat_name: client.user.email @@ -529,7 +537,7 @@ client: original_fieldset: user short: User email address. type: keyword - user.full_name: + client.user.full_name: dashed_name: client-user-full-name description: User's full name, if available. example: Albert Einstein @@ -546,7 +554,7 @@ client: original_fieldset: user short: User's full name, if available. type: keyword - user.group.domain: + client.user.group.domain: dashed_name: client-user-group-domain description: 'Name of the directory the group is a member of. @@ -559,7 +567,7 @@ client: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - user.group.id: + client.user.group.id: dashed_name: client-user-group-id description: Unique identifier for the group on the system/platform. flat_name: client.user.group.id @@ -570,7 +578,7 @@ client: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - user.group.name: + client.user.group.name: dashed_name: client-user-group-name description: Name of the group. flat_name: client.user.group.name @@ -581,7 +589,7 @@ client: original_fieldset: group short: Name of the group. type: keyword - user.hash: + client.user.hash: dashed_name: client-user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -596,7 +604,7 @@ client: original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. 
type: keyword - user.id: + client.user.id: dashed_name: client-user-id description: Unique identifier of the user. flat_name: client.user.id @@ -607,7 +615,7 @@ client: original_fieldset: user short: Unique identifier of the user. type: keyword - user.name: + client.user.name: dashed_name: client-user-name description: Short name or login of the user. example: albert @@ -630,8 +638,17 @@ client: - client.as - client.geo - client.user - - client.user.group prefix: client. + reused_here: + - full: client.as + schema_name: as + short: Fields describing an Autonomous System (Internet routing prefix). + - full: client.geo + schema_name: geo + short: Fields describing a location. + - full: client.user + schema_name: user + short: Fields to describe the user relevant to the event. short: Fields about the client side of a network connection, used with server. title: Client type: group @@ -639,7 +656,7 @@ cloud: description: Fields related to the cloud or infrastructure the events are coming from. fields: - account.id: + cloud.account.id: dashed_name: cloud-account-id description: 'The cloud account or organization id used to identify different entities in a multi-tenant environment. @@ -653,7 +670,7 @@ cloud: normalize: [] short: The cloud account or organization id. type: keyword - account.name: + cloud.account.name: dashed_name: cloud-account-name description: 'The cloud account name or alias used to identify different entities in a multi-tenant environment. @@ -667,7 +684,7 @@ cloud: normalize: [] short: The cloud account name. type: keyword - availability_zone: + cloud.availability_zone: dashed_name: cloud-availability-zone description: Availability zone in which this host is running. example: us-east-1c @@ -678,7 +695,7 @@ cloud: normalize: [] short: Availability zone in which this host is running. type: keyword - instance.id: + cloud.instance.id: dashed_name: cloud-instance-id description: Instance ID of the host machine. example: i-1234567890abcdef0 @@ -689,7 +706,7 @@ cloud: normalize: [] short: Instance ID of the host machine. type: keyword - instance.name: + cloud.instance.name: dashed_name: cloud-instance-name description: Instance name of the host machine. flat_name: cloud.instance.name @@ -699,7 +716,7 @@ cloud: normalize: [] short: Instance name of the host machine. type: keyword - machine.type: + cloud.machine.type: dashed_name: cloud-machine-type description: Machine type of the host machine. example: t2.medium @@ -710,7 +727,7 @@ cloud: normalize: [] short: Machine type of the host machine. type: keyword - project.id: + cloud.project.id: dashed_name: cloud-project-id description: 'The cloud project identifier. @@ -723,7 +740,7 @@ cloud: normalize: [] short: The cloud project id. type: keyword - project.name: + cloud.project.name: dashed_name: cloud-project-name description: 'The cloud project name. @@ -736,7 +753,7 @@ cloud: normalize: [] short: The cloud project name. type: keyword - provider: + cloud.provider: dashed_name: cloud-provider description: Name of the cloud provider. Example values are aws, azure, gcp, or digitalocean. @@ -748,7 +765,7 @@ cloud: normalize: [] short: Name of the cloud provider. type: keyword - region: + cloud.region: dashed_name: cloud-region description: Region in which this host is running. example: us-east-1 @@ -773,7 +790,7 @@ cloud: code_signature: description: These fields contain information about binary code signatures. 
fields: - exists: + code_signature.exists: dashed_name: code-signature-exists description: Boolean to capture if a signature is present. example: 'true' @@ -783,7 +800,7 @@ code_signature: normalize: [] short: Boolean to capture if a signature is present. type: boolean - status: + code_signature.status: dashed_name: code-signature-status description: 'Additional information about the certificate status. @@ -798,7 +815,7 @@ code_signature: normalize: [] short: Additional information about the certificate status. type: keyword - subject_name: + code_signature.subject_name: dashed_name: code-signature-subject-name description: Subject name of the code signer example: Microsoft Corporation @@ -809,7 +826,7 @@ code_signature: normalize: [] short: Subject name of the code signer type: keyword - trusted: + code_signature.trusted: dashed_name: code-signature-trusted description: 'Stores the trust status of the certificate chain. @@ -822,7 +839,7 @@ code_signature: normalize: [] short: Stores the trust status of the certificate chain. type: boolean - valid: + code_signature.valid: dashed_name: code-signature-valid description: 'Boolean to capture if the digital signature is verified against the binary content. @@ -841,10 +858,18 @@ code_signature: prefix: code_signature. reusable: expected: - - file - - process - - process.parent - - dll + - as: code_signature + at: file + full: file.code_signature + - as: code_signature + at: process + full: process.code_signature + - as: code_signature + at: process.parent + full: process.parent.code_signature + - as: code_signature + at: dll + full: dll.code_signature top_level: false short: These fields contain information about binary code signatures. title: Code Signature @@ -855,7 +880,7 @@ container: These fields help correlate data based containers from any runtime.' fields: - id: + container.id: dashed_name: container-id description: Unique container id. flat_name: container.id @@ -865,7 +890,7 @@ container: normalize: [] short: Unique container id. type: keyword - image.name: + container.image.name: dashed_name: container-image-name description: Name of the image the container was built on. flat_name: container.image.name @@ -875,7 +900,7 @@ container: normalize: [] short: Name of the image the container was built on. type: keyword - image.tag: + container.image.tag: dashed_name: container-image-tag description: Container image tags. flat_name: container.image.tag @@ -886,7 +911,7 @@ container: - array short: Container image tags. type: keyword - labels: + container.labels: dashed_name: container-labels description: Image labels. flat_name: container.labels @@ -896,7 +921,7 @@ container: object_type: keyword short: Image labels. type: object - name: + container.name: dashed_name: container-name description: Container name. flat_name: container.name @@ -906,7 +931,7 @@ container: normalize: [] short: Container name. type: keyword - runtime: + container.runtime: dashed_name: container-runtime description: Runtime managing this container. example: docker @@ -928,7 +953,7 @@ destination: Destination fields are usually populated in conjunction with source fields.' fields: - address: + destination.address: dashed_name: destination-address description: 'Some event destination addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always @@ -943,7 +968,7 @@ destination: normalize: [] short: Destination network address. 
type: keyword - as.number: + destination.as.number: dashed_name: destination-as-number description: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. @@ -956,7 +981,7 @@ destination: short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long - as.organization.name: + destination.as.organization.name: dashed_name: destination-as-organization-name description: Organization name. example: Google LLC @@ -973,7 +998,7 @@ destination: original_fieldset: as short: Organization name. type: keyword - bytes: + destination.bytes: dashed_name: destination-bytes description: Bytes sent from the destination to the source. example: 184 @@ -984,7 +1009,7 @@ destination: normalize: [] short: Bytes sent from the destination to the source. type: long - domain: + destination.domain: dashed_name: destination-domain description: Destination domain. flat_name: destination.domain @@ -994,7 +1019,7 @@ destination: normalize: [] short: Destination domain. type: keyword - geo.city_name: + destination.geo.city_name: dashed_name: destination-geo-city-name description: City name. example: Montreal @@ -1006,7 +1031,7 @@ destination: original_fieldset: geo short: City name. type: keyword - geo.continent_name: + destination.geo.continent_name: dashed_name: destination-geo-continent-name description: Name of the continent. example: North America @@ -1018,7 +1043,7 @@ destination: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + destination.geo.country_iso_code: dashed_name: destination-geo-country-iso-code description: Country ISO code. example: CA @@ -1030,7 +1055,7 @@ destination: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + destination.geo.country_name: dashed_name: destination-geo-country-name description: Country name. example: Canada @@ -1042,7 +1067,7 @@ destination: original_fieldset: geo short: Country name. type: keyword - geo.location: + destination.geo.location: dashed_name: destination-geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -1053,7 +1078,7 @@ destination: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + destination.geo.name: dashed_name: destination-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -1071,7 +1096,7 @@ destination: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + destination.geo.region_iso_code: dashed_name: destination-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -1083,7 +1108,7 @@ destination: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + destination.geo.region_name: dashed_name: destination-geo-region-name description: Region name. example: Quebec @@ -1095,7 +1120,7 @@ destination: original_fieldset: geo short: Region name. type: keyword - ip: + destination.ip: dashed_name: destination-ip description: IP address of the destination (IPv4 or IPv6). flat_name: destination.ip @@ -1104,7 +1129,7 @@ destination: normalize: [] short: IP address of the destination. type: ip - mac: + destination.mac: dashed_name: destination-mac description: MAC address of the destination. 
flat_name: destination.mac @@ -1114,7 +1139,7 @@ destination: normalize: [] short: MAC address of the destination. type: keyword - nat.ip: + destination.nat.ip: dashed_name: destination-nat-ip description: 'Translated ip of destination based NAT sessions (e.g. internet to private DMZ) @@ -1126,7 +1151,7 @@ destination: normalize: [] short: Destination NAT ip type: ip - nat.port: + destination.nat.port: dashed_name: destination-nat-port description: 'Port the source session is translated to by NAT Device. @@ -1138,7 +1163,7 @@ destination: normalize: [] short: Destination NAT Port type: long - packets: + destination.packets: dashed_name: destination-packets description: Packets sent from the destination to the source. example: 12 @@ -1148,7 +1173,7 @@ destination: normalize: [] short: Packets sent from the destination to the source. type: long - port: + destination.port: dashed_name: destination-port description: Port of the destination. flat_name: destination.port @@ -1158,7 +1183,7 @@ destination: normalize: [] short: Port of the destination. type: long - registered_domain: + destination.registered_domain: dashed_name: destination-registered-domain description: 'The highest registered destination domain, stripped of the subdomain. @@ -1175,7 +1200,7 @@ destination: normalize: [] short: The highest registered destination domain, stripped of the subdomain. type: keyword - top_level_domain: + destination.top_level_domain: dashed_name: destination-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. For example, the top level domain @@ -1192,7 +1217,7 @@ destination: normalize: [] short: The effective top level domain (com, org, net, co.uk). type: keyword - user.domain: + destination.user.domain: dashed_name: destination-user-domain description: 'Name of the directory the user is a member of. @@ -1205,7 +1230,7 @@ destination: original_fieldset: user short: Name of the directory the user is a member of. type: keyword - user.email: + destination.user.email: dashed_name: destination-user-email description: User email address. flat_name: destination.user.email @@ -1216,7 +1241,7 @@ destination: original_fieldset: user short: User email address. type: keyword - user.full_name: + destination.user.full_name: dashed_name: destination-user-full-name description: User's full name, if available. example: Albert Einstein @@ -1233,7 +1258,7 @@ destination: original_fieldset: user short: User's full name, if available. type: keyword - user.group.domain: + destination.user.group.domain: dashed_name: destination-user-group-domain description: 'Name of the directory the group is a member of. @@ -1246,7 +1271,7 @@ destination: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - user.group.id: + destination.user.group.id: dashed_name: destination-user-group-id description: Unique identifier for the group on the system/platform. flat_name: destination.user.group.id @@ -1257,7 +1282,7 @@ destination: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - user.group.name: + destination.user.group.name: dashed_name: destination-user-group-name description: Name of the group. flat_name: destination.user.group.name @@ -1268,7 +1293,7 @@ destination: original_fieldset: group short: Name of the group. 
type: keyword - user.hash: + destination.user.hash: dashed_name: destination-user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -1283,7 +1308,7 @@ destination: original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. type: keyword - user.id: + destination.user.id: dashed_name: destination-user-id description: Unique identifier of the user. flat_name: destination.user.id @@ -1294,7 +1319,7 @@ destination: original_fieldset: user short: Unique identifier of the user. type: keyword - user.name: + destination.user.name: dashed_name: destination-user-name description: Short name or login of the user. example: albert @@ -1317,8 +1342,17 @@ destination: - destination.as - destination.geo - destination.user - - destination.user.group prefix: destination. + reused_here: + - full: destination.as + schema_name: as + short: Fields describing an Autonomous System (Internet routing prefix). + - full: destination.geo + schema_name: geo + short: Fields describing a location. + - full: destination.user + schema_name: user + short: Fields to describe the user relevant to the event. short: Fields about the destination side of a network connection, used with source. title: Destination type: group @@ -1336,7 +1370,7 @@ dll: * Dynamic library (`.dylib`) commonly used on macOS' fields: - code_signature.exists: + dll.code_signature.exists: dashed_name: dll-code-signature-exists description: Boolean to capture if a signature is present. example: 'true' @@ -1347,7 +1381,7 @@ dll: original_fieldset: code_signature short: Boolean to capture if a signature is present. type: boolean - code_signature.status: + dll.code_signature.status: dashed_name: dll-code-signature-status description: 'Additional information about the certificate status. @@ -1363,7 +1397,7 @@ dll: original_fieldset: code_signature short: Additional information about the certificate status. type: keyword - code_signature.subject_name: + dll.code_signature.subject_name: dashed_name: dll-code-signature-subject-name description: Subject name of the code signer example: Microsoft Corporation @@ -1375,7 +1409,7 @@ dll: original_fieldset: code_signature short: Subject name of the code signer type: keyword - code_signature.trusted: + dll.code_signature.trusted: dashed_name: dll-code-signature-trusted description: 'Stores the trust status of the certificate chain. @@ -1389,7 +1423,7 @@ dll: original_fieldset: code_signature short: Stores the trust status of the certificate chain. type: boolean - code_signature.valid: + dll.code_signature.valid: dashed_name: dll-code-signature-valid description: 'Boolean to capture if the digital signature is verified against the binary content. @@ -1404,7 +1438,7 @@ dll: short: Boolean to capture if the digital signature is verified against the binary content. type: boolean - hash.md5: + dll.hash.md5: dashed_name: dll-hash-md5 description: MD5 hash. flat_name: dll.hash.md5 @@ -1415,7 +1449,7 @@ dll: original_fieldset: hash short: MD5 hash. type: keyword - hash.sha1: + dll.hash.sha1: dashed_name: dll-hash-sha1 description: SHA1 hash. flat_name: dll.hash.sha1 @@ -1426,7 +1460,7 @@ dll: original_fieldset: hash short: SHA1 hash. type: keyword - hash.sha256: + dll.hash.sha256: dashed_name: dll-hash-sha256 description: SHA256 hash. flat_name: dll.hash.sha256 @@ -1437,7 +1471,7 @@ dll: original_fieldset: hash short: SHA256 hash. type: keyword - hash.sha512: + dll.hash.sha512: dashed_name: dll-hash-sha512 description: SHA512 hash. 
flat_name: dll.hash.sha512 @@ -1448,7 +1482,7 @@ dll: original_fieldset: hash short: SHA512 hash. type: keyword - name: + dll.name: dashed_name: dll-name description: 'Name of the library. @@ -1461,7 +1495,7 @@ dll: normalize: [] short: Name of the library. type: keyword - path: + dll.path: dashed_name: dll-path description: Full file path of the library. example: C:\Windows\System32\kernel32.dll @@ -1472,7 +1506,7 @@ dll: normalize: [] short: Full file path of the library. type: keyword - pe.architecture: + dll.pe.architecture: dashed_name: dll-pe-architecture description: CPU architecture target for the file. example: x64 @@ -1484,7 +1518,7 @@ dll: original_fieldset: pe short: CPU architecture target for the file. type: keyword - pe.company: + dll.pe.company: dashed_name: dll-pe-company description: Internal company name of the file, provided at compile-time. example: Microsoft Corporation @@ -1496,7 +1530,7 @@ dll: original_fieldset: pe short: Internal company name of the file, provided at compile-time. type: keyword - pe.description: + dll.pe.description: dashed_name: dll-pe-description description: Internal description of the file, provided at compile-time. example: Paint @@ -1508,7 +1542,7 @@ dll: original_fieldset: pe short: Internal description of the file, provided at compile-time. type: keyword - pe.file_version: + dll.pe.file_version: dashed_name: dll-pe-file-version description: Internal version of the file, provided at compile-time. example: 6.3.9600.17415 @@ -1520,7 +1554,7 @@ dll: original_fieldset: pe short: Process name. type: keyword - pe.imphash: + dll.pe.imphash: dashed_name: dll-pe-imphash description: 'A hash of the imports in a PE file. An imphash -- or import hash -- can be used to fingerprint binaries even after recompilation or other code-level @@ -1536,7 +1570,7 @@ dll: original_fieldset: pe short: A hash of the imports in a PE file. type: keyword - pe.original_file_name: + dll.pe.original_file_name: dashed_name: dll-pe-original-file-name description: Internal name of the file, provided at compile-time. example: MSPAINT.EXE @@ -1548,7 +1582,7 @@ dll: original_fieldset: pe short: Internal name of the file, provided at compile-time. type: keyword - pe.product: + dll.pe.product: dashed_name: dll-pe-product description: Internal product name of the file, provided at compile-time. example: "Microsoft\xAE Windows\xAE Operating System" @@ -1567,6 +1601,16 @@ dll: - dll.hash - dll.pe prefix: dll. + reused_here: + - full: dll.code_signature + schema_name: code_signature + short: These fields contain information about binary code signatures. + - full: dll.hash + schema_name: hash + short: Hashes, usually file hashes. + - full: dll.pe + schema_name: pe + short: These fields contain Windows Portable Executable (PE) metadata. short: These fields contain information about code libraries dynamically loaded into processes. title: DLL @@ -1578,7 +1622,7 @@ dns: (`dns.type:query`) or they should represent a full exchange and contain the query details as well as all of the answers that were provided for this query (`dns.type:answer`).' fields: - answers: + dns.answers: dashed_name: dns-answers description: 'An array containing an object for each answer section returned by the server. @@ -1598,7 +1642,7 @@ dns: object_type: keyword short: Array of DNS answers. type: object - answers.class: + dns.answers.class: dashed_name: dns-answers-class description: The class of DNS data contained in this resource record. 
example: IN @@ -1609,7 +1653,7 @@ dns: normalize: [] short: The class of DNS data contained in this resource record. type: keyword - answers.data: + dns.answers.data: dashed_name: dns-answers-data description: 'The data describing the resource. @@ -1622,7 +1666,7 @@ dns: normalize: [] short: The data describing the resource. type: keyword - answers.name: + dns.answers.name: dashed_name: dns-answers-name description: 'The domain name to which this resource record pertains. @@ -1637,7 +1681,7 @@ dns: normalize: [] short: The domain name to which this resource record pertains. type: keyword - answers.ttl: + dns.answers.ttl: dashed_name: dns-answers-ttl description: The time interval in seconds that this resource record may be cached before it should be discarded. Zero values mean that the data should not be @@ -1651,7 +1695,7 @@ dns: before it should be discarded. Zero values mean that the data should not be cached. type: long - answers.type: + dns.answers.type: dashed_name: dns-answers-type description: The type of data contained in this resource record. example: CNAME @@ -1662,7 +1706,7 @@ dns: normalize: [] short: The type of data contained in this resource record. type: keyword - header_flags: + dns.header_flags: dashed_name: dns-header-flags description: 'Array of 2 letter DNS header flags. @@ -1678,7 +1722,7 @@ dns: - array short: Array of DNS header flags. type: keyword - id: + dns.id: dashed_name: dns-id description: The DNS packet identifier assigned by the program that generated the query. The identifier is copied to the response. @@ -1691,7 +1735,7 @@ dns: short: The DNS packet identifier assigned by the program that generated the query. The identifier is copied to the response. type: keyword - op_code: + dns.op_code: dashed_name: dns-op-code description: The DNS operation code that specifies the kind of query in the message. This value is set by the originator of a query and copied into the @@ -1705,7 +1749,7 @@ dns: short: The DNS operation code that specifies the kind of query in the message. This value is set by the originator of a query and copied into the response. type: keyword - question.class: + dns.question.class: dashed_name: dns-question-class description: The class of records being queried. example: IN @@ -1716,7 +1760,7 @@ dns: normalize: [] short: The class of records being queried. type: keyword - question.name: + dns.question.name: dashed_name: dns-question-name description: 'The name being queried. @@ -1732,7 +1776,7 @@ dns: normalize: [] short: The name being queried. type: keyword - question.registered_domain: + dns.question.registered_domain: dashed_name: dns-question-registered-domain description: 'The highest registered domain, stripped of the subdomain. @@ -1749,7 +1793,7 @@ dns: normalize: [] short: The highest registered domain, stripped of the subdomain. type: keyword - question.subdomain: + dns.question.subdomain: dashed_name: dns-question-subdomain description: 'The subdomain is all of the labels under the registered_domain. @@ -1763,7 +1807,7 @@ dns: normalize: [] short: The subdomain of the domain. type: keyword - question.top_level_domain: + dns.question.top_level_domain: dashed_name: dns-question-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. For example, the top level domain @@ -1780,7 +1824,7 @@ dns: normalize: [] short: The effective top level domain (com, org, net, co.uk). 
type: keyword - question.type: + dns.question.type: dashed_name: dns-question-type description: The type of record being queried. example: AAAA @@ -1791,7 +1835,7 @@ dns: normalize: [] short: The type of record being queried. type: keyword - resolved_ip: + dns.resolved_ip: dashed_name: dns-resolved-ip description: 'Array containing all IPs seen in `answers.data`. @@ -1809,7 +1853,7 @@ dns: - array short: Array containing all IPs seen in answers.data type: ip - response_code: + dns.response_code: dashed_name: dns-response-code description: The DNS response code. example: NOERROR @@ -1820,7 +1864,7 @@ dns: normalize: [] short: The DNS response code. type: keyword - type: + dns.type: dashed_name: dns-type description: 'The type of DNS event captured, query or answer. @@ -1847,7 +1891,7 @@ dns: ecs: description: Meta-information specific to ECS. fields: - version: + ecs.version: dashed_name: ecs-version description: 'ECS version this event conforms to. `ecs.version` is a required field and must exist in all events. @@ -1876,7 +1920,7 @@ error: Use them for errors that happen while fetching events or in cases where the event itself contains an error.' fields: - code: + error.code: dashed_name: error-code description: Error code describing the error. flat_name: error.code @@ -1886,7 +1930,7 @@ error: normalize: [] short: Error code describing the error. type: keyword - id: + error.id: dashed_name: error-id description: Unique identifier for the error. flat_name: error.id @@ -1896,7 +1940,7 @@ error: normalize: [] short: Unique identifier for the error. type: keyword - message: + error.message: dashed_name: error-message description: Error message. flat_name: error.message @@ -1906,7 +1950,7 @@ error: norms: false short: Error message. type: text - stack_trace: + error.stack_trace: dashed_name: error-stack-trace description: The stack trace of this error in plain text. doc_values: false @@ -1923,7 +1967,7 @@ error: normalize: [] short: The stack trace of this error in plain text. type: keyword - type: + error.type: dashed_name: error-type description: The type of the error, for example the class name of the exception. example: java.lang.NullPointerException @@ -1954,7 +1998,7 @@ event: the `event.kind` definition in this section for additional details about metric and state events.' fields: - action: + event.action: dashed_name: event-action description: 'The action captured by the event. @@ -1969,7 +2013,7 @@ event: normalize: [] short: The action captured by the event. type: keyword - category: + event.category: allowed_values: - description: 'Events in this category are related to the challenge and response process in which credentials are supplied and verified to allow the creation @@ -2150,7 +2194,7 @@ event: - array short: Event category. The second categorization field in the hierarchy. type: keyword - code: + event.code: dashed_name: event-code description: 'Identification code for this event, if one exists. @@ -2165,7 +2209,7 @@ event: normalize: [] short: Identification code for this event. type: keyword - created: + event.created: dashed_name: event-created description: 'event.created contains the date/time when the event was first read by an agent, or by your pipeline. @@ -2186,7 +2230,7 @@ event: normalize: [] short: Time when the event was first read by an agent or by your pipeline. type: date - dataset: + event.dataset: dashed_name: event-dataset description: 'Name of the dataset. @@ -2204,7 +2248,7 @@ event: normalize: [] short: Name of the dataset. 
type: keyword - duration: + event.duration: dashed_name: event-duration description: 'Duration of the event in nanoseconds. @@ -2220,7 +2264,7 @@ event: output_precision: 1 short: Duration of the event in nanoseconds. type: long - end: + event.end: dashed_name: event-end description: event.end contains the date when the event ended or when the activity was last observed. @@ -2231,7 +2275,7 @@ event: short: event.end contains the date when the event ended or when the activity was last observed. type: date - hash: + event.hash: dashed_name: event-hash description: Hash (perhaps logstash fingerprint) of raw field to be able to demonstrate log integrity. @@ -2244,7 +2288,7 @@ event: short: Hash (perhaps logstash fingerprint) of raw field to be able to demonstrate log integrity. type: keyword - id: + event.id: dashed_name: event-id description: Unique ID to describe the event. example: 8a4f500d @@ -2255,7 +2299,7 @@ event: normalize: [] short: Unique ID to describe the event. type: keyword - ingested: + event.ingested: dashed_name: event-ingested description: 'Timestamp when an event arrived in the central data store. @@ -2272,7 +2316,7 @@ event: normalize: [] short: Timestamp when an event arrived in the central data store. type: date - kind: + event.kind: allowed_values: - description: 'This value indicates an event that describes an alert or notable event, triggered by a detection rule. @@ -2357,7 +2401,7 @@ event: normalize: [] short: The kind of the event. The highest categorization field in the hierarchy. type: keyword - module: + event.module: dashed_name: event-module description: 'Name of the module this data is coming from. @@ -2372,7 +2416,7 @@ event: normalize: [] short: Name of the module this data is coming from. type: keyword - original: + event.original: dashed_name: event-original description: 'Raw text message of entire event. Used to demonstrate log integrity. @@ -2389,7 +2433,7 @@ event: normalize: [] short: Raw text message of entire event. type: keyword - outcome: + event.outcome: allowed_values: - description: 'Indicates that this event describes a failed result. A common example is `event.category:file AND event.type:access AND event.outcome:failure` @@ -2441,7 +2485,7 @@ event: short: The outcome of the event. The lowest level categorization field in the hierarchy. type: keyword - provider: + event.provider: dashed_name: event-provider description: 'Source of the event. @@ -2457,7 +2501,7 @@ event: normalize: [] short: Source of the event. type: keyword - reference: + event.reference: dashed_name: event-reference description: 'Reference URL linking to additional information about this event. @@ -2471,7 +2515,7 @@ event: normalize: [] short: Event reference URL type: keyword - risk_score: + event.risk_score: dashed_name: event-risk-score description: Risk score or priority of the event (e.g. security solutions). Use your system's original value here. @@ -2482,7 +2526,7 @@ event: short: Risk score or priority of the event (e.g. security solutions). Use your system's original value here. type: float - risk_score_norm: + event.risk_score_norm: dashed_name: event-risk-score-norm description: 'Normalized risk score or priority of the event, on a scale of 0 to 100. @@ -2495,7 +2539,7 @@ event: normalize: [] short: Normalized risk score or priority of the event (0-100). type: float - sequence: + event.sequence: dashed_name: event-sequence description: 'Sequence number of the event. @@ -2508,7 +2552,7 @@ event: normalize: [] short: Sequence number of the event. 
type: long - severity: + event.severity: dashed_name: event-severity description: 'The numeric severity of the event according to your event source. @@ -2528,7 +2572,7 @@ event: normalize: [] short: Numeric severity of the event. type: long - start: + event.start: dashed_name: event-start description: event.start contains the date when the event started or when the activity was first observed. @@ -2539,7 +2583,7 @@ event: short: event.start contains the date when the event started or when the activity was first observed. type: date - timezone: + event.timezone: dashed_name: event-timezone description: 'This field should be populated when the event''s timestamp does not include timezone information already (e.g. default Syslog timestamps). @@ -2554,7 +2598,7 @@ event: normalize: [] short: Event time zone. type: keyword - type: + event.type: allowed_values: - description: 'The access event type is used for the subset of events within a category that indicate that something was accessed. Common examples include @@ -2731,7 +2775,7 @@ event: - array short: Event type. The third categorization field in the hierarchy. type: keyword - url: + event.url: dashed_name: event-url description: 'URL linking to an external system to continue investigation of this event. @@ -2762,7 +2806,7 @@ file: File fields provide details about the affected file associated with the event or metric.' fields: - accessed: + file.accessed: dashed_name: file-accessed description: 'Last time the file was accessed. @@ -2773,7 +2817,7 @@ file: normalize: [] short: Last time the file was accessed. type: date - attributes: + file.attributes: dashed_name: file-attributes description: 'Array of file attributes. @@ -2789,7 +2833,7 @@ file: - array short: Array of file attributes. type: keyword - code_signature.exists: + file.code_signature.exists: dashed_name: file-code-signature-exists description: Boolean to capture if a signature is present. example: 'true' @@ -2800,7 +2844,7 @@ file: original_fieldset: code_signature short: Boolean to capture if a signature is present. type: boolean - code_signature.status: + file.code_signature.status: dashed_name: file-code-signature-status description: 'Additional information about the certificate status. @@ -2816,7 +2860,7 @@ file: original_fieldset: code_signature short: Additional information about the certificate status. type: keyword - code_signature.subject_name: + file.code_signature.subject_name: dashed_name: file-code-signature-subject-name description: Subject name of the code signer example: Microsoft Corporation @@ -2828,7 +2872,7 @@ file: original_fieldset: code_signature short: Subject name of the code signer type: keyword - code_signature.trusted: + file.code_signature.trusted: dashed_name: file-code-signature-trusted description: 'Stores the trust status of the certificate chain. @@ -2842,7 +2886,7 @@ file: original_fieldset: code_signature short: Stores the trust status of the certificate chain. type: boolean - code_signature.valid: + file.code_signature.valid: dashed_name: file-code-signature-valid description: 'Boolean to capture if the digital signature is verified against the binary content. @@ -2857,7 +2901,7 @@ file: short: Boolean to capture if the digital signature is verified against the binary content. type: boolean - created: + file.created: dashed_name: file-created description: 'File creation time. @@ -2868,7 +2912,7 @@ file: normalize: [] short: File creation time. 
type: date - ctime: + file.ctime: dashed_name: file-ctime description: 'Last time the file attributes or metadata changed. @@ -2880,7 +2924,7 @@ file: normalize: [] short: Last time the file attributes or metadata changed. type: date - device: + file.device: dashed_name: file-device description: Device that is the source of the file. example: sda @@ -2891,7 +2935,7 @@ file: normalize: [] short: Device that is the source of the file. type: keyword - directory: + file.directory: dashed_name: file-directory description: Directory where the file is located. It should include the drive letter, when appropriate. @@ -2903,7 +2947,7 @@ file: normalize: [] short: Directory where the file is located. type: keyword - drive_letter: + file.drive_letter: dashed_name: file-drive-letter description: 'Drive letter where the file is located. This field is only relevant on Windows. @@ -2917,7 +2961,7 @@ file: normalize: [] short: Drive letter where the file is located. type: keyword - extension: + file.extension: dashed_name: file-extension description: File extension. example: png @@ -2928,7 +2972,7 @@ file: normalize: [] short: File extension. type: keyword - gid: + file.gid: dashed_name: file-gid description: Primary group ID (GID) of the file. example: '1001' @@ -2939,7 +2983,7 @@ file: normalize: [] short: Primary group ID (GID) of the file. type: keyword - group: + file.group: dashed_name: file-group description: Primary group name of the file. example: alice @@ -2950,7 +2994,7 @@ file: normalize: [] short: Primary group name of the file. type: keyword - hash.md5: + file.hash.md5: dashed_name: file-hash-md5 description: MD5 hash. flat_name: file.hash.md5 @@ -2961,7 +3005,7 @@ file: original_fieldset: hash short: MD5 hash. type: keyword - hash.sha1: + file.hash.sha1: dashed_name: file-hash-sha1 description: SHA1 hash. flat_name: file.hash.sha1 @@ -2972,7 +3016,7 @@ file: original_fieldset: hash short: SHA1 hash. type: keyword - hash.sha256: + file.hash.sha256: dashed_name: file-hash-sha256 description: SHA256 hash. flat_name: file.hash.sha256 @@ -2983,7 +3027,7 @@ file: original_fieldset: hash short: SHA256 hash. type: keyword - hash.sha512: + file.hash.sha512: dashed_name: file-hash-sha512 description: SHA512 hash. flat_name: file.hash.sha512 @@ -2994,7 +3038,7 @@ file: original_fieldset: hash short: SHA512 hash. type: keyword - inode: + file.inode: dashed_name: file-inode description: Inode representing the file in the filesystem. example: '256383' @@ -3005,7 +3049,7 @@ file: normalize: [] short: Inode representing the file in the filesystem. type: keyword - mime_type: + file.mime_type: dashed_name: file-mime-type description: MIME type should identify the format of the file or stream of bytes using https://www.iana.org/assignments/media-types/media-types.xhtml[IANA @@ -3018,7 +3062,7 @@ file: normalize: [] short: Media type of file, document, or arrangement of bytes. type: keyword - mode: + file.mode: dashed_name: file-mode description: Mode of the file in octal representation. example: '0640' @@ -3029,7 +3073,7 @@ file: normalize: [] short: Mode of the file in octal representation. type: keyword - mtime: + file.mtime: dashed_name: file-mtime description: Last time the file content was modified. flat_name: file.mtime @@ -3038,7 +3082,7 @@ file: normalize: [] short: Last time the file content was modified. type: date - name: + file.name: dashed_name: file-name description: Name of the file including the extension, without the directory. 
example: example.png @@ -3049,7 +3093,7 @@ file: normalize: [] short: Name of the file including the extension, without the directory. type: keyword - owner: + file.owner: dashed_name: file-owner description: File owner's username. example: alice @@ -3060,7 +3104,7 @@ file: normalize: [] short: File owner's username. type: keyword - path: + file.path: dashed_name: file-path description: Full path to the file, including the file name. It should include the drive letter, when appropriate. @@ -3077,7 +3121,7 @@ file: normalize: [] short: Full path to the file, including the file name. type: keyword - pe.architecture: + file.pe.architecture: dashed_name: file-pe-architecture description: CPU architecture target for the file. example: x64 @@ -3089,7 +3133,7 @@ file: original_fieldset: pe short: CPU architecture target for the file. type: keyword - pe.company: + file.pe.company: dashed_name: file-pe-company description: Internal company name of the file, provided at compile-time. example: Microsoft Corporation @@ -3101,7 +3145,7 @@ file: original_fieldset: pe short: Internal company name of the file, provided at compile-time. type: keyword - pe.description: + file.pe.description: dashed_name: file-pe-description description: Internal description of the file, provided at compile-time. example: Paint @@ -3113,7 +3157,7 @@ file: original_fieldset: pe short: Internal description of the file, provided at compile-time. type: keyword - pe.file_version: + file.pe.file_version: dashed_name: file-pe-file-version description: Internal version of the file, provided at compile-time. example: 6.3.9600.17415 @@ -3125,7 +3169,7 @@ file: original_fieldset: pe short: Process name. type: keyword - pe.imphash: + file.pe.imphash: dashed_name: file-pe-imphash description: 'A hash of the imports in a PE file. An imphash -- or import hash -- can be used to fingerprint binaries even after recompilation or other code-level @@ -3141,7 +3185,7 @@ file: original_fieldset: pe short: A hash of the imports in a PE file. type: keyword - pe.original_file_name: + file.pe.original_file_name: dashed_name: file-pe-original-file-name description: Internal name of the file, provided at compile-time. example: MSPAINT.EXE @@ -3153,7 +3197,7 @@ file: original_fieldset: pe short: Internal name of the file, provided at compile-time. type: keyword - pe.product: + file.pe.product: dashed_name: file-pe-product description: Internal product name of the file, provided at compile-time. example: "Microsoft\xAE Windows\xAE Operating System" @@ -3165,7 +3209,7 @@ file: original_fieldset: pe short: Internal product name of the file, provided at compile-time. type: keyword - size: + file.size: dashed_name: file-size description: 'File size in bytes. @@ -3177,7 +3221,7 @@ file: normalize: [] short: File size in bytes. type: long - target_path: + file.target_path: dashed_name: file-target-path description: Target path for symlinks. flat_name: file.target_path @@ -3192,7 +3236,7 @@ file: normalize: [] short: Target path for symlinks. type: keyword - type: + file.type: dashed_name: file-type description: File type (file, dir, or symlink). example: file @@ -3203,7 +3247,7 @@ file: normalize: [] short: File type (file, dir, or symlink). type: keyword - uid: + file.uid: dashed_name: file-uid description: The user ID (UID) or security identifier (SID) of the file owner. example: '1001' @@ -3214,7 +3258,7 @@ file: normalize: [] short: The user ID (UID) or security identifier (SID) of the file owner. 
type: keyword - x509.alternative_names: + file.x509.alternative_names: dashed_name: file-x509-alternative-names description: List of subject alternative names (SAN). Name types vary by certificate authority and certificate type but commonly contain IP addresses, DNS names @@ -3232,7 +3276,7 @@ file: (and wildcards), and email addresses. short_description: List of subject alternative names (SAN) type: keyword - x509.issuer.common_name: + file.x509.issuer.common_name: dashed_name: file-x509-issuer-common-name description: List of common name (CN) of issuing certificate authority. example: DigiCert SHA2 High Assurance Server CA @@ -3245,7 +3289,7 @@ file: original_fieldset: x509 short: List of common name (CN) of issuing certificate authority. type: keyword - x509.issuer.country: + file.x509.issuer.country: dashed_name: file-x509-issuer-country description: List of country (C) codes example: US @@ -3258,7 +3302,7 @@ file: original_fieldset: x509 short: List of country (C) codes type: keyword - x509.issuer.distinguished_name: + file.x509.issuer.distinguished_name: dashed_name: file-x509-issuer-distinguished-name description: Distinguished name (DN) of issuing certificate authority. example: C=US, O=DigiCert Inc, OU=www.digicert.com, CN=DigiCert SHA2 High Assurance @@ -3271,7 +3315,7 @@ file: original_fieldset: x509 short: Distinguished name (DN) of issuing certificate authority. type: keyword - x509.issuer.locality: + file.x509.issuer.locality: dashed_name: file-x509-issuer-locality description: List of locality names (L) example: Mountain View @@ -3284,7 +3328,7 @@ file: original_fieldset: x509 short: List of locality names (L) type: keyword - x509.issuer.organization: + file.x509.issuer.organization: dashed_name: file-x509-issuer-organization description: List of organizations (O) of issuing certificate authority. example: DigiCert Inc @@ -3297,7 +3341,7 @@ file: original_fieldset: x509 short: List of organizations (O) of issuing certificate authority. type: keyword - x509.issuer.organizational_unit: + file.x509.issuer.organizational_unit: dashed_name: file-x509-issuer-organizational-unit description: List of organizational units (OU) of issuing certificate authority. example: www.digicert.com @@ -3310,7 +3354,7 @@ file: original_fieldset: x509 short: List of organizational units (OU) of issuing certificate authority. type: keyword - x509.issuer.state_or_province: + file.x509.issuer.state_or_province: dashed_name: file-x509-issuer-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -3323,7 +3367,7 @@ file: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - x509.not_after: + file.x509.not_after: dashed_name: file-x509-not-after description: Time at which the certificate is no longer considered valid. example: 2020-07-16 03:15:39+00:00 @@ -3334,7 +3378,7 @@ file: original_fieldset: x509 short: Time at which the certificate is no longer considered valid. type: date - x509.not_before: + file.x509.not_before: dashed_name: file-x509-not-before description: Time at which the certificate is first considered valid. example: 2019-08-16 01:40:25+00:00 @@ -3345,7 +3389,7 @@ file: original_fieldset: x509 short: Time at which the certificate is first considered valid. type: date - x509.public_key_algorithm: + file.x509.public_key_algorithm: dashed_name: file-x509-public-key-algorithm description: Algorithm used to generate the public key. 
example: RSA @@ -3357,7 +3401,7 @@ file: original_fieldset: x509 short: Algorithm used to generate the public key. type: keyword - x509.public_key_curve: + file.x509.public_key_curve: dashed_name: file-x509-public-key-curve description: The curve used by the elliptic curve public key algorithm. This is algorithm specific. @@ -3371,7 +3415,7 @@ file: short: The curve used by the elliptic curve public key algorithm. This is algorithm specific. type: keyword - x509.public_key_exponent: + file.x509.public_key_exponent: dashed_name: file-x509-public-key-exponent description: Exponent used to derive the public key. This is algorithm specific. doc_values: false @@ -3384,7 +3428,7 @@ file: original_fieldset: x509 short: Exponent used to derive the public key. This is algorithm specific. type: long - x509.public_key_size: + file.x509.public_key_size: dashed_name: file-x509-public-key-size description: The size of the public key space in bits. example: 2048 @@ -3395,7 +3439,7 @@ file: original_fieldset: x509 short: The size of the public key space in bits. type: long - x509.serial_number: + file.x509.serial_number: dashed_name: file-x509-serial-number description: Unique serial number issued by the certificate authority. For consistency, if this value is alphanumeric, it should be formatted without colons and uppercase @@ -3412,7 +3456,7 @@ file: characters. short: Unique serial number issued by the certificate authority. type: keyword - x509.signature_algorithm: + file.x509.signature_algorithm: dashed_name: file-x509-signature-algorithm description: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). @@ -3426,7 +3470,7 @@ file: short: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). type: keyword - x509.subject.common_name: + file.x509.subject.common_name: dashed_name: file-x509-subject-common-name description: List of common names (CN) of subject. example: r2.shared.global.fastly.net @@ -3439,7 +3483,7 @@ file: original_fieldset: x509 short: List of common names (CN) of subject. type: keyword - x509.subject.country: + file.x509.subject.country: dashed_name: file-x509-subject-country description: List of country (C) codes example: US @@ -3452,7 +3496,7 @@ file: original_fieldset: x509 short: List of country (C) codes type: keyword - x509.subject.distinguished_name: + file.x509.subject.distinguished_name: dashed_name: file-x509-subject-distinguished-name description: Distinguished name (DN) of the certificate subject entity. example: C=US, ST=California, L=San Francisco, O=Fastly, Inc., CN=r2.shared.global.fastly.net @@ -3464,7 +3508,7 @@ file: original_fieldset: x509 short: Distinguished name (DN) of the certificate subject entity. type: keyword - x509.subject.locality: + file.x509.subject.locality: dashed_name: file-x509-subject-locality description: List of locality names (L) example: San Francisco @@ -3477,7 +3521,7 @@ file: original_fieldset: x509 short: List of locality names (L) type: keyword - x509.subject.organization: + file.x509.subject.organization: dashed_name: file-x509-subject-organization description: List of organizations (O) of subject. example: Fastly, Inc. @@ -3490,7 +3534,7 @@ file: original_fieldset: x509 short: List of organizations (O) of subject.
type: keyword - x509.subject.organizational_unit: + file.x509.subject.organizational_unit: dashed_name: file-x509-subject-organizational-unit description: List of organizational units (OU) of subject. flat_name: file.x509.subject.organizational_unit @@ -3502,7 +3546,7 @@ file: original_fieldset: x509 short: List of organizational units (OU) of subject. type: keyword - x509.subject.state_or_province: + file.x509.subject.state_or_province: dashed_name: file-x509-subject-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -3515,7 +3559,7 @@ file: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - x509.version_number: + file.x509.version_number: dashed_name: file-x509-version-number description: Version of x509 format. example: 3 @@ -3535,6 +3579,26 @@ file: - file.pe - file.x509 prefix: file. + reused_here: + - full: file.code_signature + schema_name: code_signature + short: These fields contain information about binary code signatures. + - full: file.hash + schema_name: hash + short: Hashes, usually file hashes. + - full: file.pe + schema_name: pe + short: These fields contain Windows Portable Executable (PE) metadata. + - full: file.x509 + schema_name: x509 + short: This implements the common core fields for x509 certificates. This information + is likely logged with TLS sessions, digital signatures found in executable binaries, + S/MIME information in email bodies, or analysis of files on disk. When only + a single certificate is logged in an event, it should be nested under `file`. + When hashes of the DER-encoded certificate are available, the `hash` data set + should be populated as well (e.g. `file.hash.sha256`). For events that contain + certificate information for both sides of the connection, the x509 object could + be nested under the respective side of the connection information (e.g. `tls.server.x509`). short: Fields describing files. title: File type: group @@ -3545,7 +3609,7 @@ geo: This geolocation information can be derived from techniques such as Geo IP, or be user-supplied.' fields: - city_name: + geo.city_name: dashed_name: geo-city-name description: City name. example: Montreal @@ -3556,7 +3620,7 @@ geo: normalize: [] short: City name. type: keyword - continent_name: + geo.continent_name: dashed_name: geo-continent-name description: Name of the continent. example: North America @@ -3567,7 +3631,7 @@ geo: normalize: [] short: Name of the continent. type: keyword - country_iso_code: + geo.country_iso_code: dashed_name: geo-country-iso-code description: Country ISO code. example: CA @@ -3578,7 +3642,7 @@ geo: normalize: [] short: Country ISO code. type: keyword - country_name: + geo.country_name: dashed_name: geo-country-name description: Country name. example: Canada @@ -3589,7 +3653,7 @@ geo: normalize: [] short: Country name. type: keyword - location: + geo.location: dashed_name: geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -3599,7 +3663,7 @@ geo: normalize: [] short: Longitude and latitude. type: geo_point - name: + geo.name: dashed_name: geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -3616,7 +3680,7 @@ geo: normalize: [] short: User-defined description of a location. type: keyword - region_iso_code: + geo.region_iso_code: dashed_name: geo-region-iso-code description: Region ISO code. 
example: CA-QC @@ -3627,7 +3691,7 @@ geo: normalize: [] short: Region ISO code. type: keyword - region_name: + geo.region_name: dashed_name: geo-region-name description: Region name. example: Quebec @@ -3643,12 +3707,24 @@ geo: prefix: geo. reusable: expected: - - client - - destination - - observer - - host - - server - - source + - as: geo + at: client + full: client.geo + - as: geo + at: destination + full: destination.geo + - as: geo + at: observer + full: observer.geo + - as: geo + at: host + full: host.geo + - as: geo + at: server + full: server.geo + - as: geo + at: source + full: source.geo top_level: false short: Fields describing a location. title: Geo @@ -3657,7 +3733,7 @@ group: description: The group fields are meant to represent groups that are relevant to the event. fields: - domain: + group.domain: dashed_name: group-domain description: 'Name of the directory the group is a member of. @@ -3669,7 +3745,7 @@ group: normalize: [] short: Name of the directory the group is a member of. type: keyword - id: + group.id: dashed_name: group-id description: Unique identifier for the group on the system/platform. flat_name: group.id @@ -3679,7 +3755,7 @@ group: normalize: [] short: Unique identifier for the group on the system/platform. type: keyword - name: + group.name: dashed_name: group-name description: Name of the group. flat_name: group.name @@ -3694,7 +3770,9 @@ group: prefix: group. reusable: expected: - - user + - as: group + at: user + full: user.group top_level: true short: User's group relevant to the event. title: Group @@ -3706,7 +3784,7 @@ hash: other hashes by lowercasing the hash algorithm name and using underscore separators as appropriate (snake case, e.g. sha3_512).' fields: - md5: + hash.md5: dashed_name: hash-md5 description: MD5 hash. flat_name: hash.md5 @@ -3716,7 +3794,7 @@ hash: normalize: [] short: MD5 hash. type: keyword - sha1: + hash.sha1: dashed_name: hash-sha1 description: SHA1 hash. flat_name: hash.sha1 @@ -3726,7 +3804,7 @@ hash: normalize: [] short: SHA1 hash. type: keyword - sha256: + hash.sha256: dashed_name: hash-sha256 description: SHA256 hash. flat_name: hash.sha256 @@ -3736,7 +3814,7 @@ hash: normalize: [] short: SHA256 hash. type: keyword - sha512: + hash.sha512: dashed_name: hash-sha512 description: SHA512 hash. flat_name: hash.sha512 @@ -3751,10 +3829,18 @@ hash: prefix: hash. reusable: expected: - - file - - process - - process.parent - - dll + - as: hash + at: file + full: file.hash + - as: hash + at: process + full: process.hash + - as: hash + at: process.parent + full: process.parent.hash + - as: hash + at: dll + full: dll.hash top_level: false short: Hashes, usually file hashes. title: Hash @@ -3766,7 +3852,7 @@ host: event happened, or from which the measurement was taken. Host types include hardware, virtual machines, Docker containers, and Kubernetes nodes.' fields: - architecture: + host.architecture: dashed_name: host-architecture description: Operating system architecture. example: x86_64 @@ -3777,7 +3863,7 @@ host: normalize: [] short: Operating system architecture. type: keyword - domain: + host.domain: dashed_name: host-domain description: 'Name of the domain of which the host is a member. @@ -3792,7 +3878,7 @@ host: normalize: [] short: Name of the domain of which the host is a member. type: keyword - geo.city_name: + host.geo.city_name: dashed_name: host-geo-city-name description: City name. example: Montreal @@ -3804,7 +3890,7 @@ host: normalize: [] original_fieldset: geo short: City name.
type: keyword - geo.continent_name: + host.geo.continent_name: dashed_name: host-geo-continent-name description: Name of the continent. example: North America @@ -3816,7 +3902,7 @@ host: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + host.geo.country_iso_code: dashed_name: host-geo-country-iso-code description: Country ISO code. example: CA @@ -3828,7 +3914,7 @@ host: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + host.geo.country_name: dashed_name: host-geo-country-name description: Country name. example: Canada @@ -3840,7 +3926,7 @@ host: original_fieldset: geo short: Country name. type: keyword - geo.location: + host.geo.location: dashed_name: host-geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -3851,7 +3937,7 @@ host: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + host.geo.name: dashed_name: host-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -3869,7 +3955,7 @@ host: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + host.geo.region_iso_code: dashed_name: host-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -3881,7 +3967,7 @@ host: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + host.geo.region_name: dashed_name: host-geo-region-name description: Region name. example: Quebec @@ -3893,7 +3979,7 @@ host: original_fieldset: geo short: Region name. type: keyword - hostname: + host.hostname: dashed_name: host-hostname description: 'Hostname of the host. @@ -3905,7 +3991,7 @@ host: normalize: [] short: Hostname of the host. type: keyword - id: + host.id: dashed_name: host-id description: 'Unique host id. @@ -3919,7 +4005,7 @@ host: normalize: [] short: Unique host id. type: keyword - ip: + host.ip: dashed_name: host-ip description: Host ip addresses. flat_name: host.ip @@ -3929,7 +4015,7 @@ host: - array short: Host ip addresses. type: ip - mac: + host.mac: dashed_name: host-mac description: Host mac addresses. flat_name: host.mac @@ -3940,7 +4026,7 @@ host: - array short: Host mac addresses. type: keyword - name: + host.name: dashed_name: host-name description: 'Name of the host. @@ -3954,7 +4040,7 @@ host: normalize: [] short: Name of the host. type: keyword - os.family: + host.os.family: dashed_name: host-os-family description: OS family (such as redhat, debian, freebsd, windows). example: debian @@ -3966,7 +4052,7 @@ host: original_fieldset: os short: OS family (such as redhat, debian, freebsd, windows). type: keyword - os.full: + host.os.full: dashed_name: host-os-full description: Operating system name, including the version or code name. example: Mac OS Mojave @@ -3983,7 +4069,7 @@ host: original_fieldset: os short: Operating system name, including the version or code name. type: keyword - os.kernel: + host.os.kernel: dashed_name: host-os-kernel description: Operating system kernel version as a raw string. example: 4.4.0-112-generic @@ -3995,7 +4081,7 @@ host: original_fieldset: os short: Operating system kernel version as a raw string. type: keyword - os.name: + host.os.name: dashed_name: host-os-name description: Operating system name, without the version. example: Mac OS X @@ -4012,7 +4098,7 @@ host: original_fieldset: os short: Operating system name, without the version. 
type: keyword - os.platform: + host.os.platform: dashed_name: host-os-platform description: Operating system platform (such as centos, ubuntu, windows). example: darwin @@ -4024,7 +4110,7 @@ host: original_fieldset: os short: Operating system platform (such as centos, ubuntu, windows). type: keyword - os.version: + host.os.version: dashed_name: host-os-version description: Operating system version as a raw string. example: 10.14.1 @@ -4036,7 +4122,7 @@ host: original_fieldset: os short: Operating system version as a raw string. type: keyword - type: + host.type: dashed_name: host-type description: 'Type of host. @@ -4050,7 +4136,7 @@ host: normalize: [] short: Type of host. type: keyword - uptime: + host.uptime: dashed_name: host-uptime description: Seconds the host has been up. example: 1325 @@ -4060,7 +4146,7 @@ host: normalize: [] short: Seconds the host has been up. type: long - user.domain: + host.user.domain: dashed_name: host-user-domain description: 'Name of the directory the user is a member of. @@ -4073,7 +4159,7 @@ host: original_fieldset: user short: Name of the directory the user is a member of. type: keyword - user.email: + host.user.email: dashed_name: host-user-email description: User email address. flat_name: host.user.email @@ -4084,7 +4170,7 @@ host: original_fieldset: user short: User email address. type: keyword - user.full_name: + host.user.full_name: dashed_name: host-user-full-name description: User's full name, if available. example: Albert Einstein @@ -4101,7 +4187,7 @@ host: original_fieldset: user short: User's full name, if available. type: keyword - user.group.domain: + host.user.group.domain: dashed_name: host-user-group-domain description: 'Name of the directory the group is a member of. @@ -4114,7 +4200,7 @@ host: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - user.group.id: + host.user.group.id: dashed_name: host-user-group-id description: Unique identifier for the group on the system/platform. flat_name: host.user.group.id @@ -4125,7 +4211,7 @@ host: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - user.group.name: + host.user.group.name: dashed_name: host-user-group-name description: Name of the group. flat_name: host.user.group.name @@ -4136,7 +4222,7 @@ host: original_fieldset: group short: Name of the group. type: keyword - user.hash: + host.user.hash: dashed_name: host-user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -4151,7 +4237,7 @@ host: original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. type: keyword - user.id: + host.user.id: dashed_name: host-user-id description: Unique identifier of the user. flat_name: host.user.id @@ -4162,7 +4248,7 @@ host: original_fieldset: user short: Unique identifier of the user. type: keyword - user.name: + host.user.name: dashed_name: host-user-name description: Short name or login of the user. example: albert @@ -4185,8 +4271,17 @@ host: - host.geo - host.os - host.user - - host.user.group prefix: host. + reused_here: + - full: host.geo + schema_name: geo + short: Fields describing a location. + - full: host.os + schema_name: os + short: OS fields contain information about the operating system. + - full: host.user + schema_name: user + short: Fields to describe the user relevant to the event. short: Fields describing the relevant computing instance.
title: Host type: group @@ -4194,7 +4289,7 @@ http: description: Fields related to HTTP activity. Use the `url` field set to store the url of the request. fields: - request.body.bytes: + http.request.body.bytes: dashed_name: http-request-body-bytes description: Size in bytes of the request body. example: 887 @@ -4205,7 +4300,7 @@ http: normalize: [] short: Size in bytes of the request body. type: long - request.body.content: + http.request.body.content: dashed_name: http-request-body-content description: The full HTTP request body. example: Hello world @@ -4221,7 +4316,7 @@ http: normalize: [] short: The full HTTP request body. type: keyword - request.bytes: + http.request.bytes: dashed_name: http-request-bytes description: Total size in bytes of the request (body and headers). example: 1437 @@ -4232,7 +4327,7 @@ http: normalize: [] short: Total size in bytes of the request (body and headers). type: long - request.method: + http.request.method: dashed_name: http-request-method description: 'HTTP request method. @@ -4251,7 +4346,7 @@ http: normalize: [] short: HTTP request method. type: keyword - request.referrer: + http.request.referrer: dashed_name: http-request-referrer description: Referrer for this HTTP request. example: https://blog.example.com/ @@ -4262,7 +4357,7 @@ http: normalize: [] short: Referrer for this HTTP request. type: keyword - response.body.bytes: + http.response.body.bytes: dashed_name: http-response-body-bytes description: Size in bytes of the response body. example: 887 @@ -4273,7 +4368,7 @@ http: normalize: [] short: Size in bytes of the response body. type: long - response.body.content: + http.response.body.content: dashed_name: http-response-body-content description: The full HTTP response body. example: Hello world @@ -4289,7 +4384,7 @@ http: normalize: [] short: The full HTTP response body. type: keyword - response.bytes: + http.response.bytes: dashed_name: http-response-bytes description: Total size in bytes of the response (body and headers). example: 1437 @@ -4300,7 +4395,7 @@ http: normalize: [] short: Total size in bytes of the response (body and headers). type: long - response.status_code: + http.response.status_code: dashed_name: http-response-status-code description: HTTP response status code. example: 404 @@ -4311,7 +4406,7 @@ http: normalize: [] short: HTTP response status code. type: long - version: + http.version: dashed_name: http-version description: HTTP version. example: 1.1 @@ -4335,7 +4430,7 @@ interface: a single observer interface (e.g. network sensor on a span port) only the observer.ingress information should be populated. fields: - alias: + interface.alias: dashed_name: interface-alias description: Interface alias as reported by the system, typically used in firewall implementations for e.g. inside, outside, or dmz logical interface naming. @@ -4347,7 +4442,7 @@ interface: normalize: [] short: Interface alias type: keyword - id: + interface.id: dashed_name: interface-id description: Interface ID as reported by an observer (typically SNMP interface ID). @@ -4359,7 +4454,7 @@ interface: normalize: [] short: Interface ID type: keyword - name: + interface.name: dashed_name: interface-name description: Interface name as reported by the system. example: eth0 @@ -4375,8 +4470,12 @@ interface: prefix: interface. 
reusable: expected: - - observer.ingress - - observer.egress + - as: interface + at: observer.ingress + full: observer.ingress.interface + - as: interface + at: observer.egress + full: observer.egress.interface top_level: false short: Fields to describe observer interface information. title: Interface @@ -4391,7 +4490,7 @@ log: The details specific to your event source are typically not logged under `log.*`, but rather in `event.*` or in other ECS fields.' fields: - file.path: + log.file.path: dashed_name: log-file-path description: 'Full path to the log file this event came from, including the file name. It should include the drive letter, when appropriate. @@ -4405,7 +4504,7 @@ log: normalize: [] short: Full path to the log file this event came from. type: keyword - level: + log.level: dashed_name: log-level description: 'Original log level of the log event. @@ -4422,7 +4521,7 @@ log: normalize: [] short: Log level of the log event. type: keyword - logger: + log.logger: dashed_name: log-logger description: The name of the logger inside an application. This is usually the name of the class which initialized the logger, or can be a custom name. @@ -4434,7 +4533,7 @@ log: normalize: [] short: Name of the logger. type: keyword - origin.file.line: + log.origin.file.line: dashed_name: log-origin-file-line description: The line number of the file containing the source code which originated the log event. @@ -4445,7 +4544,7 @@ log: normalize: [] short: The line number of the file which originated the log event. type: integer - origin.file.name: + log.origin.file.name: dashed_name: log-origin-file-name description: 'The name of the file containing the source code which originated the log event. @@ -4460,7 +4559,7 @@ log: normalize: [] short: The code file which originated the log event. type: keyword - origin.function: + log.origin.function: dashed_name: log-origin-function description: The name of the function or method which originated the log event. example: init @@ -4471,7 +4570,7 @@ log: normalize: [] short: The function which originated the log event. type: keyword - original: + log.original: dashed_name: log-original description: 'This is the original log message and contains the full log message before splitting it up in multiple parts. @@ -4493,7 +4592,7 @@ log: normalize: [] short: Original log message with light interpretation only (encoding, newlines). type: keyword - syslog: + log.syslog: dashed_name: log-syslog description: The Syslog metadata of the event, if the event was transmitted via Syslog. Please see RFCs 5424 or 3164. @@ -4504,7 +4603,7 @@ log: object_type: keyword short: Syslog metadata type: object - syslog.facility.code: + log.syslog.facility.code: dashed_name: log-syslog-facility-code description: 'The Syslog numeric facility of the log event, if available. @@ -4518,7 +4617,7 @@ log: normalize: [] short: Syslog numeric facility of the event. type: long - syslog.facility.name: + log.syslog.facility.name: dashed_name: log-syslog-facility-name description: The Syslog text-based facility of the log event, if available. example: local7 @@ -4529,7 +4628,7 @@ log: normalize: [] short: Syslog text-based facility of the event. type: keyword - syslog.priority: + log.syslog.priority: dashed_name: log-syslog-priority description: 'Syslog numeric priority of the event, if available. @@ -4543,7 +4642,7 @@ log: normalize: [] short: Syslog priority of the event. 
type: long - syslog.severity.code: + log.syslog.severity.code: dashed_name: log-syslog-severity-code description: 'The Syslog numeric severity of the log event, if available. @@ -4558,7 +4657,7 @@ log: normalize: [] short: Syslog numeric severity of the event. type: long - syslog.severity.name: + log.syslog.severity.name: dashed_name: log-syslog-severity-name description: 'The Syslog text-based severity of the log event, if available. @@ -4587,7 +4686,7 @@ network: The network.* fields should be populated with details about the network activity associated with an event.' fields: - application: + network.application: dashed_name: network-application description: 'A name given to an application level protocol. This can be arbitrarily assigned for things like microservices, but also apply to things like skype, @@ -4605,7 +4704,7 @@ network: normalize: [] short: Application level protocol name. type: keyword - bytes: + network.bytes: dashed_name: network-bytes description: 'Total bytes transferred in both directions. @@ -4619,7 +4718,7 @@ network: normalize: [] short: Total bytes transferred in both directions. type: long - community_id: + network.community_id: dashed_name: network-community-id description: 'A hash of source and destination IPs and ports, as well as the protocol used in a communication. This is a tool-agnostic standard to identify @@ -4634,7 +4733,7 @@ network: normalize: [] short: A hash of source and destination IPs and ports. type: keyword - direction: + network.direction: dashed_name: network-direction description: "Direction of the network traffic.\nRecommended values are:\n \ \ * inbound\n * outbound\n * internal\n * external\n * unknown\n\nWhen\ \ mapping events from a host-based monitoring context, populate this field\ \ from the host's point of view.\nWhen mapping events from a network or perimeter-based\ \ monitoring context, populate this field from the point of view of your network\ \ perimeter." example: inbound @@ -4650,7 +4749,7 @@ network: normalize: [] short: Direction of the network traffic. type: keyword - forwarded_ip: + network.forwarded_ip: dashed_name: network-forwarded-ip description: Host IP address when the source IP address is the proxy. example: 192.1.1.2 @@ -4660,7 +4759,7 @@ network: normalize: [] short: Host IP address when the source IP address is the proxy. type: ip - iana_number: + network.iana_number: dashed_name: network-iana-number description: IANA Protocol Number (https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml). Standardized list of protocols. This aligns well with NetFlow and sFlow related @@ -4673,7 +4772,7 @@ network: normalize: [] short: IANA Protocol Number. type: keyword - inner: + network.inner: dashed_name: network-inner description: Network.inner fields are added in addition to network.vlan fields to describe the innermost VLAN when q-in-q VLAN tagging is present. Allowed @@ -4687,7 +4786,7 @@ network: object_type: keyword short: Inner VLAN tag information type: object - inner.vlan.id: + network.inner.vlan.id: dashed_name: network-inner-vlan-id description: VLAN ID as reported by the observer. example: 10 @@ -4699,7 +4798,7 @@ network: original_fieldset: vlan short: VLAN ID as reported by the observer. type: keyword - inner.vlan.name: + network.inner.vlan.name: dashed_name: network-inner-vlan-name description: Optional VLAN name as reported by the observer. example: outside @@ -4711,7 +4810,7 @@ network: original_fieldset: vlan short: Optional VLAN name as reported by the observer. type: keyword - name: + network.name: dashed_name: network-name description: Name given by operators to sections of their network. example: Guest Wifi @@ -4722,7 +4821,7 @@ network: normalize: [] short: Name given by operators to sections of their network.
type: keyword - packets: + network.packets: dashed_name: network-packets description: 'Total packets transferred in both directions. @@ -4735,7 +4834,7 @@ network: normalize: [] short: Total packets transferred in both directions. type: long - protocol: + network.protocol: dashed_name: network-protocol description: 'L7 Network protocol name. ex. http, lumberjack, transport protocol. @@ -4749,7 +4848,7 @@ network: normalize: [] short: L7 Network protocol name. type: keyword - transport: + network.transport: dashed_name: network-transport description: 'Same as network.iana_number, but instead using the Keyword name of the transport layer (udp, tcp, ipv6-icmp, etc.) @@ -4764,7 +4863,7 @@ network: normalize: [] short: Protocol Name corresponding to the field `iana_number`. type: keyword - type: + network.type: dashed_name: network-type description: 'In the OSI Model this would be the Network Layer. ipv4, ipv6, ipsec, pim, etc @@ -4780,7 +4879,7 @@ network: short: In the OSI Model this would be the Network Layer. ipv4, ipv6, ipsec, pim, etc type: keyword - vlan.id: + network.vlan.id: dashed_name: network-vlan-id description: VLAN ID as reported by the observer. example: 10 @@ -4792,7 +4891,7 @@ network: original_fieldset: vlan short: VLAN ID as reported by the observer. type: keyword - vlan.name: + network.vlan.name: dashed_name: network-vlan-name description: Optional VLAN name as reported by the observer. example: outside @@ -4810,6 +4909,13 @@ network: - network.inner.vlan - network.vlan prefix: network. + reused_here: + - full: network.vlan + schema_name: vlan + short: Fields to describe observed VLAN information. + - full: network.inner.vlan + schema_name: vlan + short: Fields to describe observed VLAN information. short: Fields describing the communication path over which the event happened. title: Network type: group @@ -4827,7 +4933,7 @@ observer: metric. Message queues and ETL components used in processing events or metrics are not considered observers in ECS.' fields: - egress: + observer.egress: dashed_name: observer-egress description: Observer.egress holds information like interface number and name, vlan, and zone information to classify egress traffic. Single armed monitoring @@ -4840,7 +4946,7 @@ observer: object_type: keyword short: Object field for egress information type: object - egress.interface.alias: + observer.egress.interface.alias: dashed_name: observer-egress-interface-alias description: Interface alias as reported by the system, typically used in firewall implementations for e.g. inside, outside, or dmz logical interface naming. @@ -4853,7 +4959,7 @@ observer: original_fieldset: interface short: Interface alias type: keyword - egress.interface.id: + observer.egress.interface.id: dashed_name: observer-egress-interface-id description: Interface ID as reported by an observer (typically SNMP interface ID). @@ -4866,7 +4972,7 @@ observer: original_fieldset: interface short: Interface ID type: keyword - egress.interface.name: + observer.egress.interface.name: dashed_name: observer-egress-interface-name description: Interface name as reported by the system. example: eth0 @@ -4878,7 +4984,7 @@ observer: original_fieldset: interface short: Interface name type: keyword - egress.vlan.id: + observer.egress.vlan.id: dashed_name: observer-egress-vlan-id description: VLAN ID as reported by the observer. example: 10 @@ -4890,7 +4996,7 @@ observer: original_fieldset: vlan short: VLAN ID as reported by the observer. 
type: keyword - egress.vlan.name: + observer.egress.vlan.name: dashed_name: observer-egress-vlan-name description: Optional VLAN name as reported by the observer. example: outside @@ -4902,7 +5008,7 @@ observer: original_fieldset: vlan short: Optional VLAN name as reported by the observer. type: keyword - egress.zone: + observer.egress.zone: dashed_name: observer-egress-zone description: Network zone of outbound traffic as reported by the observer to categorize the destination area of egress traffic, e.g. Internal, External, @@ -4915,7 +5021,7 @@ observer: normalize: [] short: Observer Egress zone type: keyword - geo.city_name: + observer.geo.city_name: dashed_name: observer-geo-city-name description: City name. example: Montreal @@ -4927,7 +5033,7 @@ observer: original_fieldset: geo short: City name. type: keyword - geo.continent_name: + observer.geo.continent_name: dashed_name: observer-geo-continent-name description: Name of the continent. example: North America @@ -4939,7 +5045,7 @@ observer: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + observer.geo.country_iso_code: dashed_name: observer-geo-country-iso-code description: Country ISO code. example: CA @@ -4951,7 +5057,7 @@ observer: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + observer.geo.country_name: dashed_name: observer-geo-country-name description: Country name. example: Canada @@ -4963,7 +5069,7 @@ observer: original_fieldset: geo short: Country name. type: keyword - geo.location: + observer.geo.location: dashed_name: observer-geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -4974,7 +5080,7 @@ observer: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + observer.geo.name: dashed_name: observer-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -4992,7 +5098,7 @@ observer: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + observer.geo.region_iso_code: dashed_name: observer-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -5004,7 +5110,7 @@ observer: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + observer.geo.region_name: dashed_name: observer-geo-region-name description: Region name. example: Quebec @@ -5016,7 +5122,7 @@ observer: original_fieldset: geo short: Region name. type: keyword - hostname: + observer.hostname: dashed_name: observer-hostname description: Hostname of the observer. flat_name: observer.hostname @@ -5026,7 +5132,7 @@ observer: normalize: [] short: Hostname of the observer. type: keyword - ingress: + observer.ingress: dashed_name: observer-ingress description: Observer.ingress holds information like interface number and name, vlan, and zone information to classify ingress traffic. Single armed monitoring @@ -5039,7 +5145,7 @@ observer: object_type: keyword short: Object field for ingress information type: object - ingress.interface.alias: + observer.ingress.interface.alias: dashed_name: observer-ingress-interface-alias description: Interface alias as reported by the system, typically used in firewall implementations for e.g. inside, outside, or dmz logical interface naming. 
@@ -5052,7 +5158,7 @@ observer: original_fieldset: interface short: Interface alias type: keyword - ingress.interface.id: + observer.ingress.interface.id: dashed_name: observer-ingress-interface-id description: Interface ID as reported by an observer (typically SNMP interface ID). @@ -5065,7 +5171,7 @@ observer: original_fieldset: interface short: Interface ID type: keyword - ingress.interface.name: + observer.ingress.interface.name: dashed_name: observer-ingress-interface-name description: Interface name as reported by the system. example: eth0 @@ -5077,7 +5183,7 @@ observer: original_fieldset: interface short: Interface name type: keyword - ingress.vlan.id: + observer.ingress.vlan.id: dashed_name: observer-ingress-vlan-id description: VLAN ID as reported by the observer. example: 10 @@ -5089,7 +5195,7 @@ observer: original_fieldset: vlan short: VLAN ID as reported by the observer. type: keyword - ingress.vlan.name: + observer.ingress.vlan.name: dashed_name: observer-ingress-vlan-name description: Optional VLAN name as reported by the observer. example: outside @@ -5101,7 +5207,7 @@ observer: original_fieldset: vlan short: Optional VLAN name as reported by the observer. type: keyword - ingress.zone: + observer.ingress.zone: dashed_name: observer-ingress-zone description: Network zone of incoming traffic as reported by the observer to categorize the source area of ingress traffic. e.g. internal, External, DMZ, @@ -5114,7 +5220,7 @@ observer: normalize: [] short: Observer ingress zone type: keyword - ip: + observer.ip: dashed_name: observer-ip description: IP addresses of the observer. flat_name: observer.ip @@ -5124,7 +5230,7 @@ observer: - array short: IP addresses of the observer. type: ip - mac: + observer.mac: dashed_name: observer-mac description: MAC addresses of the observer flat_name: observer.mac @@ -5135,7 +5241,7 @@ observer: - array short: MAC addresses of the observer type: keyword - name: + observer.name: dashed_name: observer-name description: 'Custom name of the observer. @@ -5151,7 +5257,7 @@ observer: normalize: [] short: Custom name of the observer. type: keyword - os.family: + observer.os.family: dashed_name: observer-os-family description: OS family (such as redhat, debian, freebsd, windows). example: debian @@ -5163,7 +5269,7 @@ observer: original_fieldset: os short: OS family (such as redhat, debian, freebsd, windows). type: keyword - os.full: + observer.os.full: dashed_name: observer-os-full description: Operating system name, including the version or code name. example: Mac OS Mojave @@ -5180,7 +5286,7 @@ observer: original_fieldset: os short: Operating system name, including the version or code name. type: keyword - os.kernel: + observer.os.kernel: dashed_name: observer-os-kernel description: Operating system kernel version as a raw string. example: 4.4.0-112-generic @@ -5192,7 +5298,7 @@ observer: original_fieldset: os short: Operating system kernel version as a raw string. type: keyword - os.name: + observer.os.name: dashed_name: observer-os-name description: Operating system name, without the version. example: Mac OS X @@ -5209,7 +5315,7 @@ observer: original_fieldset: os short: Operating system name, without the version. type: keyword - os.platform: + observer.os.platform: dashed_name: observer-os-platform description: Operating system platform (such as centos, ubuntu, windows). example: darwin @@ -5221,7 +5327,7 @@ observer: original_fieldset: os short: Operating system platform (such as centos, ubuntu, windows).
type: keyword - os.version: + observer.os.version: dashed_name: observer-os-version description: Operating system version as a raw string. example: 10.14.1 @@ -5233,7 +5339,7 @@ observer: original_fieldset: os short: Operating system version as a raw string. type: keyword - product: + observer.product: dashed_name: observer-product description: The product name of the observer. example: s200 @@ -5244,7 +5350,7 @@ observer: normalize: [] short: The product name of the observer. type: keyword - serial_number: + observer.serial_number: dashed_name: observer-serial-number description: Observer serial number. flat_name: observer.serial_number @@ -5254,7 +5360,7 @@ observer: normalize: [] short: Observer serial number. type: keyword - type: + observer.type: dashed_name: observer-type description: 'The type of the observer the data is coming from. @@ -5268,7 +5374,7 @@ observer: normalize: [] short: The type of the observer the data is coming from. type: keyword - vendor: + observer.vendor: dashed_name: observer-vendor description: Vendor name of the observer. example: Symantec @@ -5279,7 +5385,7 @@ observer: normalize: [] short: Vendor name of the observer. type: keyword - version: + observer.version: dashed_name: observer-version description: Observer version. flat_name: observer.version @@ -5299,6 +5405,25 @@ observer: - observer.ingress.vlan - observer.os prefix: observer. + reused_here: + - full: observer.geo + schema_name: geo + short: Fields describing a location. + - full: observer.ingress.interface + schema_name: interface + short: Fields to describe observer interface information. + - full: observer.egress.interface + schema_name: interface + short: Fields to describe observer interface information. + - full: observer.os + schema_name: os + short: OS fields contain information about the operating system. + - full: observer.ingress.vlan + schema_name: vlan + short: Fields to describe observed VLAN information. + - full: observer.egress.vlan + schema_name: vlan + short: Fields to describe observed VLAN information. short: Fields describing an entity observing the event from outside the host. title: Observer type: group @@ -5309,7 +5434,7 @@ organization: These fields help you arrange or filter data stored in an index by one or multiple organizations.' fields: - id: + organization.id: dashed_name: organization-id description: Unique identifier for the organization. flat_name: organization.id @@ -5319,7 +5444,7 @@ organization: normalize: [] short: Unique identifier for the organization. type: keyword - name: + organization.name: dashed_name: organization-name description: Organization name. flat_name: organization.name @@ -5343,7 +5468,7 @@ organization: os: description: The OS fields contain information about the operating system. fields: - family: + os.family: dashed_name: os-family description: OS family (such as redhat, debian, freebsd, windows). example: debian @@ -5354,7 +5479,7 @@ os: normalize: [] short: OS family (such as redhat, debian, freebsd, windows). type: keyword - full: + os.full: dashed_name: os-full description: Operating system name, including the version or code name. example: Mac OS Mojave @@ -5370,7 +5495,7 @@ os: normalize: [] short: Operating system name, including the version or code name. type: keyword - kernel: + os.kernel: dashed_name: os-kernel description: Operating system kernel version as a raw string. example: 4.4.0-112-generic @@ -5381,7 +5506,7 @@ os: normalize: [] short: Operating system kernel version as a raw string. 
type: keyword - name: + os.name: dashed_name: os-name description: Operating system name, without the version. example: Mac OS X @@ -5397,7 +5522,7 @@ os: normalize: [] short: Operating system name, without the version. type: keyword - platform: + os.platform: dashed_name: os-platform description: Operating system platform (such as centos, ubuntu, windows). example: darwin @@ -5408,7 +5533,7 @@ os: normalize: [] short: Operating system platform (such as centos, ubuntu, windows). type: keyword - version: + os.version: dashed_name: os-version description: Operating system version as a raw string. example: 10.14.1 @@ -5424,9 +5549,15 @@ os: prefix: os. reusable: expected: - - observer - - host - - user_agent + - as: os + at: observer + full: observer.os + - as: os + at: host + full: host.os + - as: os + at: user_agent + full: user_agent.os top_level: false short: OS fields contain information about the operating system. title: Operating System @@ -5436,7 +5567,7 @@ package: It contains general information about a package, such as name, version or size. It also contains installation details, such as time or location. fields: - architecture: + package.architecture: dashed_name: package-architecture description: Package architecture. example: x86_64 @@ -5447,7 +5578,7 @@ package: normalize: [] short: Package architecture. type: keyword - build_version: + package.build_version: dashed_name: package-build-version description: 'Additional information about the build version of the installed package. @@ -5461,7 +5592,7 @@ package: normalize: [] short: Build version information type: keyword - checksum: + package.checksum: dashed_name: package-checksum description: Checksum of the installed package for verification. example: 68b329da9893e34099c7d8ad5cb9c940 @@ -5472,7 +5603,7 @@ package: normalize: [] short: Checksum of the installed package for verification. type: keyword - description: + package.description: dashed_name: package-description description: Description of the package. example: Open source programming language to build simple/reliable/efficient @@ -5484,7 +5615,7 @@ package: normalize: [] short: Description of the package. type: keyword - install_scope: + package.install_scope: dashed_name: package-install-scope description: Indicating how the package was installed, e.g. user-local, global. example: global @@ -5495,7 +5626,7 @@ package: normalize: [] short: Indicating how the package was installed, e.g. user-local, global. type: keyword - installed: + package.installed: dashed_name: package-installed description: Time when package was installed. flat_name: package.installed @@ -5504,7 +5635,7 @@ package: normalize: [] short: Time when package was installed. type: date - license: + package.license: dashed_name: package-license description: 'License under which the package was released. @@ -5518,7 +5649,7 @@ package: normalize: [] short: Package license type: keyword - name: + package.name: dashed_name: package-name description: Package name example: go @@ -5529,7 +5660,7 @@ package: normalize: [] short: Package name type: keyword - path: + package.path: dashed_name: package-path description: Path where the package is installed. example: /usr/local/Cellar/go/1.12.9/ @@ -5540,7 +5671,7 @@ package: normalize: [] short: Path where the package is installed. type: keyword - reference: + package.reference: dashed_name: package-reference description: Home page or reference URL of the software in this package, if available.
@@ -5552,7 +5683,7 @@ package: normalize: [] short: Package home page or reference URL type: keyword - size: + package.size: dashed_name: package-size description: Package size in bytes. example: 62231 @@ -5563,7 +5694,7 @@ package: normalize: [] short: Package size in bytes. type: long - type: + package.type: dashed_name: package-type description: 'Type of package. @@ -5577,7 +5708,7 @@ package: normalize: [] short: Package type type: keyword - version: + package.version: dashed_name: package-version description: Package version example: 1.12.9 @@ -5597,7 +5728,7 @@ package: pe: description: These fields contain Windows Portable Executable (PE) metadata. fields: - architecture: + pe.architecture: dashed_name: pe-architecture description: CPU architecture target for the file. example: x64 @@ -5608,7 +5739,7 @@ pe: normalize: [] short: CPU architecture target for the file. type: keyword - company: + pe.company: dashed_name: pe-company description: Internal company name of the file, provided at compile-time. example: Microsoft Corporation @@ -5619,7 +5750,7 @@ pe: normalize: [] short: Internal company name of the file, provided at compile-time. type: keyword - description: + pe.description: dashed_name: pe-description description: Internal description of the file, provided at compile-time. example: Paint @@ -5630,7 +5761,7 @@ pe: normalize: [] short: Internal description of the file, provided at compile-time. type: keyword - file_version: + pe.file_version: dashed_name: pe-file-version description: Internal version of the file, provided at compile-time. example: 6.3.9600.17415 @@ -5641,7 +5772,7 @@ pe: normalize: [] short: Internal version of the file, provided at compile-time. type: keyword - imphash: + pe.imphash: dashed_name: pe-imphash description: 'A hash of the imports in a PE file. An imphash -- or import hash -- can be used to fingerprint binaries even after recompilation or other code-level @@ -5656,7 +5787,7 @@ pe: normalize: [] short: A hash of the imports in a PE file. type: keyword - original_file_name: + pe.original_file_name: dashed_name: pe-original-file-name description: Internal name of the file, provided at compile-time. example: MSPAINT.EXE @@ -5667,7 +5798,7 @@ pe: normalize: [] short: Internal name of the file, provided at compile-time. type: keyword - product: + pe.product: dashed_name: pe-product description: Internal product name of the file, provided at compile-time. example: "Microsoft\xAE Windows\xAE Operating System" @@ -5683,9 +5814,15 @@ pe: prefix: pe. reusable: expected: - - file - - dll - - process + - as: pe + at: file + full: file.pe + - as: pe + at: dll + full: dll.pe + - as: pe + at: process + full: process.pe top_level: false short: These fields contain Windows Portable Executable (PE) metadata. title: PE Header @@ -5697,7 +5834,7 @@ process: from a log message. The `process.pid` often stays in the metric itself and is copied to the global field for correlation.' fields: - args: + process.args: dashed_name: process-args description: 'Array of process arguments, starting with the absolute path to the executable. @@ -5716,7 +5853,7 @@ process: normalize: - array short: Array of process arguments. type: keyword - args_count: + process.args_count: dashed_name: process-args-count description: 'Length of the process.args array. @@ -5730,7 +5867,7 @@ process: normalize: [] short: Length of the process.args array. type: long - code_signature.exists: + process.code_signature.exists: dashed_name: process-code-signature-exists description: Boolean to capture if a signature is present.
example: 'true' @@ -5741,7 +5878,7 @@ process: original_fieldset: code_signature short: Boolean to capture if a signature is present. type: boolean - code_signature.status: + process.code_signature.status: dashed_name: process-code-signature-status description: 'Additional information about the certificate status. @@ -5757,7 +5894,7 @@ process: original_fieldset: code_signature short: Additional information about the certificate status. type: keyword - code_signature.subject_name: + process.code_signature.subject_name: dashed_name: process-code-signature-subject-name description: Subject name of the code signer example: Microsoft Corporation @@ -5769,7 +5906,7 @@ process: original_fieldset: code_signature short: Subject name of the code signer type: keyword - code_signature.trusted: + process.code_signature.trusted: dashed_name: process-code-signature-trusted description: 'Stores the trust status of the certificate chain. @@ -5783,7 +5920,7 @@ process: original_fieldset: code_signature short: Stores the trust status of the certificate chain. type: boolean - code_signature.valid: + process.code_signature.valid: dashed_name: process-code-signature-valid description: 'Boolean to capture if the digital signature is verified against the binary content. @@ -5798,7 +5935,7 @@ process: short: Boolean to capture if the digital signature is verified against the binary content. type: boolean - command_line: + process.command_line: dashed_name: process-command-line description: 'Full command line that started the process, including the absolute path to the executable, and all arguments. @@ -5817,7 +5954,7 @@ process: normalize: [] short: Full command line that started the process. type: keyword - entity_id: + process.entity_id: dashed_name: process-entity-id description: 'Unique identifier for the process. @@ -5836,7 +5973,7 @@ process: normalize: [] short: Unique identifier for the process. type: keyword - executable: + process.executable: dashed_name: process-executable description: Absolute path to the process executable. example: /usr/bin/ssh @@ -5852,7 +5989,7 @@ process: normalize: [] short: Absolute path to the process executable. type: keyword - exit_code: + process.exit_code: dashed_name: process-exit-code description: 'The exit code of the process, if this is a termination event. @@ -5865,7 +6002,7 @@ process: normalize: [] short: The exit code of the process. type: long - hash.md5: + process.hash.md5: dashed_name: process-hash-md5 description: MD5 hash. flat_name: process.hash.md5 @@ -5876,7 +6013,7 @@ process: original_fieldset: hash short: MD5 hash. type: keyword - hash.sha1: + process.hash.sha1: dashed_name: process-hash-sha1 description: SHA1 hash. flat_name: process.hash.sha1 @@ -5887,7 +6024,7 @@ process: original_fieldset: hash short: SHA1 hash. type: keyword - hash.sha256: + process.hash.sha256: dashed_name: process-hash-sha256 description: SHA256 hash. flat_name: process.hash.sha256 @@ -5898,7 +6035,7 @@ process: original_fieldset: hash short: SHA256 hash. type: keyword - hash.sha512: + process.hash.sha512: dashed_name: process-hash-sha512 description: SHA512 hash. flat_name: process.hash.sha512 @@ -5909,7 +6046,7 @@ process: original_fieldset: hash short: SHA512 hash. type: keyword - name: + process.name: dashed_name: process-name description: 'Process name. @@ -5927,7 +6064,7 @@ process: normalize: [] short: Process name. type: keyword - parent.args: + process.parent.args: dashed_name: process-parent-args description: 'Array of process arguments. 
@@ -5945,7 +6082,7 @@ process: - array short: Array of process arguments. type: keyword - parent.args_count: + process.parent.args_count: dashed_name: process-parent-args-count description: 'Length of the process.args array. @@ -5959,7 +6096,7 @@ process: normalize: [] short: Length of the process.args array. type: long - parent.code_signature.exists: + process.parent.code_signature.exists: dashed_name: process-parent-code-signature-exists description: Boolean to capture if a signature is present. example: 'true' @@ -5970,7 +6107,7 @@ process: original_fieldset: code_signature short: Boolean to capture if a signature is present. type: boolean - parent.code_signature.status: + process.parent.code_signature.status: dashed_name: process-parent-code-signature-status description: 'Additional information about the certificate status. @@ -5986,7 +6123,7 @@ process: original_fieldset: code_signature short: Additional information about the certificate status. type: keyword - parent.code_signature.subject_name: + process.parent.code_signature.subject_name: dashed_name: process-parent-code-signature-subject-name description: Subject name of the code signer example: Microsoft Corporation @@ -5998,7 +6135,7 @@ process: original_fieldset: code_signature short: Subject name of the code signer type: keyword - parent.code_signature.trusted: + process.parent.code_signature.trusted: dashed_name: process-parent-code-signature-trusted description: 'Stores the trust status of the certificate chain. @@ -6012,7 +6149,7 @@ process: original_fieldset: code_signature short: Stores the trust status of the certificate chain. type: boolean - parent.code_signature.valid: + process.parent.code_signature.valid: dashed_name: process-parent-code-signature-valid description: 'Boolean to capture if the digital signature is verified against the binary content. @@ -6027,7 +6164,7 @@ process: short: Boolean to capture if the digital signature is verified against the binary content. type: boolean - parent.command_line: + process.parent.command_line: dashed_name: process-parent-command-line description: 'Full command line that started the process, including the absolute path to the executable, and all arguments. @@ -6046,7 +6183,7 @@ process: normalize: [] short: Full command line that started the process. type: keyword - parent.entity_id: + process.parent.entity_id: dashed_name: process-parent-entity-id description: 'Unique identifier for the process. @@ -6065,7 +6202,7 @@ process: normalize: [] short: Unique identifier for the process. type: keyword - parent.executable: + process.parent.executable: dashed_name: process-parent-executable description: Absolute path to the process executable. example: /usr/bin/ssh @@ -6081,7 +6218,7 @@ process: normalize: [] short: Absolute path to the process executable. type: keyword - parent.exit_code: + process.parent.exit_code: dashed_name: process-parent-exit-code description: 'The exit code of the process, if this is a termination event. @@ -6094,7 +6231,7 @@ process: normalize: [] short: The exit code of the process. type: long - parent.hash.md5: + process.parent.hash.md5: dashed_name: process-parent-hash-md5 description: MD5 hash. flat_name: process.parent.hash.md5 @@ -6105,7 +6242,7 @@ process: original_fieldset: hash short: MD5 hash. type: keyword - parent.hash.sha1: + process.parent.hash.sha1: dashed_name: process-parent-hash-sha1 description: SHA1 hash. flat_name: process.parent.hash.sha1 @@ -6116,7 +6253,7 @@ process: original_fieldset: hash short: SHA1 hash. 
type: keyword - parent.hash.sha256: + process.parent.hash.sha256: dashed_name: process-parent-hash-sha256 description: SHA256 hash. flat_name: process.parent.hash.sha256 @@ -6127,7 +6264,7 @@ process: original_fieldset: hash short: SHA256 hash. type: keyword - parent.hash.sha512: + process.parent.hash.sha512: dashed_name: process-parent-hash-sha512 description: SHA512 hash. flat_name: process.parent.hash.sha512 @@ -6138,7 +6275,7 @@ process: original_fieldset: hash short: SHA512 hash. type: keyword - parent.name: + process.parent.name: dashed_name: process-parent-name description: 'Process name. @@ -6156,7 +6293,7 @@ process: normalize: [] short: Process name. type: keyword - parent.pgid: + process.parent.pgid: dashed_name: process-parent-pgid description: Identifier of the group of processes the process belongs to. flat_name: process.parent.pgid @@ -6166,7 +6303,7 @@ process: normalize: [] short: Identifier of the group of processes the process belongs to. type: long - parent.pid: + process.parent.pid: dashed_name: process-parent-pid description: Process id. example: 4242 @@ -6177,7 +6314,7 @@ process: normalize: [] short: Process id. type: long - parent.ppid: + process.parent.ppid: dashed_name: process-parent-ppid description: Parent process' pid. example: 4241 @@ -6188,7 +6325,7 @@ process: normalize: [] short: Parent process' pid. type: long - parent.start: + process.parent.start: dashed_name: process-parent-start description: The time the process started. example: '2016-05-23T08:05:34.853Z' @@ -6198,7 +6335,7 @@ process: normalize: [] short: The time the process started. type: date - parent.thread.id: + process.parent.thread.id: dashed_name: process-parent-thread-id description: Thread ID. example: 4242 @@ -6209,7 +6346,7 @@ process: normalize: [] short: Thread ID. type: long - parent.thread.name: + process.parent.thread.name: dashed_name: process-parent-thread-name description: Thread name. example: thread-0 @@ -6220,7 +6357,7 @@ process: normalize: [] short: Thread name. type: keyword - parent.title: + process.parent.title: dashed_name: process-parent-title description: 'Process title. @@ -6238,7 +6375,7 @@ process: normalize: [] short: Process title. type: keyword - parent.uptime: + process.parent.uptime: dashed_name: process-parent-uptime description: Seconds the process has been up. example: 1325 @@ -6248,7 +6385,7 @@ process: normalize: [] short: Seconds the process has been up. type: long - parent.working_directory: + process.parent.working_directory: dashed_name: process-parent-working-directory description: The working directory of the process. example: /home/alice @@ -6264,7 +6401,7 @@ process: normalize: [] short: The working directory of the process. type: keyword - pe.architecture: + process.pe.architecture: dashed_name: process-pe-architecture description: CPU architecture target for the file. example: x64 @@ -6276,7 +6413,7 @@ process: original_fieldset: pe short: CPU architecture target for the file. type: keyword - pe.company: + process.pe.company: dashed_name: process-pe-company description: Internal company name of the file, provided at compile-time. example: Microsoft Corporation @@ -6288,7 +6425,7 @@ process: original_fieldset: pe short: Internal company name of the file, provided at compile-time. type: keyword - pe.description: + process.pe.description: dashed_name: process-pe-description description: Internal description of the file, provided at compile-time. 
example: Paint @@ -6300,7 +6437,7 @@ process: original_fieldset: pe short: Internal description of the file, provided at compile-time. type: keyword - pe.file_version: + process.pe.file_version: dashed_name: process-pe-file-version description: Internal version of the file, provided at compile-time. example: 6.3.9600.17415 @@ -6312,7 +6449,7 @@ process: original_fieldset: pe short: Internal version of the file, provided at compile-time. type: keyword - pe.imphash: + process.pe.imphash: dashed_name: process-pe-imphash description: 'A hash of the imports in a PE file. An imphash -- or import hash -- can be used to fingerprint binaries even after recompilation or other code-level @@ -6328,7 +6465,7 @@ process: original_fieldset: pe short: A hash of the imports in a PE file. type: keyword - pe.original_file_name: + process.pe.original_file_name: dashed_name: process-pe-original-file-name description: Internal name of the file, provided at compile-time. example: MSPAINT.EXE @@ -6340,7 +6477,7 @@ process: original_fieldset: pe short: Internal name of the file, provided at compile-time. type: keyword - pe.product: + process.pe.product: dashed_name: process-pe-product description: Internal product name of the file, provided at compile-time. example: "Microsoft\xAE Windows\xAE Operating System" @@ -6352,7 +6489,7 @@ process: original_fieldset: pe short: Internal product name of the file, provided at compile-time. type: keyword - pgid: + process.pgid: dashed_name: process-pgid description: Identifier of the group of processes the process belongs to. flat_name: process.pgid @@ -6362,7 +6499,7 @@ process: normalize: [] short: Identifier of the group of processes the process belongs to. type: long - pid: + process.pid: dashed_name: process-pid description: Process id. example: 4242 @@ -6373,7 +6510,7 @@ process: normalize: [] short: Process id. type: long - ppid: + process.ppid: dashed_name: process-ppid description: Parent process' pid. example: 4241 @@ -6384,7 +6521,7 @@ process: normalize: [] short: Parent process' pid. type: long - start: + process.start: dashed_name: process-start description: The time the process started. example: '2016-05-23T08:05:34.853Z' @@ -6394,7 +6531,7 @@ process: normalize: [] short: The time the process started. type: date - thread.id: + process.thread.id: dashed_name: process-thread-id description: Thread ID. example: 4242 @@ -6405,7 +6542,7 @@ process: normalize: [] short: Thread ID. type: long - thread.name: + process.thread.name: dashed_name: process-thread-name description: Thread name. example: thread-0 @@ -6416,7 +6553,7 @@ process: normalize: [] short: Thread name. type: keyword - title: + process.title: dashed_name: process-title description: 'Process title. @@ -6434,7 +6571,7 @@ process: normalize: [] short: Process title. type: keyword - uptime: + process.uptime: dashed_name: process-uptime description: Seconds the process has been up. example: 1325 @@ -6444,7 +6581,7 @@ process: normalize: [] short: Seconds the process has been up. type: long - working_directory: + process.working_directory: dashed_name: process-working-directory description: The working directory of the process. example: /home/alice @@ -6469,13 +6606,29 @@ process: - process.parent.hash - process.pe prefix: process. + reused_here: + - full: process.code_signature + schema_name: code_signature + short: These fields contain information about binary code signatures. + - full: process.parent.code_signature + schema_name: code_signature + short: These fields contain information about binary code signatures.
+ - full: process.hash + schema_name: hash + short: Hashes, usually file hashes. + - full: process.parent.hash + schema_name: hash + short: Hashes, usually file hashes. + - full: process.pe + schema_name: pe + short: These fields contain Windows Portable Executable (PE) metadata. short: These fields contain information about a process. title: Process type: group registry: description: Fields related to Windows Registry operations. fields: - data.bytes: + registry.data.bytes: dashed_name: registry-data-bytes description: 'Original bytes written with base64 encoding. @@ -6490,7 +6643,7 @@ registry: normalize: [] short: Original bytes written with base64 encoding. type: keyword - data.strings: + registry.data.strings: dashed_name: registry-data-strings description: 'Content when writing string types. @@ -6508,7 +6661,7 @@ registry: - array short: List of strings representing what was written to the registry. type: keyword - data.type: + registry.data.type: dashed_name: registry-data-type description: Standard registry type for encoding contents example: REG_SZ @@ -6519,7 +6672,7 @@ registry: normalize: [] short: Standard registry type for encoding contents type: keyword - hive: + registry.hive: dashed_name: registry-hive description: Abbreviated name for the hive. example: HKLM @@ -6530,7 +6683,7 @@ registry: normalize: [] short: Abbreviated name for the hive. type: keyword - key: + registry.key: dashed_name: registry-key description: Hive-relative path of keys. example: SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution Options\winword.exe @@ -6541,7 +6694,7 @@ registry: normalize: [] short: Hive-relative path of keys. type: keyword - path: + registry.path: dashed_name: registry-path description: Full path, including hive, key and value example: HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution @@ -6553,7 +6706,7 @@ registry: normalize: [] short: Full path, including hive, key and value type: keyword - value: + registry.value: dashed_name: registry-value description: Name of the value written. example: Debugger @@ -6582,7 +6735,7 @@ related: `related.ip`, you can then search for a given IP trivially, no matter where it appeared, by querying `related.ip:192.0.2.15`.' fields: - hash: + related.hash: dashed_name: related-hash description: All the hashes seen on your event. Populating this field, then using it to search for hashes can help in situations where you're unsure what @@ -6595,7 +6748,7 @@ related: - array short: All the hashes seen on your event. type: keyword - ip: + related.ip: dashed_name: related-ip description: All of the IPs seen on your event. flat_name: related.ip @@ -6605,7 +6758,7 @@ related: - array short: All of the IPs seen on your event. type: ip - user: + related.user: dashed_name: related-user description: All the user names seen on your event. flat_name: related.user @@ -6630,7 +6783,7 @@ rule: admission control platforms, network or host IDS/IPS, network firewalls, web application firewalls, url filters, endpoint detection and response (EDR) systems, etc.' fields: - author: + rule.author: dashed_name: rule-author description: Name, organization, or pseudonym of the author or authors who created the rule used to generate this event. @@ -6644,7 +6797,7 @@ rule: - array short: Rule author type: keyword - category: + rule.category: dashed_name: rule-category description: A categorization value keyword used by the entity using the rule for detection of this event. 
@@ -6656,7 +6809,7 @@ rule: normalize: [] short: Rule category type: keyword - description: + rule.description: dashed_name: rule-description description: The description of the rule generating the event. example: Block requests to public DNS over HTTPS / TLS protocols @@ -6667,7 +6820,7 @@ rule: normalize: [] short: Rule description type: keyword - id: + rule.id: dashed_name: rule-id description: A rule ID that is unique within the scope of an agent, observer, or other entity using the rule for detection of this event. @@ -6679,7 +6832,7 @@ rule: normalize: [] short: Rule ID type: keyword - license: + rule.license: dashed_name: rule-license description: Name of the license under which the rule used to generate this event is made available. @@ -6691,7 +6844,7 @@ rule: normalize: [] short: Rule license type: keyword - name: + rule.name: dashed_name: rule-name description: The name of the rule or signature generating the event. example: BLOCK_DNS_over_TLS @@ -6702,7 +6855,7 @@ rule: normalize: [] short: Rule name type: keyword - reference: + rule.reference: dashed_name: rule-reference description: 'Reference URL to additional information about the rule used to generate this event. @@ -6718,7 +6871,7 @@ rule: normalize: [] short: Rule reference URL type: keyword - ruleset: + rule.ruleset: dashed_name: rule-ruleset description: Name of the ruleset, policy, group, or parent category in which the rule used to generate this event is a member. @@ -6730,7 +6883,7 @@ rule: normalize: [] short: Rule ruleset type: keyword - uuid: + rule.uuid: dashed_name: rule-uuid description: A rule ID that is unique within the scope of a set or group of agents, observers, or other entities using the rule for detection of this @@ -6743,7 +6896,7 @@ rule: normalize: [] short: Rule UUID type: keyword - version: + rule.version: dashed_name: rule-version description: The version / revision of the rule being used for analysis. example: 1.1 @@ -6778,7 +6931,7 @@ server: in that category, you should still ensure that source and destination are filled appropriately.' fields: - address: + server.address: dashed_name: server-address description: 'Some event server addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store @@ -6793,7 +6946,7 @@ server: normalize: [] short: Server network address. type: keyword - as.number: + server.as.number: dashed_name: server-as-number description: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. @@ -6806,7 +6959,7 @@ server: short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long - as.organization.name: + server.as.organization.name: dashed_name: server-as-organization-name description: Organization name. example: Google LLC @@ -6823,7 +6976,7 @@ server: original_fieldset: as short: Organization name. type: keyword - bytes: + server.bytes: dashed_name: server-bytes description: Bytes sent from the server to the client. example: 184 @@ -6834,7 +6987,7 @@ server: normalize: [] short: Bytes sent from the server to the client. type: long - domain: + server.domain: dashed_name: server-domain description: Server domain. flat_name: server.domain @@ -6844,7 +6997,7 @@ server: normalize: [] short: Server domain. type: keyword - geo.city_name: + server.geo.city_name: dashed_name: server-geo-city-name description: City name. 
example: Montreal @@ -6856,7 +7009,7 @@ server: original_fieldset: geo short: City name. type: keyword - geo.continent_name: + server.geo.continent_name: dashed_name: server-geo-continent-name description: Name of the continent. example: North America @@ -6868,7 +7021,7 @@ server: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + server.geo.country_iso_code: dashed_name: server-geo-country-iso-code description: Country ISO code. example: CA @@ -6880,7 +7033,7 @@ server: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + server.geo.country_name: dashed_name: server-geo-country-name description: Country name. example: Canada @@ -6892,7 +7045,7 @@ server: original_fieldset: geo short: Country name. type: keyword - geo.location: + server.geo.location: dashed_name: server-geo-location description: Longitude and latitude. example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -6903,7 +7056,7 @@ server: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + server.geo.name: dashed_name: server-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -6921,7 +7074,7 @@ server: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + server.geo.region_iso_code: dashed_name: server-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -6933,7 +7086,7 @@ server: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + server.geo.region_name: dashed_name: server-geo-region-name description: Region name. example: Quebec @@ -6945,7 +7098,7 @@ server: original_fieldset: geo short: Region name. type: keyword - ip: + server.ip: dashed_name: server-ip description: IP address of the server (IPv4 or IPv6). flat_name: server.ip @@ -6954,7 +7107,7 @@ server: normalize: [] short: IP address of the server. type: ip - mac: + server.mac: dashed_name: server-mac description: MAC address of the server. flat_name: server.mac @@ -6964,7 +7117,7 @@ server: normalize: [] short: MAC address of the server. type: keyword - nat.ip: + server.nat.ip: dashed_name: server-nat-ip description: 'Translated ip of destination based NAT sessions (e.g. internet to private DMZ) @@ -6976,7 +7129,7 @@ server: normalize: [] short: Server NAT ip type: ip - nat.port: + server.nat.port: dashed_name: server-nat-port description: 'Translated port of destination based NAT sessions (e.g. internet to private DMZ) @@ -6989,7 +7142,7 @@ server: normalize: [] short: Server NAT port type: long - packets: + server.packets: dashed_name: server-packets description: Packets sent from the server to the client. example: 12 @@ -6999,7 +7152,7 @@ server: normalize: [] short: Packets sent from the server to the client. type: long - port: + server.port: dashed_name: server-port description: Port of the server. flat_name: server.port @@ -7009,7 +7162,7 @@ server: normalize: [] short: Port of the server. type: long - registered_domain: + server.registered_domain: dashed_name: server-registered-domain description: 'The highest registered server domain, stripped of the subdomain. @@ -7026,7 +7179,7 @@ server: normalize: [] short: The highest registered server domain, stripped of the subdomain. type: keyword - top_level_domain: + server.top_level_domain: dashed_name: server-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. 
For example, the top level domain @@ -7043,7 +7196,7 @@ server: normalize: [] short: The effective top level domain (com, org, net, co.uk). type: keyword - user.domain: + server.user.domain: dashed_name: server-user-domain description: 'Name of the directory the user is a member of. @@ -7056,7 +7209,7 @@ server: original_fieldset: user short: Name of the directory the user is a member of. type: keyword - user.email: + server.user.email: dashed_name: server-user-email description: User email address. flat_name: server.user.email @@ -7067,7 +7220,7 @@ server: original_fieldset: user short: User email address. type: keyword - user.full_name: + server.user.full_name: dashed_name: server-user-full-name description: User's full name, if available. example: Albert Einstein @@ -7084,7 +7237,7 @@ server: original_fieldset: user short: User's full name, if available. type: keyword - user.group.domain: + server.user.group.domain: dashed_name: server-user-group-domain description: 'Name of the directory the group is a member of. @@ -7097,7 +7250,7 @@ server: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - user.group.id: + server.user.group.id: dashed_name: server-user-group-id description: Unique identifier for the group on the system/platform. flat_name: server.user.group.id @@ -7108,7 +7261,7 @@ server: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - user.group.name: + server.user.group.name: dashed_name: server-user-group-name description: Name of the group. flat_name: server.user.group.name @@ -7119,7 +7272,7 @@ server: original_fieldset: group short: Name of the group. type: keyword - user.hash: + server.user.hash: dashed_name: server-user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -7134,7 +7287,7 @@ server: original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. type: keyword - user.id: + server.user.id: dashed_name: server-user-id description: Unique identifier of the user. flat_name: server.user.id @@ -7145,7 +7298,7 @@ server: original_fieldset: user short: Unique identifier of the user. type: keyword - user.name: + server.user.name: dashed_name: server-user-name description: Short name or login of the user. example: albert @@ -7168,8 +7321,17 @@ server: - server.as - server.geo - server.user - - server.user.group prefix: server. + reused_here: + - full: server.as + schema_name: as + short: Fields describing an Autonomous System (Internet routing prefix). + - full: server.geo + schema_name: geo + short: Fields describing a location. + - full: server.user + schema_name: user + short: Fields to describe the user relevant to the event. short: Fields about the server side of a network connection, used with client. title: Server type: group @@ -7179,7 +7341,7 @@ service: These fields help you find and correlate logs for a specific service and version.' fields: - ephemeral_id: + service.ephemeral_id: dashed_name: service-ephemeral-id description: 'Ephemeral identifier of this service (if one exists). @@ -7192,7 +7354,7 @@ service: normalize: [] short: Ephemeral identifier of this service. type: keyword - id: + service.id: dashed_name: service-id description: 'Unique identifier of the running service. If the service is comprised of many nodes, the `service.id` should be the same for all nodes. @@ -7211,7 +7373,7 @@ service: normalize: [] short: Unique identifier of the running service. 
type: keyword - name: + service.name: dashed_name: service-name description: 'Name of the service data is collected from. @@ -7230,7 +7392,7 @@ service: normalize: [] short: Name of the service. type: keyword - node.name: + service.node.name: dashed_name: service-node-name description: 'Name of a service node. @@ -7252,7 +7414,7 @@ service: normalize: [] short: Name of the service node. type: keyword - state: + service.state: dashed_name: service-state description: Current state of the service. flat_name: service.state @@ -7262,7 +7424,7 @@ service: normalize: [] short: Current state of the service. type: keyword - type: + service.type: dashed_name: service-type description: 'The type of the service data is collected from. @@ -7279,7 +7441,7 @@ service: normalize: [] short: The type of the service. type: keyword - version: + service.version: dashed_name: service-version description: 'Version of the service the data was collected from. @@ -7303,7 +7465,7 @@ source: Source fields are usually populated in conjunction with destination fields.' fields: - address: + source.address: dashed_name: source-address description: 'Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store @@ -7318,7 +7480,7 @@ source: normalize: [] short: Source network address. type: keyword - as.number: + source.as.number: dashed_name: source-as-number description: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. @@ -7331,7 +7493,7 @@ source: short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long - as.organization.name: + source.as.organization.name: dashed_name: source-as-organization-name description: Organization name. example: Google LLC @@ -7348,7 +7510,7 @@ source: original_fieldset: as short: Organization name. type: keyword - bytes: + source.bytes: dashed_name: source-bytes description: Bytes sent from the source to the destination. example: 184 @@ -7359,7 +7521,7 @@ source: normalize: [] short: Bytes sent from the source to the destination. type: long - domain: + source.domain: dashed_name: source-domain description: Source domain. flat_name: source.domain @@ -7369,7 +7531,7 @@ source: normalize: [] short: Source domain. type: keyword - geo.city_name: + source.geo.city_name: dashed_name: source-geo-city-name description: City name. example: Montreal @@ -7381,7 +7543,7 @@ source: original_fieldset: geo short: City name. type: keyword - geo.continent_name: + source.geo.continent_name: dashed_name: source-geo-continent-name description: Name of the continent. example: North America @@ -7393,7 +7555,7 @@ source: original_fieldset: geo short: Name of the continent. type: keyword - geo.country_iso_code: + source.geo.country_iso_code: dashed_name: source-geo-country-iso-code description: Country ISO code. example: CA @@ -7405,7 +7567,7 @@ source: original_fieldset: geo short: Country ISO code. type: keyword - geo.country_name: + source.geo.country_name: dashed_name: source-geo-country-name description: Country name. example: Canada @@ -7417,7 +7579,7 @@ source: original_fieldset: geo short: Country name. type: keyword - geo.location: + source.geo.location: dashed_name: source-geo-location description: Longitude and latitude. 
example: '{ "lon": -73.614830, "lat": 45.505918 }' @@ -7428,7 +7590,7 @@ source: original_fieldset: geo short: Longitude and latitude. type: geo_point - geo.name: + source.geo.name: dashed_name: source-geo-name description: 'User-defined description of a location, at the level of granularity they care about. @@ -7446,7 +7608,7 @@ source: original_fieldset: geo short: User-defined description of a location. type: keyword - geo.region_iso_code: + source.geo.region_iso_code: dashed_name: source-geo-region-iso-code description: Region ISO code. example: CA-QC @@ -7458,7 +7620,7 @@ source: original_fieldset: geo short: Region ISO code. type: keyword - geo.region_name: + source.geo.region_name: dashed_name: source-geo-region-name description: Region name. example: Quebec @@ -7470,7 +7632,7 @@ source: original_fieldset: geo short: Region name. type: keyword - ip: + source.ip: dashed_name: source-ip description: IP address of the source (IPv4 or IPv6). flat_name: source.ip @@ -7479,7 +7641,7 @@ source: normalize: [] short: IP address of the source. type: ip - mac: + source.mac: dashed_name: source-mac description: MAC address of the source. flat_name: source.mac @@ -7489,7 +7651,7 @@ source: normalize: [] short: MAC address of the source. type: keyword - nat.ip: + source.nat.ip: dashed_name: source-nat-ip description: 'Translated ip of source based NAT sessions (e.g. internal client to internet) @@ -7501,7 +7663,7 @@ source: normalize: [] short: Source NAT ip type: ip - nat.port: + source.nat.port: dashed_name: source-nat-port description: 'Translated port of source based NAT sessions. (e.g. internal client to internet) @@ -7514,7 +7676,7 @@ source: normalize: [] short: Source NAT port type: long - packets: + source.packets: dashed_name: source-packets description: Packets sent from the source to the destination. example: 12 @@ -7524,7 +7686,7 @@ source: normalize: [] short: Packets sent from the source to the destination. type: long - port: + source.port: dashed_name: source-port description: Port of the source. flat_name: source.port @@ -7534,7 +7696,7 @@ source: normalize: [] short: Port of the source. type: long - registered_domain: + source.registered_domain: dashed_name: source-registered-domain description: 'The highest registered source domain, stripped of the subdomain. @@ -7551,7 +7713,7 @@ source: normalize: [] short: The highest registered source domain, stripped of the subdomain. type: keyword - top_level_domain: + source.top_level_domain: dashed_name: source-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. For example, the top level domain @@ -7568,7 +7730,7 @@ source: normalize: [] short: The effective top level domain (com, org, net, co.uk). type: keyword - user.domain: + source.user.domain: dashed_name: source-user-domain description: 'Name of the directory the user is a member of. @@ -7581,7 +7743,7 @@ source: original_fieldset: user short: Name of the directory the user is a member of. type: keyword - user.email: + source.user.email: dashed_name: source-user-email description: User email address. flat_name: source.user.email @@ -7592,7 +7754,7 @@ source: original_fieldset: user short: User email address. type: keyword - user.full_name: + source.user.full_name: dashed_name: source-user-full-name description: User's full name, if available. example: Albert Einstein @@ -7609,7 +7771,7 @@ source: original_fieldset: user short: User's full name, if available. 
type: keyword - user.group.domain: + source.user.group.domain: dashed_name: source-user-group-domain description: 'Name of the directory the group is a member of. @@ -7622,7 +7784,7 @@ source: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - user.group.id: + source.user.group.id: dashed_name: source-user-group-id description: Unique identifier for the group on the system/platform. flat_name: source.user.group.id @@ -7633,7 +7795,7 @@ source: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - user.group.name: + source.user.group.name: dashed_name: source-user-group-name description: Name of the group. flat_name: source.user.group.name @@ -7644,7 +7806,7 @@ source: original_fieldset: group short: Name of the group. type: keyword - user.hash: + source.user.hash: dashed_name: source-user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -7659,7 +7821,7 @@ source: original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. type: keyword - user.id: + source.user.id: dashed_name: source-user-id description: Unique identifier of the user. flat_name: source.user.id @@ -7670,7 +7832,7 @@ source: original_fieldset: user short: Unique identifier of the user. type: keyword - user.name: + source.user.name: dashed_name: source-user-name description: Short name or login of the user. example: albert @@ -7693,8 +7855,17 @@ source: - source.as - source.geo - source.user - - source.user.group prefix: source. + reused_here: + - full: source.as + schema_name: as + short: Fields describing an Autonomous System (Internet routing prefix). + - full: source.geo + schema_name: geo + short: Fields describing a location. + - full: source.user + schema_name: user + short: Fields to describe the user relevant to the event. short: Fields about the source side of a network connection, used with destination. title: Source type: group @@ -7708,7 +7879,7 @@ threat: fields are meant to capture which kind of approach is used by this detected threat, to accomplish the goal (e.g. "endpoint denial of service").' fields: - framework: + threat.framework: dashed_name: threat-framework description: Name of the threat framework used to further categorize and classify the tactic and technique of the reported threat. Framework classification @@ -7722,7 +7893,7 @@ threat: normalize: [] short: Threat classification framework. type: keyword - tactic.id: + threat.tactic.id: dashed_name: threat-tactic-id description: The id of tactic used by this threat. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/tactics/TA0040/ @@ -7736,7 +7907,7 @@ threat: - array short: Threat tactic id. type: keyword - tactic.name: + threat.tactic.name: dashed_name: threat-tactic-name description: Name of the type of tactic used by this threat. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/tactics/TA0040/ @@ -7750,7 +7921,7 @@ threat: - array short: Threat tactic. type: keyword - tactic.reference: + threat.tactic.reference: dashed_name: threat-tactic-reference description: The reference url of tactic used by this threat. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/tactics/TA0040/ @@ -7764,7 +7935,7 @@ threat: - array short: Threat tactic url reference. 
type: keyword - technique.id: + threat.technique.id: dashed_name: threat-technique-id description: The id of technique used by this tactic. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/techniques/T1499/ @@ -7778,7 +7949,7 @@ threat: - array short: Threat technique id. type: keyword - technique.name: + threat.technique.name: dashed_name: threat-technique-name description: The name of technique used by this tactic. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/techniques/T1499/ @@ -7797,7 +7968,7 @@ threat: - array short: Threat technique name. type: keyword - technique.reference: + threat.technique.reference: dashed_name: threat-technique-reference description: The reference url of technique used by this tactic. You can use the Mitre ATT&CK Matrix Tactic categorization, for example. (ex. https://attack.mitre.org/techniques/T1499/ @@ -7822,7 +7993,7 @@ tls: itself and intentionally avoids in-depth analysis of the related x.509 certificate files. fields: - cipher: + tls.cipher: dashed_name: tls-cipher description: String indicating the cipher used during the current connection. example: TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 @@ -7833,7 +8004,7 @@ tls: normalize: [] short: String indicating the cipher used during the current connection. type: keyword - client.certificate: + tls.client.certificate: dashed_name: tls-client-certificate description: PEM-encoded stand-alone certificate offered by the client. This is usually mutually-exclusive of `client.certificate_chain` since this value @@ -7848,7 +8019,7 @@ tls: mutually-exclusive of `client.certificate_chain` since this value also exists in that list. type: keyword - client.certificate_chain: + tls.client.certificate_chain: dashed_name: tls-client-certificate-chain description: Array of PEM-encoded certificates that make up the certificate chain offered by the client. This is usually mutually-exclusive of `client.certificate` @@ -7866,7 +8037,7 @@ tls: offered by the client. This is usually mutually-exclusive of `client.certificate` since that value should be the first certificate in the chain. type: keyword - client.hash.md5: + tls.client.hash.md5: dashed_name: tls-client-hash-md5 description: Certificate fingerprint using the MD5 digest of DER-encoded version of certificate offered by the client. For consistency with other hash values, @@ -7881,7 +8052,7 @@ tls: certificate offered by the client. For consistency with other hash values, this value should be formatted as an uppercase hash. type: keyword - client.hash.sha1: + tls.client.hash.sha1: dashed_name: tls-client-hash-sha1 description: Certificate fingerprint using the SHA1 digest of DER-encoded version of certificate offered by the client. For consistency with other hash values, @@ -7896,7 +8067,7 @@ tls: of certificate offered by the client. For consistency with other hash values, this value should be formatted as an uppercase hash. type: keyword - client.hash.sha256: + tls.client.hash.sha256: dashed_name: tls-client-hash-sha256 description: Certificate fingerprint using the SHA256 digest of DER-encoded version of certificate offered by the client. For consistency with other hash @@ -7911,7 +8082,7 @@ tls: of certificate offered by the client. For consistency with other hash values, this value should be formatted as an uppercase hash. 
type: keyword - client.issuer: + tls.client.issuer: dashed_name: tls-client-issuer description: Distinguished name of subject of the issuer of the x.509 certificate presented by the client. @@ -7924,7 +8095,7 @@ tls: short: Distinguished name of subject of the issuer of the x.509 certificate presented by the client. type: keyword - client.ja3: + tls.client.ja3: dashed_name: tls-client-ja3 description: A hash that identifies clients based on how they perform an SSL/TLS handshake. @@ -7936,7 +8107,7 @@ tls: normalize: [] short: A hash that identifies clients based on how they perform an SSL/TLS handshake. type: keyword - client.not_after: + tls.client.not_after: dashed_name: tls-client-not-after description: Date/Time indicating when client certificate is no longer considered valid. @@ -7948,7 +8119,7 @@ tls: short: Date/Time indicating when client certificate is no longer considered valid. type: date - client.not_before: + tls.client.not_before: dashed_name: tls-client-not-before description: Date/Time indicating when client certificate is first considered valid. @@ -7959,7 +8130,7 @@ tls: normalize: [] short: Date/Time indicating when client certificate is first considered valid. type: date - client.server_name: + tls.client.server_name: dashed_name: tls-client-server-name description: Also called an SNI, this tells the server the hostname to which the client is attempting to connect. When this value is available, it should @@ -7974,7 +8145,7 @@ tls: client is attempting to connect. When this value is available, it should get copied to `destination.domain`. type: keyword - client.subject: + tls.client.subject: dashed_name: tls-client-subject description: Distinguished name of subject of the x.509 certificate presented by the client. @@ -7987,7 +8158,7 @@ tls: short: Distinguished name of subject of the x.509 certificate presented by the client. type: keyword - client.supported_ciphers: + tls.client.supported_ciphers: dashed_name: tls-client-supported-ciphers description: Array of ciphers offered by the client during the client hello. example: @@ -8002,7 +8173,7 @@ tls: - array short: Array of ciphers offered by the client during the client hello. type: keyword - client.x509.alternative_names: + tls.client.x509.alternative_names: dashed_name: tls-client-x509-alternative-names description: List of subject alternative names (SAN). Name types vary by certificate authority and certificate type but commonly contain IP addresses, DNS names @@ -8020,7 +8191,7 @@ tls: (and wildcards), and email addresses. short_description: List of subject alternative names (SAN) type: keyword - client.x509.issuer.common_name: + tls.client.x509.issuer.common_name: dashed_name: tls-client-x509-issuer-common-name description: List of common name (CN) of issuing certificate authority. example: DigiCert SHA2 High Assurance Server CA @@ -8033,7 +8204,7 @@ tls: original_fieldset: x509 short: List of common name (CN) of issuing certificate authority. type: keyword - client.x509.issuer.country: + tls.client.x509.issuer.country: dashed_name: tls-client-x509-issuer-country description: List of country (C) codes example: US @@ -8046,7 +8217,7 @@ tls: original_fieldset: x509 short: List of country (C) codes type: keyword - client.x509.issuer.distinguished_name: + tls.client.x509.issuer.distinguished_name: dashed_name: tls-client-x509-issuer-distinguished-name description: Distinguished name (DN) of issuing certificate authority.
example: C=US, O=DigiCert Inc, OU=www.digicert.com, CN=DigiCert SHA2 High Assurance @@ -8059,7 +8230,7 @@ tls: original_fieldset: x509 short: Distinguished name (DN) of issuing certificate authority. type: keyword - client.x509.issuer.locality: + tls.client.x509.issuer.locality: dashed_name: tls-client-x509-issuer-locality description: List of locality names (L) example: Mountain View @@ -8072,7 +8243,7 @@ tls: original_fieldset: x509 short: List of locality names (L) type: keyword - client.x509.issuer.organization: + tls.client.x509.issuer.organization: dashed_name: tls-client-x509-issuer-organization description: List of organizations (O) of issuing certificate authority. example: DigiCert Inc @@ -8085,7 +8256,7 @@ tls: original_fieldset: x509 short: List of organizations (O) of issuing certificate authority. type: keyword - client.x509.issuer.organizational_unit: + tls.client.x509.issuer.organizational_unit: dashed_name: tls-client-x509-issuer-organizational-unit description: List of organizational units (OU) of issuing certificate authority. example: www.digicert.com @@ -8098,7 +8269,7 @@ tls: original_fieldset: x509 short: List of organizational units (OU) of issuing certificate authority. type: keyword - client.x509.issuer.state_or_province: + tls.client.x509.issuer.state_or_province: dashed_name: tls-client-x509-issuer-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -8111,7 +8282,7 @@ tls: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - client.x509.not_after: + tls.client.x509.not_after: dashed_name: tls-client-x509-not-after description: Time at which the certificate is no longer considered valid. example: 2020-07-16 03:15:39+00:00 @@ -8122,7 +8293,7 @@ tls: original_fieldset: x509 short: Time at which the certificate is no longer considered valid. type: date - client.x509.not_before: + tls.client.x509.not_before: dashed_name: tls-client-x509-not-before description: Time at which the certificate is first considered valid. example: 2019-08-16 01:40:25+00:00 @@ -8133,7 +8304,7 @@ tls: original_fieldset: x509 short: Time at which the certificate is first considered valid. type: date - client.x509.public_key_algorithm: + tls.client.x509.public_key_algorithm: dashed_name: tls-client-x509-public-key-algorithm description: Algorithm used to generate the public key. example: RSA @@ -8145,7 +8316,7 @@ tls: original_fieldset: x509 short: Algorithm used to generate the public key. type: keyword - client.x509.public_key_curve: + tls.client.x509.public_key_curve: dashed_name: tls-client-x509-public-key-curve description: The curve used by the elliptic curve public key algorithm. This is algorithm specific. @@ -8159,7 +8330,7 @@ tls: short: The curve used by the elliptic curve public key algorithm. This is algorithm specific. type: keyword - client.x509.public_key_exponent: + tls.client.x509.public_key_exponent: dashed_name: tls-client-x509-public-key-exponent description: Exponent used to derive the public key. This is algorithm specific. doc_values: false @@ -8172,7 +8343,7 @@ tls: original_fieldset: x509 short: Exponent used to derive the public key. This is algorithm specific. type: long - client.x509.public_key_size: + tls.client.x509.public_key_size: dashed_name: tls-client-x509-public-key-size description: The size of the public key space in bits. example: 2048 @@ -8183,7 +8354,7 @@ tls: original_fieldset: x509 short: The size of the public key space in bits. 
type: long - client.x509.serial_number: + tls.client.x509.serial_number: dashed_name: tls-client-x509-serial-number description: Unique serial number issued by the certificate authority. For consistency, if this value is alphanumeric, it should be formatted without colons and uppercase @@ -8200,7 +8371,7 @@ tls: characters. short_description: Unique serial number issued by the certificate authority. type: keyword - client.x509.signature_algorithm: + tls.client.x509.signature_algorithm: dashed_name: tls-client-x509-signature-algorithm description: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). @@ -8214,7 +8385,7 @@ tls: short: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). type: keyword - client.x509.subject.common_name: + tls.client.x509.subject.common_name: dashed_name: tls-client-x509-subject-common-name description: List of common names (CN) of subject. example: r2.shared.global.fastly.net @@ -8227,7 +8398,7 @@ tls: original_fieldset: x509 short: List of common names (CN) of subject. type: keyword - client.x509.subject.country: + tls.client.x509.subject.country: dashed_name: tls-client-x509-subject-country description: List of country (C) code example: US @@ -8240,7 +8411,7 @@ tls: original_fieldset: x509 short: List of country (C) code type: keyword - client.x509.subject.distinguished_name: + tls.client.x509.subject.distinguished_name: dashed_name: tls-client-x509-subject-distinguished-name description: Distinguished name (DN) of the certificate subject entity. example: C=US, ST=California, L=San Francisco, O=Fastly, Inc., CN=r2.shared.global.fastly.net @@ -8252,7 +8423,7 @@ tls: original_fieldset: x509 short: Distinguished name (DN) of the certificate subject entity. type: keyword - client.x509.subject.locality: + tls.client.x509.subject.locality: dashed_name: tls-client-x509-subject-locality description: List of locality names (L) example: San Francisco @@ -8265,7 +8436,7 @@ tls: original_fieldset: x509 short: List of locality names (L) type: keyword - client.x509.subject.organization: + tls.client.x509.subject.organization: dashed_name: tls-client-x509-subject-organization description: List of organizations (O) of subject. example: Fastly, Inc. @@ -8278,7 +8449,7 @@ tls: original_fieldset: x509 short: List of organizations (O) of subject. type: keyword - client.x509.subject.organizational_unit: + tls.client.x509.subject.organizational_unit: dashed_name: tls-client-x509-subject-organizational-unit description: List of organizational units (OU) of subject. flat_name: tls.client.x509.subject.organizational_unit @@ -8290,7 +8461,7 @@ tls: original_fieldset: x509 short: List of organizational units (OU) of subject. type: keyword - client.x509.subject.state_or_province: + tls.client.x509.subject.state_or_province: dashed_name: tls-client-x509-subject-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -8303,7 +8474,7 @@ tls: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - client.x509.version_number: + tls.client.x509.version_number: dashed_name: tls-client-x509-version-number description: Version of x509 format. example: 3 @@ -8315,7 +8486,7 @@ tls: original_fieldset: x509 short: Version of x509 format. 
type: keyword - curve: + tls.curve: dashed_name: tls-curve description: String indicating the curve used for the given cipher, when applicable. example: secp256r1 @@ -8326,7 +8497,7 @@ tls: normalize: [] short: String indicating the curve used for the given cipher, when applicable. type: keyword - established: + tls.established: dashed_name: tls-established description: Boolean flag indicating if the TLS negotiation was successful and transitioned to an encrypted tunnel. @@ -8337,7 +8508,7 @@ tls: short: Boolean flag indicating if the TLS negotiation was successful and transitioned to an encrypted tunnel. type: boolean - next_protocol: + tls.next_protocol: dashed_name: tls-next-protocol description: String indicating the protocol being tunneled. Per the values in the IANA registry (https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml#alpn-protocol-ids), @@ -8352,7 +8523,7 @@ tls: IANA registry (https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml#alpn-protocol-ids), this string should be lower case. type: keyword - resumed: + tls.resumed: dashed_name: tls-resumed description: Boolean flag indicating if this TLS connection was resumed from an existing TLS negotiation. @@ -8363,7 +8534,7 @@ tls: short: Boolean flag indicating if this TLS connection was resumed from an existing TLS negotiation. type: boolean - server.certificate: + tls.server.certificate: dashed_name: tls-server-certificate description: PEM-encoded stand-alone certificate offered by the server. This is usually mutually-exclusive of `server.certificate_chain` since this value @@ -8378,7 +8549,7 @@ tls: mutually-exclusive of `server.certificate_chain` since this value also exists in that list. type: keyword - server.certificate_chain: + tls.server.certificate_chain: dashed_name: tls-server-certificate-chain description: Array of PEM-encoded certificates that make up the certificate chain offered by the server. This is usually mutually-exclusive of `server.certificate` @@ -8396,7 +8567,7 @@ tls: offered by the server. This is usually mutually-exclusive of `server.certificate` since that value should be the first certificate in the chain. type: keyword - server.hash.md5: + tls.server.hash.md5: dashed_name: tls-server-hash-md5 description: Certificate fingerprint using the MD5 digest of DER-encoded version of certificate offered by the server. For consistency with other hash values, @@ -8411,7 +8582,7 @@ tls: certificate offered by the server. For consistency with other hash values, this value should be formatted as an uppercase hash. type: keyword - server.hash.sha1: + tls.server.hash.sha1: dashed_name: tls-server-hash-sha1 description: Certificate fingerprint using the SHA1 digest of DER-encoded version of certificate offered by the server. For consistency with other hash values, @@ -8426,7 +8597,7 @@ tls: of certificate offered by the server. For consistency with other hash values, this value should be formatted as an uppercase hash. type: keyword - server.hash.sha256: + tls.server.hash.sha256: dashed_name: tls-server-hash-sha256 description: Certificate fingerprint using the SHA256 digest of DER-encoded version of certificate offered by the server. For consistency with other hash @@ -8441,7 +8612,7 @@ tls: of certificate offered by the server. For consistency with other hash values, this value should be formatted as an uppercase hash. 
type: keyword - server.issuer: + tls.server.issuer: dashed_name: tls-server-issuer description: Subject of the issuer of the x.509 certificate presented by the server. @@ -8453,7 +8624,7 @@ tls: normalize: [] short: Subject of the issuer of the x.509 certificate presented by the server. type: keyword - server.ja3s: + tls.server.ja3s: dashed_name: tls-server-ja3s description: A hash that identifies servers based on how they perform an SSL/TLS handshake. @@ -8465,7 +8636,7 @@ tls: normalize: [] short: A hash that identifies servers based on how they perform an SSL/TLS handshake. type: keyword - server.not_after: + tls.server.not_after: dashed_name: tls-server-not-after description: Timestamp indicating when server certificate is no longer considered valid. @@ -8477,7 +8648,7 @@ tls: short: Timestamp indicating when server certificate is no longer considered valid. type: date - server.not_before: + tls.server.not_before: dashed_name: tls-server-not-before description: Timestamp indicating when server certificate is first considered valid. @@ -8488,7 +8659,7 @@ tls: normalize: [] short: Timestamp indicating when server certificate is first considered valid. type: date - server.subject: + tls.server.subject: dashed_name: tls-server-subject description: Subject of the x.509 certificate presented by the server. example: CN=www.mydomain.com, OU=Infrastructure Team, DC=mydomain, DC=com @@ -8499,7 +8670,7 @@ tls: normalize: [] short: Subject of the x.509 certificate presented by the server. type: keyword - server.x509.alternative_names: + tls.server.x509.alternative_names: dashed_name: tls-server-x509-alternative-names description: List of subject alternative names (SAN). Name types vary by certificate authority and certificate type but commonly contain IP addresses, DNS names @@ -8517,7 +8688,7 @@ tls: (and wildcards), and email addresses. short_description: List of subject alternative names (SAN) type: keyword - server.x509.issuer.common_name: + tls.server.x509.issuer.common_name: dashed_name: tls-server-x509-issuer-common-name description: List of common name (CN) of issuing certificate authority. example: DigiCert SHA2 High Assurance Server CA @@ -8530,7 +8701,7 @@ tls: original_fieldset: x509 short: List of common name (CN) of issuing certificate authority. type: keyword - server.x509.issuer.country: + tls.server.x509.issuer.country: dashed_name: tls-server-x509-issuer-country description: List of country (C) codes example: US @@ -8543,7 +8714,7 @@ tls: original_fieldset: x509 short: List of country (C) codes type: keyword - server.x509.issuer.distinguished_name: + tls.server.x509.issuer.distinguished_name: dashed_name: tls-server-x509-issuer-distinguished-name description: Distinguished name (DN) of issuing certificate authority. example: C=US, O=DigiCert Inc, OU=www.digicert.com, CN=DigiCert SHA2 High Assurance @@ -8556,7 +8727,7 @@ tls: original_fieldset: x509 short: Distinguished name (DN) of issuing certificate authority. type: keyword - server.x509.issuer.locality: + tls.server.x509.issuer.locality: dashed_name: tls-server-x509-issuer-locality description: List of locality names (L) example: Mountain View @@ -8569,7 +8740,7 @@ tls: original_fieldset: x509 short: List of locality names (L) type: keyword - server.x509.issuer.organization: + tls.server.x509.issuer.organization: dashed_name: tls-server-x509-issuer-organization description: List of organizations (O) of issuing certificate authority. 
example: DigiCert Inc @@ -8582,7 +8753,7 @@ tls: original_fieldset: x509 short: List of organizations (O) of issuing certificate authority. type: keyword - server.x509.issuer.organizational_unit: + tls.server.x509.issuer.organizational_unit: dashed_name: tls-server-x509-issuer-organizational-unit description: List of organizational units (OU) of issuing certificate authority. example: www.digicert.com @@ -8595,7 +8766,7 @@ tls: original_fieldset: x509 short: List of organizational units (OU) of issuing certificate authority. type: keyword - server.x509.issuer.state_or_province: + tls.server.x509.issuer.state_or_province: dashed_name: tls-server-x509-issuer-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -8608,7 +8779,7 @@ tls: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - server.x509.not_after: + tls.server.x509.not_after: dashed_name: tls-server-x509-not-after description: Time at which the certificate is no longer considered valid. example: 2020-07-16 03:15:39+00:00 @@ -8619,7 +8790,7 @@ tls: original_fieldset: x509 short: Time at which the certificate is no longer considered valid. type: date - server.x509.not_before: + tls.server.x509.not_before: dashed_name: tls-server-x509-not-before description: Time at which the certificate is first considered valid. example: 2019-08-16 01:40:25+00:00 @@ -8630,7 +8801,7 @@ tls: original_fieldset: x509 short: Time at which the certificate is first considered valid. type: date - server.x509.public_key_algorithm: + tls.server.x509.public_key_algorithm: dashed_name: tls-server-x509-public-key-algorithm description: Algorithm used to generate the public key. example: RSA @@ -8642,7 +8813,7 @@ tls: original_fieldset: x509 short: Algorithm used to generate the public key. type: keyword - server.x509.public_key_curve: + tls.server.x509.public_key_curve: dashed_name: tls-server-x509-public-key-curve description: The curve used by the elliptic curve public key algorithm. This is algorithm specific. @@ -8656,7 +8827,7 @@ tls: short: The curve used by the elliptic curve public key algorithm. This is algorithm specific. type: keyword - server.x509.public_key_exponent: + tls.server.x509.public_key_exponent: dashed_name: tls-server-x509-public-key-exponent description: Exponent used to derive the public key. This is algorithm specific. doc_values: false @@ -8669,7 +8840,7 @@ tls: original_fieldset: x509 short: Exponent used to derive the public key. This is algorithm specific. type: long - server.x509.public_key_size: + tls.server.x509.public_key_size: dashed_name: tls-server-x509-public-key-size description: The size of the public key space in bits. example: 2048 @@ -8680,7 +8851,7 @@ tls: original_fieldset: x509 short: The size of the public key space in bits. type: long - server.x509.serial_number: + tls.server.x509.serial_number: dashed_name: tls-server-x509-serial-number description: Unique serial number issued by the certificate authority. For consistency, if this value is alphanumeric, it should be formatted without colons and uppercase @@ -8697,7 +8868,7 @@ tls: characters. short_description: Unique serial number issued by the certificate authority. type: keyword - server.x509.signature_algorithm: + tls.server.x509.signature_algorithm: dashed_name: tls-server-x509-signature-algorithm description: Identifier for certificate signature algorithm. 
Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). @@ -8711,7 +8882,7 @@ tls: short: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). type: keyword - server.x509.subject.common_name: + tls.server.x509.subject.common_name: dashed_name: tls-server-x509-subject-common-name description: List of common names (CN) of subject. example: r2.shared.global.fastly.net @@ -8724,7 +8895,7 @@ tls: original_fieldset: x509 short: List of common names (CN) of subject. type: keyword - server.x509.subject.country: + tls.server.x509.subject.country: dashed_name: tls-server-x509-subject-country description: List of country (C) code example: US @@ -8737,7 +8908,7 @@ tls: original_fieldset: x509 short: List of country (C) code type: keyword - server.x509.subject.distinguished_name: + tls.server.x509.subject.distinguished_name: dashed_name: tls-server-x509-subject-distinguished-name description: Distinguished name (DN) of the certificate subject entity. example: C=US, ST=California, L=San Francisco, O=Fastly, Inc., CN=r2.shared.global.fastly.net @@ -8749,7 +8920,7 @@ tls: original_fieldset: x509 short: Distinguished name (DN) of the certificate subject entity. type: keyword - server.x509.subject.locality: + tls.server.x509.subject.locality: dashed_name: tls-server-x509-subject-locality description: List of locality names (L) example: San Francisco @@ -8762,7 +8933,7 @@ tls: original_fieldset: x509 short: List of locality names (L) type: keyword - server.x509.subject.organization: + tls.server.x509.subject.organization: dashed_name: tls-server-x509-subject-organization description: List of organizations (O) of subject. example: Fastly, Inc. @@ -8775,7 +8946,7 @@ tls: original_fieldset: x509 short: List of organizations (O) of subject. type: keyword - server.x509.subject.organizational_unit: + tls.server.x509.subject.organizational_unit: dashed_name: tls-server-x509-subject-organizational-unit description: List of organizational units (OU) of subject. flat_name: tls.server.x509.subject.organizational_unit @@ -8787,7 +8958,7 @@ tls: original_fieldset: x509 short: List of organizational units (OU) of subject. type: keyword - server.x509.subject.state_or_province: + tls.server.x509.subject.state_or_province: dashed_name: tls-server-x509-subject-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -8800,7 +8971,7 @@ tls: original_fieldset: x509 short: List of state or province names (ST, S, or P) type: keyword - server.x509.version_number: + tls.server.x509.version_number: dashed_name: tls-server-x509-version-number description: Version of x509 format. example: 3 @@ -8812,7 +8983,7 @@ tls: original_fieldset: x509 short: Version of x509 format. type: keyword - version: + tls.version: dashed_name: tls-version description: Numeric part of the version parsed from the original string. example: '1.2' @@ -8823,7 +8994,7 @@ tls: normalize: [] short: Numeric part of the version parsed from the original string. type: keyword - version_protocol: + tls.version_protocol: dashed_name: tls-version-protocol description: Normalized lowercase protocol name parsed from original string. example: tls @@ -8840,6 +9011,27 @@ tls: - tls.client.x509 - tls.server.x509 prefix: tls. 
+ reused_here: + - full: tls.client.x509 + schema_name: x509 + short: This implements the common core fields for x509 certificates. This information + is likely logged with TLS sessions, digital signatures found in executable binaries, + S/MIME information in email bodies, or analysis of files on disk. When only + a single certificate is logged in an event, it should be nested under `file`. + When hashes of the DER-encoded certificate are available, the `hash` data set + should be populated as well (e.g. `file.hash.sha256`). For events that contain + certificate information for both sides of the connection, the x509 object could + be nested under the respective side of the connection information (e.g. `tls.server.x509`). + - full: tls.server.x509 + schema_name: x509 + short: This implements the common core fields for x509 certificates. This information + is likely logged with TLS sessions, digital signatures found in executable binaries, + S/MIME information in email bodies, or analysis of files on disk. When only + a single certificate is logged in an event, it should be nested under `file`. + When hashes of the DER-encoded certificate are available, the `hash` data set + should be populated as well (e.g. `file.hash.sha256`). For events that contain + certificate information for both sides of the connection, the x509 object could + be nested under the respective side of the connection information (e.g. `tls.server.x509`). short: Fields describing a TLS connection. title: TLS type: group @@ -8888,7 +9080,7 @@ url: description: URL fields provide support for complete or partial URLs, and supports the breaking down into scheme, domain, path, and so on. fields: - domain: + url.domain: dashed_name: url-domain description: 'Domain of the url, such as "www.elastic.co". @@ -8902,7 +9094,7 @@ url: normalize: [] short: Domain of the url. type: keyword - extension: + url.extension: dashed_name: url-extension description: 'The field contains the file extension from the original request url. @@ -8919,7 +9111,7 @@ url: normalize: [] short: File extension from the original request url. type: keyword - fragment: + url.fragment: dashed_name: url-fragment description: 'Portion of the url after the `#`, such as "top". @@ -8931,7 +9123,7 @@ url: normalize: [] short: Portion of the url after the `#`. type: keyword - full: + url.full: dashed_name: url-full description: If full URLs are important to your use case, they should be stored in `url.full`, whether this field is reconstructed or present in the event @@ -8949,7 +9141,7 @@ url: normalize: [] short: Full unparsed URL. type: keyword - original: + url.original: dashed_name: url-original description: 'Unmodified original url as seen in the event source. @@ -8970,7 +9162,7 @@ url: normalize: [] short: Unmodified original url as seen in the event source. type: keyword - password: + url.password: dashed_name: url-password description: Password of the request. flat_name: url.password @@ -8980,7 +9172,7 @@ url: normalize: [] short: Password of the request. type: keyword - path: + url.path: dashed_name: url-path description: Path of the request, such as "/search". flat_name: url.path @@ -8990,7 +9182,7 @@ url: normalize: [] short: Path of the request, such as "/search". type: keyword - port: + url.port: dashed_name: url-port description: Port of the request, such as 443. example: 443 @@ -9001,7 +9193,7 @@ url: normalize: [] short: Port of the request, such as 443. 
type: long - query: + url.query: dashed_name: url-query description: 'The query field describes the query string of the request, such as "q=elasticsearch". @@ -9017,7 +9209,7 @@ url: normalize: [] short: Query string of the request. type: keyword - registered_domain: + url.registered_domain: dashed_name: url-registered-domain description: 'The highest registered url domain, stripped of the subdomain. @@ -9034,7 +9226,7 @@ url: normalize: [] short: The highest registered url domain, stripped of the subdomain. type: keyword - scheme: + url.scheme: dashed_name: url-scheme description: 'Scheme of the request, such as "https". @@ -9047,7 +9239,7 @@ url: normalize: [] short: Scheme of the url. type: keyword - top_level_domain: + url.top_level_domain: dashed_name: url-top-level-domain description: 'The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. For example, the top level domain @@ -9064,7 +9256,7 @@ url: normalize: [] short: The effective top level domain (com, org, net, co.uk). type: keyword - username: + url.username: dashed_name: url-username description: Username of the request. flat_name: url.username @@ -9087,7 +9279,7 @@ user: Fields can have one entry or multiple entries. If a user has more than one id, provide an array that includes all of them.' fields: - domain: + user.domain: dashed_name: user-domain description: 'Name of the directory the user is a member of. @@ -9099,7 +9291,7 @@ user: normalize: [] short: Name of the directory the user is a member of. type: keyword - email: + user.email: dashed_name: user-email description: User email address. flat_name: user.email @@ -9109,7 +9301,7 @@ user: normalize: [] short: User email address. type: keyword - full_name: + user.full_name: dashed_name: user-full-name description: User's full name, if available. example: Albert Einstein @@ -9125,7 +9317,7 @@ user: normalize: [] short: User's full name, if available. type: keyword - group.domain: + user.group.domain: dashed_name: user-group-domain description: 'Name of the directory the group is a member of. @@ -9138,7 +9330,7 @@ user: original_fieldset: group short: Name of the directory the group is a member of. type: keyword - group.id: + user.group.id: dashed_name: user-group-id description: Unique identifier for the group on the system/platform. flat_name: user.group.id @@ -9149,7 +9341,7 @@ user: original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword - group.name: + user.group.name: dashed_name: user-group-name description: Name of the group. flat_name: user.group.name @@ -9160,7 +9352,7 @@ user: original_fieldset: group short: Name of the group. type: keyword - hash: + user.hash: dashed_name: user-hash description: 'Unique user hash to correlate information for a user in anonymized form. @@ -9174,7 +9366,7 @@ user: normalize: [] short: Unique user hash to correlate information for a user in anonymized form. type: keyword - id: + user.id: dashed_name: user-id description: Unique identifier of the user. flat_name: user.id @@ -9184,7 +9376,7 @@ user: normalize: [] short: Unique identifier of the user. type: keyword - name: + user.name: dashed_name: user-name description: Short name or login of the user. example: albert @@ -9207,12 +9399,26 @@ user: prefix: user. 
reusable: expected: - - client - - destination - - host - - server - - source + - as: user + at: client + full: client.user + - as: user + at: destination + full: destination.user + - as: user + at: host + full: host.user + - as: user + at: server + full: server.user + - as: user + at: source + full: source.user top_level: true + reused_here: + - full: user.group + schema_name: group + short: User's group relevant to the event. short: Fields to describe the user relevant to the event. title: User type: group @@ -9221,7 +9427,7 @@ user_agent: They often show up in web service logs coming from the parsed user agent string.' fields: - device.name: + user_agent.device.name: dashed_name: user-agent-device-name description: Name of the device. example: iPhone @@ -9232,7 +9438,7 @@ user_agent: normalize: [] short: Name of the device. type: keyword - name: + user_agent.name: dashed_name: user-agent-name description: Name of the user agent. example: Safari @@ -9243,7 +9449,7 @@ user_agent: normalize: [] short: Name of the user agent. type: keyword - original: + user_agent.original: dashed_name: user-agent-original description: Unparsed user_agent string. example: Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 @@ -9260,7 +9466,7 @@ user_agent: normalize: [] short: Unparsed user_agent string. type: keyword - os.family: + user_agent.os.family: dashed_name: user-agent-os-family description: OS family (such as redhat, debian, freebsd, windows). example: debian @@ -9272,7 +9478,7 @@ user_agent: original_fieldset: os short: OS family (such as redhat, debian, freebsd, windows). type: keyword - os.full: + user_agent.os.full: dashed_name: user-agent-os-full description: Operating system name, including the version or code name. example: Mac OS Mojave @@ -9289,7 +9495,7 @@ user_agent: original_fieldset: os short: Operating system name, including the version or code name. type: keyword - os.kernel: + user_agent.os.kernel: dashed_name: user-agent-os-kernel description: Operating system kernel version as a raw string. example: 4.4.0-112-generic @@ -9301,7 +9507,7 @@ user_agent: original_fieldset: os short: Operating system kernel version as a raw string. type: keyword - os.name: + user_agent.os.name: dashed_name: user-agent-os-name description: Operating system name, without the version. example: Mac OS X @@ -9318,7 +9524,7 @@ user_agent: original_fieldset: os short: Operating system name, without the version. type: keyword - os.platform: + user_agent.os.platform: dashed_name: user-agent-os-platform description: Operating system platform (such centos, ubuntu, windows). example: darwin @@ -9330,7 +9536,7 @@ user_agent: original_fieldset: os short: Operating system platform (such centos, ubuntu, windows). type: keyword - os.version: + user_agent.os.version: dashed_name: user-agent-os-version description: Operating system version as a raw string. example: 10.14.1 @@ -9342,7 +9548,7 @@ user_agent: original_fieldset: os short: Operating system version as a raw string. type: keyword - version: + user_agent.version: dashed_name: user-agent-version description: Version of the user agent. example: 12.0 @@ -9358,6 +9564,10 @@ user_agent: nestings: - user_agent.os prefix: user_agent. + reused_here: + - full: user_agent.os + schema_name: os + short: OS fields contain information about the operating system. short: Fields to describe a browser user_agent string. 
title: User agent type: group @@ -9379,7 +9589,7 @@ vlan: information when observer events contain discrete ingress and egress VLAN information, typically provided by firewalls, routers, or load balancers.' fields: - id: + vlan.id: dashed_name: vlan-id description: VLAN ID as reported by the observer. example: 10 @@ -9390,7 +9600,7 @@ vlan: normalize: [] short: VLAN ID as reported by the observer. type: keyword - name: + vlan.name: dashed_name: vlan-name description: Optional VLAN name as reported by the observer. example: outside @@ -9406,10 +9616,18 @@ vlan: prefix: vlan. reusable: expected: - - observer.ingress - - observer.egress - - network - - network.inner + - as: vlan + at: observer.ingress + full: observer.ingress.vlan + - as: vlan + at: observer.egress + full: observer.egress.vlan + - as: vlan + at: network + full: network.vlan + - as: vlan + at: network.inner + full: network.inner.vlan top_level: false short: Fields to describe observed VLAN information. title: VLAN @@ -9418,7 +9636,7 @@ vulnerability: description: The vulnerability fields describe information about a vulnerability that is relevant to an event. fields: - category: + vulnerability.category: dashed_name: vulnerability-category description: 'The type of system or architecture that the vulnerability affects. These may be platform-specific (for example, Debian or SUSE) or general (for @@ -9435,7 +9653,7 @@ vulnerability: - array short: Category of a vulnerability. type: keyword - classification: + vulnerability.classification: dashed_name: vulnerability-classification description: The classification of the vulnerability scoring system. For example (https://www.first.org/cvss/) @@ -9447,7 +9665,7 @@ vulnerability: normalize: [] short: Classification of the vulnerability. type: keyword - description: + vulnerability.description: dashed_name: vulnerability-description description: The description of the vulnerability that provides additional context of the vulnerability. For example (https://cve.mitre.org/about/faqs.html#cve_entry_descriptions_created[Common @@ -9465,7 +9683,7 @@ vulnerability: normalize: [] short: Description of the vulnerability. type: keyword - enumeration: + vulnerability.enumeration: dashed_name: vulnerability-enumeration description: The type of identifier used for this vulnerability. For example (https://cve.mitre.org/about/) @@ -9477,7 +9695,7 @@ vulnerability: normalize: [] short: Identifier of the vulnerability. type: keyword - id: + vulnerability.id: dashed_name: vulnerability-id description: The identification (ID) is the number portion of a vulnerability entry. It includes a unique identification number for the vulnerability. For @@ -9491,7 +9709,7 @@ vulnerability: normalize: [] short: ID of the vulnerability. type: keyword - reference: + vulnerability.reference: dashed_name: vulnerability-reference description: A resource that provides additional information, context, and mitigations for the identified vulnerability. @@ -9503,7 +9721,7 @@ vulnerability: normalize: [] short: Reference of the vulnerability. type: keyword - report_id: + vulnerability.report_id: dashed_name: vulnerability-report-id description: The report or scan identification number. example: 20191018.0001 @@ -9514,7 +9732,7 @@ vulnerability: normalize: [] short: Scan identification number. type: keyword - scanner.vendor: + vulnerability.scanner.vendor: dashed_name: vulnerability-scanner-vendor description: The name of the vulnerability scanner vendor. 
example: Tenable @@ -9525,7 +9743,7 @@ vulnerability: normalize: [] short: Name of the scanner vendor. type: keyword - score.base: + vulnerability.score.base: dashed_name: vulnerability-score-base description: 'Scores can range from 0.0 to 10.0, with 10.0 being the most severe. @@ -9539,7 +9757,7 @@ vulnerability: normalize: [] short: Vulnerability Base score. type: float - score.environmental: + vulnerability.score.environmental: dashed_name: vulnerability-score-environmental description: 'Scores can range from 0.0 to 10.0, with 10.0 being the most severe. @@ -9552,7 +9770,7 @@ vulnerability: normalize: [] short: Vulnerability Environmental score. type: float - score.temporal: + vulnerability.score.temporal: dashed_name: vulnerability-score-temporal description: 'Scores can range from 0.0 to 10.0, with 10.0 being the most severe. @@ -9564,7 +9782,7 @@ vulnerability: normalize: [] short: Vulnerability Temporal score. type: float - score.version: + vulnerability.score.version: dashed_name: vulnerability-score-version description: 'The National Vulnerability Database (NVD) provides qualitative severity rankings of "Low", "Medium", and "High" for CVSS v2.0 base score @@ -9582,7 +9800,7 @@ vulnerability: normalize: [] short: CVSS version. type: keyword - severity: + vulnerability.severity: dashed_name: vulnerability-severity description: The severity of the vulnerability can help with metrics and internal prioritization regarding remediation. For example (https://nvd.nist.gov/vuln-metrics/cvss) @@ -9610,7 +9828,7 @@ x509: certificate information for both sides of the connection, the x509 object could be nested under the respective side of the connection information (e.g. `tls.server.x509`). fields: - alternative_names: + x509.alternative_names: dashed_name: x509-alternative-names description: List of subject alternative names (SAN). Name types vary by certificate authority and certificate type but commonly contain IP addresses, DNS names @@ -9627,7 +9845,7 @@ x509: (and wildcards), and email addresses. short_description: List of subject alternative names (SAN) type: keyword - issuer.common_name: + x509.issuer.common_name: dashed_name: x509-issuer-common-name description: List of common name (CN) of issuing certificate authority. example: DigiCert SHA2 High Assurance Server CA @@ -9639,7 +9857,7 @@ x509: - array short: List of common name (CN) of issuing certificate authority. type: keyword - issuer.country: + x509.issuer.country: dashed_name: x509-issuer-country description: List of country (C) codes example: US @@ -9651,7 +9869,7 @@ x509: - array short: List of country (C) codes type: keyword - issuer.distinguished_name: + x509.issuer.distinguished_name: dashed_name: x509-issuer-distinguished-name description: Distinguished name (DN) of issuing certificate authority. example: C=US, O=DigiCert Inc, OU=www.digicert.com, CN=DigiCert SHA2 High Assurance @@ -9663,7 +9881,7 @@ x509: normalize: [] short: Distinguished name (DN) of issuing certificate authority. type: keyword - issuer.locality: + x509.issuer.locality: dashed_name: x509-issuer-locality description: List of locality names (L) example: Mountain View @@ -9675,7 +9893,7 @@ x509: - array short: List of locality names (L) type: keyword - issuer.organization: + x509.issuer.organization: dashed_name: x509-issuer-organization description: List of organizations (O) of issuing certificate authority. example: DigiCert Inc @@ -9687,7 +9905,7 @@ x509: - array short: List of organizations (O) of issuing certificate authority. 
type: keyword - issuer.organizational_unit: + x509.issuer.organizational_unit: dashed_name: x509-issuer-organizational-unit description: List of organizational units (OU) of issuing certificate authority. example: www.digicert.com @@ -9699,7 +9917,7 @@ x509: - array short: List of organizational units (OU) of issuing certificate authority. type: keyword - issuer.state_or_province: + x509.issuer.state_or_province: dashed_name: x509-issuer-state-or-province description: List of state or province names (ST, S, or P) example: California @@ -9711,7 +9929,7 @@ x509: - array short: List of state or province names (ST, S, or P) type: keyword - not_after: + x509.not_after: dashed_name: x509-not-after description: Time at which the certificate is no longer considered valid. example: 2020-07-16 03:15:39+00:00 @@ -9721,7 +9939,7 @@ x509: normalize: [] short: Time at which the certificate is no longer considered valid. type: date - not_before: + x509.not_before: dashed_name: x509-not-before description: Time at which the certificate is first considered valid. example: 2019-08-16 01:40:25+00:00 @@ -9731,7 +9949,7 @@ x509: normalize: [] short: Time at which the certificate is first considered valid. type: date - public_key_algorithm: + x509.public_key_algorithm: dashed_name: x509-public-key-algorithm description: Algorithm used to generate the public key. example: RSA @@ -9742,7 +9960,7 @@ x509: normalize: [] short: Algorithm used to generate the public key. type: keyword - public_key_curve: + x509.public_key_curve: dashed_name: x509-public-key-curve description: The curve used by the elliptic curve public key algorithm. This is algorithm specific. @@ -9755,7 +9973,7 @@ x509: short: The curve used by the elliptic curve public key algorithm. This is algorithm specific. type: keyword - public_key_exponent: + x509.public_key_exponent: dashed_name: x509-public-key-exponent description: Exponent used to derive the public key. This is algorithm specific. doc_values: false @@ -9767,7 +9985,7 @@ x509: normalize: [] short: Exponent used to derive the public key. This is algorithm specific. type: long - public_key_size: + x509.public_key_size: dashed_name: x509-public-key-size description: The size of the public key space in bits. example: 2048 @@ -9777,7 +9995,7 @@ x509: normalize: [] short: The size of the public key space in bits. type: long - serial_number: + x509.serial_number: dashed_name: x509-serial-number description: Unique serial number issued by the certificate authority. For consistency, if this value is alphanumeric, it should be formatted without colons and uppercase @@ -9793,7 +10011,7 @@ x509: characters. short_description: Unique serial number issued by the certificate authority. type: keyword - signature_algorithm: + x509.signature_algorithm: dashed_name: x509-signature-algorithm description: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). @@ -9806,7 +10024,7 @@ x509: short: Identifier for certificate signature algorithm. Recommend using names found in Go Lang Crypto library (See https://github.com/golang/go/blob/go1.14/src/crypto/x509/x509.go#L337-L353). type: keyword - subject.common_name: + x509.subject.common_name: dashed_name: x509-subject-common-name description: List of common names (CN) of subject. example: r2.shared.global.fastly.net @@ -9818,7 +10036,7 @@ x509: - array short: List of common names (CN) of subject. 
 type: keyword
-  subject.country:
+  x509.subject.country:
     dashed_name: x509-subject-country
     description: List of country (C) code
     example: US
@@ -9830,7 +10048,7 @@ x509:
     - array
     short: List of country (C) code
     type: keyword
-  subject.distinguished_name:
+  x509.subject.distinguished_name:
     dashed_name: x509-subject-distinguished-name
     description: Distinguished name (DN) of the certificate subject entity.
     example: C=US, ST=California, L=San Francisco, O=Fastly, Inc., CN=r2.shared.global.fastly.net
@@ -9841,7 +10059,7 @@ x509:
     normalize: []
     short: Distinguished name (DN) of the certificate subject entity.
     type: keyword
-  subject.locality:
+  x509.subject.locality:
     dashed_name: x509-subject-locality
     description: List of locality names (L)
     example: San Francisco
@@ -9853,7 +10071,7 @@ x509:
     - array
     short: List of locality names (L)
     type: keyword
-  subject.organization:
+  x509.subject.organization:
     dashed_name: x509-subject-organization
     description: List of organizations (O) of subject.
     example: Fastly, Inc.
@@ -9865,7 +10083,7 @@ x509:
     - array
     short: List of organizations (O) of subject.
     type: keyword
-  subject.organizational_unit:
+  x509.subject.organizational_unit:
     dashed_name: x509-subject-organizational-unit
     description: List of organizational units (OU) of subject.
     flat_name: x509.subject.organizational_unit
@@ -9876,7 +10094,7 @@ x509:
     - array
     short: List of organizational units (OU) of subject.
     type: keyword
-  subject.state_or_province:
+  x509.subject.state_or_province:
     dashed_name: x509-subject-state-or-province
     description: List of state or province names (ST, S, or P)
     example: California
@@ -9888,7 +10106,7 @@ x509:
     - array
     short: List of state or province names (ST, S, or P)
     type: keyword
-  version_number:
+  x509.version_number:
     dashed_name: x509-version-number
     description: Version of x509 format.
     example: 3
@@ -9904,9 +10122,15 @@ x509:
   prefix: x509.
   reusable:
     expected:
-    - file
-    - tls.client
-    - tls.server
+    - as: x509
+      at: file
+      full: file.x509
+    - as: x509
+      at: tls.client
+      full: tls.client.x509
+    - as: x509
+      at: tls.server
+      full: tls.server.x509
     top_level: false
   short: This implements the common core fields for x509 certificates. This information
     is likely logged with TLS sessions, digital signatures found in executable binaries,
diff --git a/schemas/README.md b/schemas/README.md
index e4cafdd45f..9d1ac97696 100644
--- a/schemas/README.md
+++ b/schemas/README.md
@@ -2,19 +2,109 @@
 YAML with a twist: Flattened field names equivalent to nested. E.g. `foo.bar: value` and `foo:\n  bar: value`.
 
-## Schema heading
-
-- name (required): Name of the field set
-- root (optional, default false): Whether or not the fields of this field set should be nested under the field set name.
-- title (required): Rendered name of the field set (e.g. for documentation)
-  Must be correctly capitalized
-- group (required for now): TBD. Just set it to 2, for now ;-)
-- description (required): Description of the field set
-- fields (required): YAML array as described below
+Note that we use the wording "schema" and "field set" interchangeably to mean the same concept:
+a group of related fields.
+
+## Field set heading
+
+Required field set attributes:
+
+- name: Name of the field set, lowercased and with underscores to separate words.
+  For programmatic use.
+- title: Capitalized name of the field set, with spaces to separate words.
+  For use in documentation section titles.
+- description: Description of the field set. Two subsequent newlines create a new paragraph.
+- fields: YAML array as described in the "List of fields" section below.
+
+Optional field set attributes:
+
+- short: Short version of the description to display in small spaces, such as the list of field sets.
+  Short descriptions must not have newlines.
+  Defaults to the main description when absent.
+  If the main description has multiple paragraphs, then a 'short' description
+  with no newlines is required.
+- root (default false): Whether or not the fields of this field set should be namespaced under the field set name.
+  Most field sets are expected to have their fields namespaced under the field set name.
+  Only the "base" field set is expected to set this to true (to define a few root fields like `@timestamp`).
+- group (default 2): Used to sort field sets relative to one another.
+  For example, the "base" field set has group=1 and is the first listed in the documentation.
+  All others have group=2 and are therefore after "base" (sorted alphabetically).
 - type (ignored): at this level, should always be `group`
-- reusable (optional): YAML object composed of top_level and expected sub properties
+- reusable (optional): Used to identify which field sets are expected to be reused in multiple places.
+  See "Field set reuse" for details.
+
+### Field set reuse
+
+Unless otherwise noted via the `reusable` attribute, a field set is a group of
+fields that will be defined at the root of events.
+As an example, the fields of the `event` field set are nested like: `{"event": {"id": "foo"}}`.
+
+Field set reuse lets us define a group of fields that's expected to be used in
+multiple places, such as `geo`, which can appear under `source`, `destination`, and others:
+
+```JSON
+{
+    "source": { "ip": "10.10.10.10", "geo": { "country_name": "..." } },
+    "destination": { "ip": "10.42.42.42", "geo": { "country_name": "..." } }
+}
+```
+
+The `reusable` attribute is composed of `top_level` and `expected` sub-attributes:
+
+- top\_level (optional, default true): Is this field set expected at the root of
+  events or is it only expected in the nested locations?
+- expected (default []): List of places the field set's fields are expected.
+  There are two valid notations to list expected locations.
+
+The "flat" (or dotted) notation simply states where the fields are nested:
+
+```YAML
+  reusable:
+    top_level: false
+    expected:
+      - network
+      - network.inner
+```
+
+The above would nest field set `vlan` at `network.vlan.*` and `network.inner.vlan.*`:
 
-## Field set
+```JSON
+{
+    "network": {
+        "vlan": { },
+        "inner": {
+            "vlan": {}
+        }
+    }
+}
+```
+
+In some cases we need to nest a field set within itself, under a different name,
+which can be thought of loosely as a "role".
+A good example is nesting `process` at `process.parent`, to capture the parent of a process.
+In these cases, we replace the "flat" key name with a small object with keys `at` and `as`:
+
+```YAML
+  reusable:
+    top_level: true
+    expected:
+      - { at: process, as: parent }
+```
+
+The above defines all process fields in both places:
+
+```JSON
+{
+    "process": {
+        "pid": 4242,
+        "parent": {
+            "pid": 1
+        }
+    }
+}
+```
+
+### List of fields
 
 Array of YAML objects:
 
@@ -29,10 +119,12 @@ Supported keys to describe fields
 
 - name (required): Name of the field
 - level (required, one of: core, extended): ECS Level of maturity of the field
 - type (required): Type of the field. Must be set explicitly, no default.
-- required (TBD): TBD if still relevant.
-- short (optional): Optional shorter definition, for display in tight spaces.
- Derived automatically if description is short enough. - description (required): Description of the field +- short (optional): Short version of the description to display in small spaces. + Short descriptions must not have newlines. + Defaults to the main description when absent. + If the main description has multiple paragraphs, then a 'short' description + with no newlines is required. - example (optional): A single value example of what can be expected in this field - multi\_fields (optional): Specify additional ways to index the field. - index (optional): If `False`, means field is not indexed (overrides type) @@ -58,7 +150,37 @@ Supported keys to describe expected values for a field - expected\_event\_types: list of expected "event.type" values to use in association with that category. -### Multi\_fields +#### Multi\_fields - type (required): type of the multi\_fields - name (optional): defaults to multi\_fields type + +## Minimal example + +```YAML +- name: my_fields + title: My fields + description: My awesome fields. + fields: + + - name: a_field + level: extended + type: keyword + example: 42 + description: > + A description + + with multiple paragraphs + + requires you to provide a 'short' description as well. + short: A short version of the description. + + - name: another_field + level: extended + type: keyword + multi_fields: + - type: text + name: text + example: I am Groot + description: A short description that doesn't require an explicit 'short'. +``` diff --git a/schemas/group.yml b/schemas/group.yml index dfb894ffb8..471e1f9a8b 100644 --- a/schemas/group.yml +++ b/schemas/group.yml @@ -9,6 +9,7 @@ event. reusable: + order: 1 top_level: true expected: - user diff --git a/schemas/process.yml b/schemas/process.yml index 8b48c0ea1a..00dd508345 100644 --- a/schemas/process.yml +++ b/schemas/process.yml @@ -24,6 +24,15 @@ from a log message. The `process.pid` often stays in the metric itself and is copied to the global field for correlation. 
type: group + + # TODO Temporarily commented out to simplify initial rewrite review + + # reusable: + # top_level: true + # expected: + # - at: process + # as: parent + fields: - name: pid diff --git a/schemas/user.yml b/schemas/user.yml index 0ac4fa6be0..255e132f69 100644 --- a/schemas/user.yml +++ b/schemas/user.yml @@ -18,6 +18,16 @@ - host - server - source + + # TODO Temporarily commented out to simplify initial rewrite review + + # - at: user + # as: target + # - at: user + # as: effective + # - at: user + # as: changes + type: group fields: diff --git a/scripts/generator.py b/scripts/generator.py index 0302d5e56d..230ac12130 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -1,74 +1,50 @@ import argparse import glob import os -import schema_reader import yaml -from generators import intermediate_files +from generators import asciidoc_fields +from generators import beats from generators import csv_generator from generators import es_template -from generators import beats -from generators import asciidoc_fields from generators import ecs_helpers +from generators import intermediate_files + +from schema import loader +from schema import cleaner +from schema import finalizer +from schema import subset_filter def main(): args = argument_parser() - # Get rid of empty include - if args.include and [''] == args.include: - args.include.clear() - - if args.ref: - # Load ECS schemas from a specific git ref - print('Loading schemas from git ref ' + args.ref) - tree = ecs_helpers.get_tree_by_ref(args.ref) - ecs_version = read_version_from_tree(tree) - ecs_schemas = schema_reader.load_schemas_from_git(tree) - else: - # Load the default schemas - print('Loading default schemas') - ecs_version = read_version() - ecs_schemas = schema_reader.load_schemas_from_files() + ecs_version = read_version(args.ref) print('Running generator. ECS version ' + ecs_version) - intermediate_fields = schema_reader.create_schema_dicts(ecs_schemas) - # Maybe load user specified directory of schemas - if args.include: - include_glob = ecs_helpers.get_glob_files(args.include, ecs_helpers.YAML_EXT) + # To debug issues in the gradual building up of the nested structure, insert + # statements like this after any step of interest. 
+ # ecs_helpers.yaml_dump('ecs.yml', fields) - print('Loading user defined schemas: {0}'.format(include_glob)) - - custom_schemas = schema_reader.load_schemas_from_files(include_glob) - intermediate_custom = schema_reader.create_schema_dicts(custom_schemas) - schema_reader.merge_schema_fields(intermediate_fields, intermediate_custom) - - schema_reader.assemble_reusables(intermediate_fields) - - if args.subset: - subset = {} - for arg in args.subset: - for file in glob.glob(arg): - with open(file) as f: - raw = yaml.safe_load(f.read()) - ecs_helpers.recursive_merge_subset_dicts(subset, raw) - if not subset: - raise ValueError('Subset option specified but no subsets found') - intermediate_fields = ecs_helpers.fields_subset(subset, intermediate_fields) - - (nested, flat) = schema_reader.generate_nested_flat(intermediate_fields) + fields = loader.load_schemas(ref=args.ref, included_files=args.include) + cleaner.clean(fields) + finalizer.finalize(fields) + fields = subset_filter.filter(fields, args.subset) # default location to save files out_dir = 'generated' docs_dir = 'docs' if args.out: + default_dirs = False out_dir = os.path.join(args.out, out_dir) docs_dir = os.path.join(args.out, docs_dir) + else: + default_dirs = True ecs_helpers.make_dirs(out_dir) ecs_helpers.make_dirs(docs_dir) - intermediate_files.generate(nested, flat, out_dir) + nested, flat = intermediate_files.generate(fields, out_dir, default_dirs) if args.intermediate_only: exit() @@ -77,7 +53,8 @@ def main(): beats.generate(nested, ecs_version, out_dir) if args.include or args.subset: exit() - asciidoc_fields.generate(intermediate_fields, ecs_version, docs_dir) + + asciidoc_fields.generate(nested, ecs_version, docs_dir) def argument_parser(): @@ -94,16 +71,22 @@ def argument_parser(): help='index template settings to use when generating elasticsearch template') parser.add_argument('--mapping-settings', action='store', help='mapping settings to use when generating elasticsearch template') - return parser.parse_args() - - -def read_version(file='version'): - with open(file, 'r') as infile: - return infile.read().rstrip() + args = parser.parse_args() + # Clean up empty include of the Makefile + if args.include and [''] == args.include: + args.include.clear() + return args -def read_version_from_tree(tree): - return tree['version'].data_stream.read().decode('utf-8').rstrip() +def read_version(ref=None): + if ref: + print('Loading schemas from git ref ' + ref) + tree = ecs_helpers.get_tree_by_ref(ref) + return tree['version'].data_stream.read().decode('utf-8').rstrip() + else: + print('Loading schemas from local files') + with open('version', 'r') as infile: + return infile.read().rstrip() if __name__ == '__main__': diff --git a/scripts/generators/asciidoc_fields.py b/scripts/generators/asciidoc_fields.py index 84096b5332..59703fc79a 100644 --- a/scripts/generators/asciidoc_fields.py +++ b/scripts/generators/asciidoc_fields.py @@ -2,10 +2,10 @@ from generators import ecs_helpers -def generate(intermediate_nested, ecs_version, out_dir): - save_asciidoc(join(out_dir, 'fields.asciidoc'), page_field_index(intermediate_nested, ecs_version)) - save_asciidoc(join(out_dir, 'field-details.asciidoc'), page_field_details(intermediate_nested)) - save_asciidoc(join(out_dir, 'field-values.asciidoc'), page_field_values(intermediate_nested)) +def generate(nested, ecs_version, out_dir): + save_asciidoc(join(out_dir, 'fields.asciidoc'), page_field_index(nested, ecs_version)) + save_asciidoc(join(out_dir, 'field-details.asciidoc'), 
page_field_details(nested)) + save_asciidoc(join(out_dir, 'field-values.asciidoc'), page_field_values(nested)) # Helpers @@ -20,9 +20,9 @@ def save_asciidoc(file, text): # Field Index -def page_field_index(intermediate_nested, ecs_version): +def page_field_index(nested, ecs_version): page_text = index_header(ecs_version) - for fieldset in ecs_helpers.dict_sorted_by_keys(intermediate_nested, ['group', 'name']): + for fieldset in ecs_helpers.dict_sorted_by_keys(nested, ['group', 'name']): page_text += render_field_index_row(fieldset) page_text += table_footer() page_text += index_footer() @@ -39,14 +39,14 @@ def render_field_index_row(fieldset): # Field Details Page -def page_field_details(intermediate_nested): +def page_field_details(nested): page_text = '' - for fieldset in ecs_helpers.dict_sorted_by_keys(intermediate_nested, ['group', 'name']): - page_text += render_fieldset(fieldset, intermediate_nested) + for fieldset in ecs_helpers.dict_sorted_by_keys(nested, ['group', 'name']): + page_text += render_fieldset(fieldset, nested) return page_text -def render_fieldset(fieldset, intermediate_nested): +def render_fieldset(fieldset, nested): text = field_details_table_header().format( fieldset_title=fieldset['title'], fieldset_name=fieldset['name'], @@ -57,7 +57,7 @@ def render_fieldset(fieldset, intermediate_nested): text += table_footer() - text += render_fieldset_reuse_section(fieldset, intermediate_nested) + text += render_fieldset_reuse_section(fieldset, nested) return text @@ -66,10 +66,8 @@ def render_fields(fields): text = '' for field_name, field in sorted(fields.items()): # Skip fields nested in this field set - if 'field_details' in field and 'original_fieldset' not in field['field_details']: - text += render_field_details_row(field['field_details']) - if 'fields' in field: - text += render_fields(field['fields']) + if 'original_fieldset' not in field: + text += render_field_details_row(field) return text @@ -117,7 +115,7 @@ def render_field_details_row(field): return text -def render_fieldset_reuse_section(fieldset, intermediate_nested): +def render_fieldset_reuse_section(fieldset, nested): '''Render the section on where field set can be nested, and which field sets can be nested here''' if not ('nestings' in fieldset or 'reusable' in fieldset): return '' @@ -131,12 +129,11 @@ def render_fieldset_reuse_section(fieldset, intermediate_nested): fieldset_title=fieldset['title'] ) rows = [] - for nested_fs_name in fieldset['nestings']: - ecs = ecs_helpers.get_nested_field(nested_fs_name, intermediate_nested) + for reused_here_entry in fieldset['reused_here']: rows.append({ - 'flat_nesting': "{}.*".format(nested_fs_name), - 'name': nested_fs_name.split('.')[-1], - 'short': ecs['short'] + 'flat_nesting': "{}.*".format(reused_here_entry['full']), + 'name': reused_here_entry['schema_name'], + 'short': reused_here_entry['short'] }) for row in sorted(rows, key=lambda x: x['flat_nesting']): text += render_nesting_row(row) @@ -150,16 +147,20 @@ def render_fieldset_reuses_text(fieldset): return '' section_name = fieldset['name'] - sorted_fields = sorted(fieldset['reusable']['expected']) - rendered_fields = map(lambda f: "`{}.{}`".format(f, section_name), sorted_fields) + sorted_fields = sorted(fieldset['reusable']['expected'], key=lambda k: k['full']) + rendered_fields = map(lambda f: "`{}`".format(f['full']), sorted_fields) text = "The `{}` fields are expected to be nested at: {}.\n\n".format( section_name, ', '.join(rendered_fields)) if 'top_level' in fieldset['reusable'] and 
fieldset['reusable']['top_level']: + # TODO rewording kept for follow-up PR to simplify initial rewrite PR + # template = "Note also that the `{}` fields may be used directly at the root of the events.\n\n" template = "Note also that the `{}` fields may be used directly at the top level.\n\n" else: template = "Note also that the `{}` fields are not expected to " + \ "be used directly at the top level.\n\n" + # TODO rewording kept for follow-up PR to simplify initial rewrite PR + # "be used directly at the root of the events.\n\n" text += template.format(section_name) return text @@ -311,12 +312,11 @@ def nestings_row(): # Allowed values section -def page_field_values(intermediate_nested): +def page_field_values(nested): section_text = values_section_header() category_fields = ['event.kind', 'event.category', 'event.type', 'event.outcome'] for cat_field in category_fields: - section_text += render_field_values_page(ecs_helpers.get_nested_field(cat_field, - intermediate_nested)['field_details']) + section_text += render_field_values_page(nested['event']['fields'][cat_field]) return section_text diff --git a/scripts/generators/beats.py b/scripts/generators/beats.py index fdad322e9d..096351c326 100644 --- a/scripts/generators/beats.py +++ b/scripts/generators/beats.py @@ -8,7 +8,7 @@ def generate(ecs_nested, ecs_version, out_dir): df_whitelist = ecs_helpers.yaml_load('scripts/generators/beats_default_fields_whitelist.yml') # base first - beats_fields = fieldset_field_array(ecs_nested['base']['fields'], df_whitelist) + beats_fields = fieldset_field_array(ecs_nested['base']['fields'], df_whitelist, ecs_nested['base']['prefix']) allowed_fieldset_keys = ['name', 'title', 'group', 'description', 'footnote', 'type'] # other fieldsets @@ -18,7 +18,7 @@ def generate(ecs_nested, ecs_version, out_dir): fieldset = ecs_nested[fieldset_name] beats_field = ecs_helpers.dict_copy_keys_ordered(fieldset, allowed_fieldset_keys) - beats_field['fields'] = fieldset_field_array(fieldset['fields'], df_whitelist) + beats_field['fields'] = fieldset_field_array(fieldset['fields'], df_whitelist, fieldset['prefix']) beats_fields.append(beats_field) beats_file = OrderedDict() @@ -30,7 +30,7 @@ def generate(ecs_nested, ecs_version, out_dir): write_beats_yaml(beats_file, ecs_version, out_dir) -def fieldset_field_array(source_fields, df_whitelist): +def fieldset_field_array(source_fields, df_whitelist, fieldset_prefix): allowed_keys = ['name', 'level', 'required', 'type', 'object_type', 'ignore_above', 'multi_fields', 'format', 'input_format', 'output_format', 'output_precision', 'description', @@ -41,6 +41,10 @@ def fieldset_field_array(source_fields, df_whitelist): for nested_field_name in source_fields: ecs_field = source_fields[nested_field_name] beats_field = ecs_helpers.dict_copy_keys_ordered(ecs_field, allowed_keys) + if '' == fieldset_prefix: + contextual_name = nested_field_name + else: + contextual_name = '.'.join(nested_field_name.split('.')[1:]) cleaned_multi_fields = [] if 'multi_fields' in ecs_field: @@ -53,7 +57,7 @@ def fieldset_field_array(source_fields, df_whitelist): ecs_helpers.dict_copy_keys_ordered(mf, multi_fields_allowed_keys)) beats_field['multi_fields'] = cleaned_multi_fields - beats_field['name'] = nested_field_name + beats_field['name'] = contextual_name if not ecs_field['flat_name'] in df_whitelist: beats_field['default_field'] = False diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index 4d8ac4138d..911a3c9968 100644 --- a/scripts/generators/ecs_helpers.py +++ 
b/scripts/generators/ecs_helpers.py
@@ -69,16 +69,6 @@ def fields_subset(subset, fields):
     return retained_fields
 
 
-def recursive_merge_subset_dicts(a, b):
-    for key in b:
-        if key not in a:
-            a[key] = b[key]
-        elif 'fields' not in a[key] or 'fields' not in b[key] or b[key]['fields'] == '*':
-            a[key]['fields'] = '*'
-        elif isinstance(a[key]['fields'], dict) and isinstance(b[key]['fields'], dict):
-            recursive_merge_subset_dicts(a[key]['fields'], b[key]['fields'])
-
-
 def yaml_ordereddict(dumper, data):
     # YAML representation of an OrderedDict will be like a dictionary, but
     # respecting the order of the dictionary.
@@ -150,6 +140,11 @@ def yaml_load(filename):
 
 # List helpers
 
 
+def list_subtract(original, subtracted):
+    '''Subtract two lists. original - subtracted'''
+    return [item for item in original if item not in subtracted]
+
+
 def list_extract_keys(lst, key_name):
     """Returns an array of values for 'key_name', from a list of dictionaries"""
     acc = []
@@ -158,18 +153,9 @@ def list_extract_keys(lst, key_name):
     return acc
 
 
-def list_split_by(lst, size):
-    '''Splits a list in smaller lists of a given size'''
-    acc = []
-    for i in range(0, len(lst), size):
-        acc.append(lst[i:i + size])
-    return acc
+# Helpers for the deeply nested fields structure
 
 
-def get_nested_field(fieldname, field_dict):
-    """Takes a field name in dot notation and a dictionary of fields and finds the field in the dictionary"""
-    fields = fieldname.split('.')
-    nested_field = field_dict[fields[0]]
-    for field in fields[1:]:
-        nested_field = nested_field['fields'][field]
-    return nested_field
+def is_intermediate(field):
+    '''Encapsulates the check to see if a field is an intermediate field or a "real" field.'''
+    return ('intermediate' in field['field_details'] and field['field_details']['intermediate'])
diff --git a/scripts/generators/intermediate_files.py b/scripts/generators/intermediate_files.py
index cc9234e5e9..3cdce5e229 100644
--- a/scripts/generators/intermediate_files.py
+++ b/scripts/generators/intermediate_files.py
@@ -1,8 +1,96 @@
+import copy
+
+from schema import visitor
 from generators import ecs_helpers
 from os.path import join
 
 
-def generate(ecs_nested, ecs_flat, out_dir):
+def generate(fields, out_dir, default_dirs):
     ecs_helpers.make_dirs(join(out_dir, 'ecs'))
-    ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_flat.yml'), ecs_flat)
-    ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_nested.yml'), ecs_nested)
+
+    # Should only be used for debugging ECS development
+    if default_dirs:
+        ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs.yml'), fields)
+
+    flat = generate_flat_fields(fields)
+    nested = generate_nested_fields(fields)
+
+    ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_flat.yml'), flat)
+    ecs_helpers.yaml_dump(join(out_dir, 'ecs/ecs_nested.yml'), nested)
+    return nested, flat
+
+
+def generate_flat_fields(fields):
+    '''Generate ecs_flat.yml'''
+    filtered = remove_non_root_reusables(fields)
+    flattened = {}
+    visitor.visit_fields_with_memo(filtered, accumulate_field, flattened)
+    return flattened
+
+
+def accumulate_field(details, memo):
+    '''Visitor function that accumulates all field details in the memo dict'''
+    if 'schema_details' in details or ecs_helpers.is_intermediate(details):
+        return
+    field_details = copy.deepcopy(details['field_details'])
+    remove_internal_attributes(field_details)
+
+    flat_name = field_details['flat_name']
+    memo[flat_name] = field_details
+
+
+def generate_nested_fields(fields):
+    '''Generate ecs_nested.yml'''
+    nested = {}
+    # Flatten each field set, but keep all resulting fields nested under their
+    # parent/host field set.
+    for (name, details) in fields.items():
+        fieldset_details = {
+            **copy.deepcopy(details['field_details']),
+            **copy.deepcopy(details['schema_details'])
+        }
+
+        fieldset_details.pop('node_name')
+        if 'reusable' in fieldset_details:
+            fieldset_details['reusable'].pop('order')
+
+        # TODO Temporarily removed to simplify initial rewrite review
+        fieldset_details.pop('dashed_name')
+        fieldset_details.pop('flat_name')
+        if False == fieldset_details['root']:
+            fieldset_details.pop('root')
+
+        fieldset_fields = {}
+        visitor.visit_fields_with_memo(details['fields'], accumulate_field, fieldset_fields)
+        fieldset_details['fields'] = fieldset_fields
+
+        nested[name] = fieldset_details
+    return nested
+
+
+# Helper functions
+
+
+def remove_internal_attributes(field_details):
+    '''Remove attributes only relevant to the deeply nested structure, but not to ecs_flat/nested.yml.'''
+    field_details.pop('node_name', None)
+    field_details.pop('intermediate', None)
+
+
+def remove_non_root_reusables(fields_nested):
+    '''
+    Remove field sets that have top_level=false from the root of the field definitions.
+
+    This attribute means they're only meant to be in the "reusable/expected" locations
+    and not at the root of users' events.
+
+    This is only relevant for the 'flat' field representation. The nested one
+    still needs to keep all field sets at the root of the YAML file, as it is
+    the official information about each field set. It's the responsibility of
+    users consuming ecs_nested.yml to skip the field sets with top_level=false.
+    '''
+    fields = {}
+    for (name, field) in fields_nested.items():
+        if 'reusable' not in field['schema_details'] or field['schema_details']['reusable']['top_level']:
+            fields[name] = field
+    return fields
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 13ccdcb7b4..7eaa0b4e30 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -2,4 +2,5 @@ pip
 PyYAML==5.3b1
 autopep8==1.4.4
 yamllint==1.19.0
+mock==4.0.2
 gitpython==3.1.2
diff --git a/scripts/schema/__init__.py b/scripts/schema/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/scripts/schema/cleaner.py b/scripts/schema/cleaner.py
new file mode 100644
index 0000000000..ef56966d36
--- /dev/null
+++ b/scripts/schema/cleaner.py
@@ -0,0 +1,186 @@
+import copy
+
+from generators import ecs_helpers
+from schema import visitor
+
+# This script performs a few cleanup functions in place, within the deeply nested
+# 'fields' structure passed to `clean(fields)`.
+#
+# What happens here:
+#
+# - checks that mandatory attributes are present, without which we can't do much
+# - cleans things up, like stripping spaces and sorting arrays
+# - makes lots of defaults explicit
+# - pre-calculates a few additional helpful attributes
+# - converts shorthands into their full representation (e.g. reuse locations)
+#
+# This script only deals with field sets themselves and the fields defined
+# inside them. It doesn't perform field reuse, and therefore doesn't
+# deal with final field names either.
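+# As a small illustration of the defaults made explicit here: after cleaning,
+# a field set that didn't declare 'group' or 'root' explicitly carries group=2
+# and root=false, and a keyword field that didn't declare 'ignore_above'
+# carries ignore_above=1024 (see schema_cleanup() and
+# field_or_multi_field_datatype_defaults() below).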
+
+
+def clean(fields):
+    visitor.visit_fields(fields, fieldset_func=schema_cleanup, field_func=field_cleanup)
+
+
+# Schema level cleanup
+
+
+def schema_cleanup(schema):
+    # Sanity check first
+    schema_mandatory_attributes(schema)
+    # trailing space cleanup
+    ecs_helpers.dict_clean_string_values(schema['schema_details'])
+    ecs_helpers.dict_clean_string_values(schema['field_details'])
+    # Some defaults
+    schema['schema_details'].setdefault('group', 2)
+    schema['schema_details'].setdefault('root', False)
+    schema['field_details'].setdefault('type', 'group')
+    schema['field_details'].setdefault('short', schema['field_details']['description'])
+    if 'reusable' in schema['schema_details']:
+        # Order in which to perform chained reuses. Set to 1 if it needs to happen earlier.
+        schema['schema_details']['reusable'].setdefault('order', 2)
+    # Precalculate values that can't be set in the YAML.
+    if schema['schema_details']['root']:
+        schema['schema_details']['prefix'] = ''
+    else:
+        schema['schema_details']['prefix'] = schema['field_details']['name'] + '.'
+    normalize_reuse_notation(schema)
+    # Final validity check
+    schema_assertions_and_warnings(schema)
+
+
+SCHEMA_MANDATORY_ATTRIBUTES = ['name', 'title', 'description']
+
+
+def schema_mandatory_attributes(schema):
+    '''Ensures the presence of the mandatory schema attributes and raises if any are missing'''
+    current_schema_attributes = sorted(list(schema['field_details'].keys()) +
+                                       list(schema['schema_details'].keys()))
+    missing_attributes = ecs_helpers.list_subtract(SCHEMA_MANDATORY_ATTRIBUTES, current_schema_attributes)
+    if len(missing_attributes) > 0:
+        msg = "Schema {} is missing the following mandatory attributes: {}.\nFound these: {}".format(
+            schema['field_details']['name'], ', '.join(missing_attributes), current_schema_attributes)
+        raise ValueError(msg)
+    if 'reusable' in schema['schema_details']:
+        reuse_attributes = sorted(schema['schema_details']['reusable'].keys())
+        missing_reuse_attributes = ecs_helpers.list_subtract(['expected', 'top_level'], reuse_attributes)
+        if len(missing_reuse_attributes) > 0:
+            msg = "Reusable schema {} is missing the following reuse attributes: {}.\nFound these: {}".format(
+                schema['field_details']['name'], ', '.join(missing_reuse_attributes), reuse_attributes)
+            raise ValueError(msg)
+
+
+def schema_assertions_and_warnings(schema):
+    '''Additional checks on a fleshed out schema'''
+    single_line_short_description(schema)
+
+
+def normalize_reuse_notation(schema):
+    """
+    Replace single-word reuse shorthands from the schema YAMLs with the explicit {at: , as:} notation.
+
+    When marking "user" as reusable under "destination" with the shorthand entry
+    `- destination`, this is expanded to the complete entry
+    `- { "at": "destination", "as": "user" }`.
+    The field set is thus nested at `destination.user.*`, with fields such as `destination.user.name`.
+
+    The dictionary notation enables nesting a field set under a different name.
+    An example is nesting "process" fields to capture parent process details
+    at `process.parent.*`.
+    The dictionary notation `- { "at": "process", "as": "parent" }` will yield
+    fields such as `process.parent.pid`.
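+
+    In both notations, each normalized entry also gains a computed 'full' key
+    (e.g. 'destination.user' or 'process.parent'), which later steps such as
+    the finalizer and the ecs_nested.yml output rely on.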
+    """
+    if 'reusable' not in schema['schema_details']:
+        return
+    schema_name = schema['field_details']['name']
+    reuse_entries = []
+    for reuse_entry in schema['schema_details']['reusable']['expected']:
+        if type(reuse_entry) is dict:  # Already explicit
+            if 'at' in reuse_entry and 'as' in reuse_entry:
+                explicit_entry = reuse_entry
+            else:
+                raise ValueError("When specifying reusable expected locations for {} "
+                                 "with the dictionary notation, keys 'as' and 'at' are required. "
+                                 "Got {}.".format(schema_name, reuse_entry))
+        else:  # Make it explicit
+            explicit_entry = {'at': reuse_entry, 'as': schema_name}
+        explicit_entry['full'] = explicit_entry['at'] + '.' + explicit_entry['as']
+        reuse_entries.append(explicit_entry)
+    schema['schema_details']['reusable']['expected'] = reuse_entries
+
+
+# Field level cleanup
+
+
+def field_cleanup(field):
+    field_mandatory_attributes(field)
+    if ecs_helpers.is_intermediate(field):
+        return
+    ecs_helpers.dict_clean_string_values(field['field_details'])
+    # TODO Temporarily commented out to simplify initial rewrite review
+    # if 'allowed_values' in field['field_details']:
+    #     for allowed_value in field['field_details']['allowed_values']:
+    #         ecs_helpers.dict_clean_string_values(allowed_value)
+    field_defaults(field)
+    field_assertions_and_warnings(field)
+
+
+def field_defaults(field):
+    field['field_details'].setdefault('short', field['field_details']['description'])
+    field['field_details'].setdefault('normalize', [])
+    # TODO Temporarily re-adding object_type for initial rewrite review. I think this should go away.
+    if 'object' == field['field_details']['type']:
+        field['field_details'].setdefault('object_type', 'keyword')
+    field_or_multi_field_datatype_defaults(field['field_details'])
+    if 'multi_fields' in field['field_details']:
+        for mf in field['field_details']['multi_fields']:
+            field_or_multi_field_datatype_defaults(mf)
+            if 'name' not in mf:
+                mf['name'] = mf['type']
+
+
+def field_or_multi_field_datatype_defaults(field_details):
+    '''Sets datatype-related defaults on a canonical field or multi-field entries.'''
+    if field_details['type'] == 'keyword':
+        field_details.setdefault('ignore_above', 1024)
+    if field_details['type'] == 'text':
+        field_details.setdefault('norms', False)
+    if 'index' in field_details and not field_details['index']:
+        field_details.setdefault('doc_values', False)
+
+
+FIELD_MANDATORY_ATTRIBUTES = ['name', 'description', 'type', 'level']
+ACCEPTABLE_FIELD_LEVELS = ['core', 'extended', 'custom']
+
+
+def field_mandatory_attributes(field):
+    '''Ensures the presence of the mandatory field attributes and raises if any are missing'''
+    if ecs_helpers.is_intermediate(field):
+        return
+    current_field_attributes = sorted(field['field_details'].keys())
+    missing_attributes = ecs_helpers.list_subtract(FIELD_MANDATORY_ATTRIBUTES, current_field_attributes)
+    if len(missing_attributes) > 0:
+        msg = "Field is missing the following mandatory attributes: {}.\nFound these: {}.\nField details: {}"
+        raise ValueError(msg.format(', '.join(missing_attributes),
+                                    current_field_attributes, field))
+
+
+def field_assertions_and_warnings(field):
+    '''Additional checks on a fleshed out field'''
+    if not ecs_helpers.is_intermediate(field):
+        single_line_short_description(field)
+        if field['field_details']['level'] not in ACCEPTABLE_FIELD_LEVELS:
+            msg = "Invalid level for field '{}'.\nValue: {}\nAcceptable values: {}".format(
+                field['field_details']['name'], field['field_details']['level'],
+                ACCEPTABLE_FIELD_LEVELS)
+            raise
ValueError(msg) + +# Common + + +def single_line_short_description(schema_or_field): + if "\n" in schema_or_field['field_details']['short']: + msg = ("Short descriptions must be single line.\n" + + "Fieldset: '{}'\n{}".format(schema_or_field['field_details']['name'], schema_or_field)) + raise ValueError(msg) diff --git a/scripts/schema/finalizer.py b/scripts/schema/finalizer.py new file mode 100644 index 0000000000..5a8b662810 --- /dev/null +++ b/scripts/schema/finalizer.py @@ -0,0 +1,176 @@ +import copy + +from schema import visitor + +# This script takes the fleshed out deeply nested fields dictionary as emitted by +# cleaner.py, and performs field reuse in two phases. +# +# Phase 1 performs field reuse across field sets. E.g. `group` fields should also be under `user`. +# This type of reuse is then carried around if the receiving field set is also reused. +# In other words, user.group.* will be in other places where user is nested: +# source.user.* will contain source.user.group.* + +# Phase 2 performs field reuse where field sets are reused within themselves, with a different name. +# Examples are nesting `process` within itself, as `process.parent.*`, +# or nesting `user` within itself at `user.target.*`. +# This second kind of nesting is not carried around everywhere else the receiving field set is reused. +# So `user.target.*` is *not* carried over to `source.user.target*` when we reuse `user` under `source`. + + +def finalize(fields): + '''Intended entrypoint of the finalizer.''' + perform_reuse(fields) + calculate_final_values(fields) + + +def order_reuses(fields): + foreign_reuses = {} + self_nestings = {} + for schema_name, schema in fields.items(): + if not 'reusable' in schema['schema_details']: + continue + reuse_order = schema['schema_details']['reusable']['order'] + for reuse_entry in schema['schema_details']['reusable']['expected']: + destination_schema_name = reuse_entry['full'].split('.')[0] + if destination_schema_name == schema_name: + # Accumulate self-nestings for phase 2. + self_nestings.setdefault(destination_schema_name, []) + self_nestings[destination_schema_name].extend([reuse_entry]) + else: + # Group foreign reuses by 'order' attribute. + foreign_reuses.setdefault(reuse_order, {}) + foreign_reuses[reuse_order].setdefault(schema_name, []) + foreign_reuses[reuse_order][schema_name].extend([reuse_entry]) + return foreign_reuses, self_nestings + + +def perform_reuse(fields): + '''Performs field reuse in two phases''' + foreign_reuses, self_nestings = order_reuses(fields) + + # Phase 1: foreign reuse + # These are done respecting the reusable.order attribute. + # This lets us force the order for chained reuses (e.g. 
group => user, then user => many places)
+    for order in sorted(foreign_reuses.keys()):
+        for schema_name, reuse_entries in foreign_reuses[order].items():
+            schema = fields[schema_name]
+            for reuse_entry in reuse_entries:
+                # print(order, "{} => {}".format(schema_name, reuse_entry['full']))
+                destination_schema_name = reuse_entry['full'].split('.')[0]
+                destination_schema = fields[destination_schema_name]
+                ensure_valid_reuse(schema, destination_schema)
+
+                new_field_details = copy.deepcopy(schema['field_details'])
+                new_field_details['original_fieldset'] = schema_name
+                new_field_details['intermediate'] = True
+                reused_fields = copy.deepcopy(schema['fields'])
+                set_original_fieldset(reused_fields, schema_name)
+                destination_fields = field_group_at_path(reuse_entry['at'], fields)
+                destination_fields[schema_name] = {
+                    'field_details': new_field_details,
+                    'fields': reused_fields,
+                }
+                append_reused_here(schema, reuse_entry, destination_schema)
+
+    # Phase 2: self-nesting
+    for schema_name, reuse_entries in self_nestings.items():
+        schema = fields[schema_name]
+        ensure_valid_reuse(schema)
+        # Since we're about to self-nest more fields within these, make a pristine copy first
+        reused_fields = copy.deepcopy(schema['fields'])
+        set_original_fieldset(reused_fields, schema_name)
+        for reuse_entry in reuse_entries:
+            # print("x {} => {}".format(schema_name, reuse_entry['full']))
+            nest_as = reuse_entry['as']
+            new_field_details = copy.deepcopy(schema['field_details'])
+            new_field_details['name'] = nest_as
+            new_field_details['original_fieldset'] = schema_name
+            new_field_details['intermediate'] = True
+            destination_fields = schema['fields']
+            destination_fields[nest_as] = {
+                'field_details': new_field_details,
+                # Make a new copy of the pristine copy
+                'fields': copy.deepcopy(reused_fields),
+            }
+            append_reused_here(schema, reuse_entry, fields[schema_name])
+
+
+def ensure_valid_reuse(reused_schema, destination_schema=None):
+    '''
+    Raise if either the reused schema or the destination schema has root=true.
+
+    The second param is optional; omit it when checking a self-nesting (where source=destination).
+    '''
+    if reused_schema['schema_details']['root']:
+        msg = "Schema {} has attribute root=true and therefore cannot be reused.".format(
+            reused_schema['field_details']['name'])
+        raise ValueError(msg)
+    elif destination_schema and destination_schema['schema_details']['root']:
+        msg = "Schema {} has attribute root=true and therefore cannot have other field sets reused inside it.".format(
+            destination_schema['field_details']['name'])
+        raise ValueError(msg)
+
+
+def append_reused_here(reused_schema, reuse_entry, destination_schema):
+    '''Captures two ways of denoting what field sets are reused under a given field set'''
+    # Legacy, too limited
+    destination_schema['schema_details'].setdefault('nestings', [])
+    destination_schema['schema_details']['nestings'] = sorted(
+        destination_schema['schema_details']['nestings'] + [reuse_entry['full']]
+    )
+    # New roomier way: we could eventually include contextual description here
+    destination_schema['schema_details'].setdefault('reused_here', [])
+    reused_here_entry = {
+        'schema_name': reused_schema['field_details']['name'],
+        'full': reuse_entry['full'],
+        'short': reused_schema['field_details']['short'],
+    }
+    destination_schema['schema_details']['reused_here'].extend([reused_here_entry])
+
+
+def set_original_fieldset(fields, original_fieldset):
+    '''Recursively set the 'original_fieldset' attribute for all fields in a group of fields'''
+    def func(details):
+        # Don't override if already set (e.g. 'group' for user.group.* fields)
+        details['field_details'].setdefault('original_fieldset', original_fieldset)
+    visitor.visit_fields(fields, field_func=func)
+
+
+def field_group_at_path(dotted_path, fields):
+    '''Returns the ['fields'] hash at the dotted_path.'''
+    path = dotted_path.split('.')
+    nesting = fields
+    for next_field in path:
+        field = nesting.get(next_field, None)
+        if not field:
+            raise ValueError("Field {} not found, failed to find {}".format(dotted_path, next_field))
+        nesting = field.get('fields', None)
+        if not nesting:
+            field_type = field['field_details']['type']
+            if field_type in ['object', 'group', 'nested']:
+                nesting = field['fields'] = {}
+            else:
+                raise ValueError("Field {} (type {}) already exists and cannot have nested fields".format(
+                    dotted_path, field_type))
+    return nesting
+
+
+def calculate_final_values(fields):
+    '''
+    This function navigates all fields recursively.
+
+    It populates a few more values for the fields, especially path-based values
+    like flat_name.
+    '''
+    visitor.visit_fields_with_path(fields, field_finalizer)
+
+
+def field_finalizer(details, path):
+    '''This is the function called by the visitor to perform the work of calculate_final_values'''
+    name_array = path + [details['field_details']['node_name']]
+    flat_name = '.'.join(name_array)
+    details['field_details']['flat_name'] = flat_name
+    details['field_details']['dashed_name'] = flat_name.replace('.', '-').replace('_', '-')
+    if 'multi_fields' in details['field_details']:
+        for mf in details['field_details']['multi_fields']:
+            mf['flat_name'] = flat_name + '.' + mf['name']
diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py
new file mode 100644
index 0000000000..de2d481fcc
--- /dev/null
+++ b/scripts/schema/loader.py
@@ -0,0 +1,206 @@
+import copy
+import glob
+import os
+import yaml
+
+from generators import ecs_helpers
+
+# Loads main ECS schemas and optional additional schemas.
+# They are deeply nested, then merged together.
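+
+# As a rough sketch of that merge (the custom schema below is hypothetical), an
+# --include file that re-declares 'user' with one extra field:
+#
+#   - name: user
+#     fields:
+#       - name: my_custom_field
+#         type: keyword
+#
+# would result in all ECS user.* fields plus user.my_custom_field once merged.
+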
+# This script doesn't fill in defaults other than the bare minimum for a predictable
+# deeply nested structure. It doesn't concern itself with what "should be allowed"
+# in being a good ECS citizen. It just loads things and merges them together.
+
+# The deeply nested structure returned by this script looks like this.
+#
+# [schema name]: {
+#   'schema_details': {
+#     'reusable': ...
+#   },
+#   'field_details': {
+#     'type': ...
+#   },
+#   'fields': {
+#     [field name]: {
+#       'field_details': { ... }
+#       'fields': {
+#
+#         (dotted key names replaced by deep nesting)
+#         [field name]: {
+#           'field_details': { ... }
+#           'fields': {
+#           }
+#         }
+#       }
+#     }
+#   }
+# }
+
+# Schemas at the top level always have all 3 keys populated.
+# Leaf fields only have 'field_details' populated.
+# Any intermediate field with other fields nested within it has 'fields' populated.
+# Note that intermediate fields rarely have 'field_details' populated, but it's supported.
+# Examples of this are 'dns.answers', 'observer.egress'.
+
+
+def load_schemas(ref=None, included_files=[]):
+    """Loads ECS and custom schemas. They are returned deeply nested and merged."""
+    # ECS fields (from git ref or not)
+    if ref:
+        schema_files_raw = load_schemas_from_git(ref)
+    else:
+        schema_files_raw = load_schema_files(ecs_helpers.ecs_files())
+    fields = deep_nesting_representation(schema_files_raw)
+
+    # Custom additional files (never from git ref)
+    if included_files and len(included_files) > 0:
+        print('Loading user defined schemas: {0}'.format(included_files))
+        custom_files = ecs_helpers.get_glob_files(included_files, ecs_helpers.YAML_EXT)
+        custom_fields = deep_nesting_representation(load_schema_files(custom_files))
+        fields = merge_fields(fields, custom_fields)
+    return fields
+
+
+def load_schema_files(files):
+    fields_nested = {}
+    for f in files:
+        new_fields = read_schema_file(f)
+        fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)
+    return fields_nested
+
+
+def load_schemas_from_git(ref):
+    tree = ecs_helpers.get_tree_by_ref(ref)
+    fields_nested = {}
+    for blob in tree['schemas'].blobs:
+        if blob.name.endswith('.yml'):
+            new_fields = read_schema_blob(blob, ref)
+            fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)
+    return fields_nested
+
+
+def read_schema_file(file_name):
+    """Read a raw schema yml file into a dict."""
+    with open(file_name) as f:
+        raw = yaml.safe_load(f.read())
+    return nest_schema(raw, file_name)
+
+
+def read_schema_blob(blob, ref):
+    """Read a raw schema yml git blob into a dict."""
+    content = blob.data_stream.read().decode('utf-8')
+    raw = yaml.safe_load(content)
+    file_name = "{} (git ref {})".format(blob.name, ref)
+    return nest_schema(raw, file_name)
+
+
+def nest_schema(raw, file_name):
+    '''
+    Raw schema files are an array of schema details: [{'name': 'base', ...}]
+
+    This function loops over the array (usually 1 schema per file) and turns it into
+    a dict with the schema name as the key: { 'base': { 'name': 'base', ...}}
+    '''
+    fields = {}
+    for schema in raw:
+        if 'name' not in schema:
+            raise ValueError("Schema file {} is missing mandatory attribute 'name'".format(file_name))
+        fields[schema['name']] = schema
+    return fields
+
+
+def deep_nesting_representation(fields):
+    deeply_nested = {}
+    for (name, flat_schema) in fields.items():
+        # We destructively select what goes into schema_details and child fields.
+        # The rest is 'field_details'.
+        flat_schema = flat_schema.copy()
+        flat_schema['node_name'] = flat_schema['name']
+
+        # Schema-only details. Not present on other nested field groups.
+        schema_details = {}
+        for schema_key in ['root', 'group', 'reusable', 'title']:
+            if schema_key in flat_schema:
+                schema_details[schema_key] = flat_schema.pop(schema_key)
+
+        nested_schema = nest_fields(flat_schema.pop('fields', []))
+        # Re-assemble new structure
+        deeply_nested[name] = {
+            'schema_details': schema_details,
+            # What's still in flat_schema is the field_details for the field set itself
+            'field_details': flat_schema,
+            'fields': nested_schema['fields']
+        }
+    return deeply_nested
+
+
+def nest_fields(field_array):
+    schema_root = {'fields': {}}
+    for field in field_array:
+        nested_levels = field['name'].split('.')
+        parent_fields = nested_levels[:-1]
+        leaf_field = nested_levels[-1]
+        # "nested_schema" is a cursor we move within the schema_root structure we're building.
+        # Here we reset the cursor for this new field.
+        nested_schema = schema_root['fields']
+
+        current_path = []
+        for idx, level in enumerate(parent_fields):
+            nested_schema.setdefault(level, {})
+            # Where nested fields will live
+            nested_schema[level].setdefault('fields', {})
+
+            # Make type:object explicit for intermediate parent fields
+            nested_schema[level].setdefault('field_details', {})
+            field_details = nested_schema[level]['field_details']
+            field_details['node_name'] = level
+            # Respect explicitly defined object fields
+            if 'type' in field_details and field_details['type'] in ['object', 'nested']:
+                field_details.setdefault('intermediate', False)
+            else:
+                field_details.setdefault('type', 'object')
+                field_details.setdefault('name', '.'.join(parent_fields[:idx + 1]))
+                field_details.setdefault('intermediate', True)
+
+            # Moving the nested_schema cursor deeper
+            current_path.extend([level])
+            nested_schema = nested_schema[level]['fields']
+        nested_schema.setdefault(leaf_field, {})
+        # Overwrite 'name' with the leaf field's name. The flat_name is already computed.
+        field['node_name'] = leaf_field
+        nested_schema[leaf_field]['field_details'] = field
+    return schema_root
+
+
+def merge_fields(a, b):
+    """Merge ECS field sets with custom field sets."""
+    a = copy.deepcopy(a)
+    b = copy.deepcopy(b)
+    for key in b:
+        if key not in a:
+            a[key] = b[key]
+            continue
+        # merge field details
+        if 'normalize' in b[key]['field_details']:
+            a[key].setdefault('field_details', {})
+            a[key]['field_details'].setdefault('normalize', [])
+            a[key]['field_details']['normalize'].extend(b[key]['field_details'].pop('normalize'))
+        a[key]['field_details'].update(b[key]['field_details'])
+        # merge schema details
+        if 'schema_details' in b[key]:
+            asd = a[key]['schema_details']
+            bsd = b[key]['schema_details']
+            if 'reusable' in b[key]['schema_details']:
+                asd.setdefault('reusable', {})
+                if 'top_level' in bsd['reusable']:
+                    asd['reusable']['top_level'] = bsd['reusable']['top_level']
+                else:
+                    asd['reusable'].setdefault('top_level', True)
+                asd['reusable'].setdefault('expected', [])
+                asd['reusable']['expected'].extend(bsd['reusable']['expected'])
+                bsd.pop('reusable')
+            asd.update(bsd)
+        # merge nested fields
+        if 'fields' in b[key]:
+            a[key].setdefault('fields', {})
+            a[key]['fields'] = merge_fields(a[key]['fields'], b[key]['fields'])
+    return a
diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py
new file mode 100644
index 0000000000..36d91bc0b2
--- /dev/null
+++ b/scripts/schema/subset_filter.py
@@ -0,0 +1,78 @@
+import glob
+import yaml
+
+# This script takes all ECS and custom fields already loaded, and lets users
+# filter out the ones they don't need.
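+#
+# A subset file is a YAML whitelist of the field sets and fields to keep. As an
+# illustrative sketch (the field names below are only examples), a subset file
+# could contain:
+#
+#   base:
+#     fields: '*'
+#   process:
+#     fields:
+#       pid: {}
+#
+# where '*' (or a missing 'fields' key) keeps all subfields of an entry.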
+
+
+def filter(fields, subset_file_globs):
+    '''
+    Takes the deeply nested field structure and the subset file names.
+
+    It returns a copy of the fields that matches the whitelist defined in the subset.
+    '''
+    if not subset_file_globs or subset_file_globs == []:
+        return fields
+    subset_definitions = load_subset_definitions(subset_file_globs)
+    filtered_fields = extract_matching_fields(fields, subset_definitions)
+    return filtered_fields
+
+
+def load_subset_definitions(file_globs):
+    subsets = {}
+    for f in eval_globs(file_globs):
+        raw = load_yaml_file(f)
+        merge_subsets(subsets, raw)
+    if not subsets:
+        raise ValueError('--subset specified, but no subsets found in {}'.format(file_globs))
+    return subsets
+
+
+def load_yaml_file(file_name):
+    with open(file_name) as f:
+        return yaml.safe_load(f.read())
+
+
+def eval_globs(globs):
+    '''Accepts an array of glob patterns or file names, returns the array of actual files'''
+    all_files = []
+    for g in globs:
+        new_files = glob.glob(g)
+        if len(new_files) == 0:
+            warn("{} did not match any files".format(g))
+        else:
+            all_files.extend(new_files)
+    return all_files
+
+
+# You know, for silent tests
+def warn(message):
+    print(message)
+
+
+def merge_subsets(a, b):
+    '''Merges field subset definitions together. The b subset is merged into the a subset.'''
+    for key in b:
+        if key not in a:
+            a[key] = b[key]
+        elif 'fields' not in a[key] or 'fields' not in b[key] or b[key]['fields'] == '*':
+            a[key]['fields'] = '*'
+        elif isinstance(a[key]['fields'], dict) and isinstance(b[key]['fields'], dict):
+            merge_subsets(a[key]['fields'], b[key]['fields'])
+
+
+def extract_matching_fields(fields, subset_definitions):
+    retained_fields = {}
+    allowed_options = ['fields']
+    for key, val in subset_definitions.items():
+        for option in val:
+            if option not in allowed_options:
+                raise ValueError('Unsupported option found in subset: {}'.format(option))
+        # A missing fields key is shorthand for including all subfields
+        if 'fields' not in val or val['fields'] == '*':
+            retained_fields[key] = fields[key]
+        elif isinstance(val['fields'], dict):
+            # Copy the full field over so we get all the options, then replace the 'fields' with the right subset
+            retained_fields[key] = fields[key]
+            retained_fields[key]['fields'] = extract_matching_fields(fields[key]['fields'], val['fields'])
+    return retained_fields
diff --git a/scripts/schema/visitor.py b/scripts/schema/visitor.py
new file mode 100644
index 0000000000..5c2e030da5
--- /dev/null
+++ b/scripts/schema/visitor.py
@@ -0,0 +1,59 @@
+def visit_fields(fields, fieldset_func=None, field_func=None):
+    '''
+    This function navigates the deeply nested tree structure and runs provided
+    functions on each fieldset or field encountered (both optional).
+
+    The argument 'fields' should be at the named field grouping level:
+    {'name': {'schema_details': {}, 'field_details': {}, 'fields': {}}}
+
+    The 'fieldset_func(details)' provided will be called for each field set,
+    with the dictionary containing their details ({'schema_details': {}, 'field_details': {}, 'fields': {}}).
+
+    The 'field_func(details)' provided will be called for each field, with the dictionary
+    containing the field's details ({'field_details': {}, 'fields': {}}).
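+
+    A minimal usage sketch (the callback below is hypothetical):
+
+        def print_name(details):
+            print(details['field_details']['name'])
+
+        visit_fields(fields, field_func=print_name)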
+    '''
+    for (name, details) in fields.items():
+        if fieldset_func and 'schema_details' in details:
+            fieldset_func(details)
+        elif field_func and 'field_details' in details:
+            field_func(details)
+        if 'fields' in details:
+            visit_fields(details['fields'],
+                         fieldset_func=fieldset_func,
+                         field_func=field_func)
+
+
+def visit_fields_with_path(fields, func, path=[]):
+    '''
+    This function navigates the deeply nested tree structure and runs the provided
+    function on all fields and field sets.
+
+    The 'func' provided will be called for each field,
+    with the dictionary containing their details ({'field_details': {}, 'fields': {}})
+    as well as the path array leading to the location of the field in question.
+    '''
+    for (name, details) in fields.items():
+        if 'field_details' in details:
+            func(details, path)
+        if 'fields' in details:
+            if 'schema_details' in details and details['schema_details']['root']:
+                new_nesting = []
+            else:
+                new_nesting = [name]
+            visit_fields_with_path(details['fields'], func, path + new_nesting)
+
+
+def visit_fields_with_memo(fields, func, memo=None):
+    '''
+    This function navigates the deeply nested tree structure and runs the provided
+    function on all fields and field sets.
+
+    The 'func' provided will be called for each field,
+    with the dictionary containing their details ({'field_details': {}, 'fields': {}})
+    as well as the 'memo' you pass in.
+    '''
+    for (name, details) in fields.items():
+        if 'field_details' in details:
+            func(details, memo)
+        if 'fields' in details:
+            visit_fields_with_memo(details['fields'], func, memo)
diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py
deleted file mode 100644
index 8d86261f61..0000000000
--- a/scripts/schema_reader.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-import yaml
-import copy
-from generators import ecs_helpers
-
-# This script has a few entrypoints. The code related to each entrypoint is grouped
-# together between comments.
-#
-# load_schemas()
-#   yml file load (ECS or custom) + cleanup of field set attributes.
-# merge_schema_fields()
-#   Merge ECS field sets with custom field sets
-# generate_nested_flat()
-#   Finalize the intermediate representation of all fields. Fills field defaults,
-#   performs field nestings, and precalculates many values used by various generators.
-
-# Loads schemas and perform cleanup of schema attributes
-
-
-def create_schema_dicts(schemas):
-    fields_nested = {}
-    for schema in schemas:
-        raw = yaml.safe_load(schema)
-        fields_nested.update(create_fields_dict(raw))
-    finalize_schemas(fields_nested)
-    return fields_nested
-
-
-def create_fields_dict(raw):
-    fields = {}
-    for field_set in raw:
-        fields[field_set['name']] = field_set
-    return fields
-
-
-def load_schemas_from_files(files=ecs_helpers.ecs_files()):
-    schemas = []
-    for file in files:
-        with open(file) as f:
-            content = f.read()
-            schemas.append(content)
-    return schemas
-
-
-def load_schemas_from_git(tree):
-    schemas = []
-    for blob in tree['schemas'].blobs:
-        if blob.name.endswith('.yml'):
-            content = blob.data_stream.read().decode('utf-8')
-            schemas.append(content)
-    return schemas
-
-
-def finalize_schemas(fields_nested):
-    """Clean up all schema level attributes"""
-    for schema_name in fields_nested:
-        schema = fields_nested[schema_name]
-        schema_cleanup_values(schema)
-
-
-def schema_cleanup_values(schema):
-    """Clean up one schema"""
-    ecs_helpers.dict_clean_string_values(schema)
-    schema_set_default_values(schema)
-    schema_set_fieldset_prefix(schema)
-    schema_fields_as_dictionary(schema)
-
-
-def schema_set_default_values(schema):
-    schema['type'] = 'group'
-    schema.setdefault('group', 2)
-    schema.setdefault('short', schema['description'])
-    if "\n" in schema['short']:
-        raise ValueError("Short descriptions must be single line.\nFieldset: {}\n{}".format(schema['name'], schema))
-
-
-def schema_set_fieldset_prefix(schema):
-    if 'root' in schema and schema['root']:
-        schema['prefix'] = ''
-    else:
-        schema['prefix'] = schema['name'] + '.'
-
-
-def schema_fields_as_dictionary(schema):
-    """Re-nest the array of field names as a dictionary of 'fieldname' => { field definition }"""
-    field_array = schema.pop('fields', [])
-    schema['fields'] = {}
-    for field in field_array:
-        nested_levels = field['name'].split('.')
-        nested_schema = schema['fields']
-        for level in nested_levels[:-1]:
-            if level not in nested_schema:
-                nested_schema[level] = {}
-            if 'fields' not in nested_schema[level]:
-                nested_schema[level]['fields'] = {}
-            nested_schema = nested_schema[level]['fields']
-        if nested_levels[-1] not in nested_schema:
-            nested_schema[nested_levels[-1]] = {}
-        # Only leaf fields will have field details so we can identify them later
-        nested_schema[nested_levels[-1]]['field_details'] = field
-
-# Merge ECS field sets with custom field sets
-
-
-def merge_schema_fields(a, b):
-    """Merge ECS field sets with custom field sets"""
-    for key in b:
-        if key not in a:
-            a[key] = b[key]
-        else:
-            a_type = a[key].get('field_details', {}).get('type', 'object')
-            b_type = b[key].get('field_details', {}).get('type', 'object')
-            if a_type != b_type:
-                raise ValueError('Schemas unmergeable: type {} does not match type {}'.format(a_type, b_type))
-            elif a_type not in ['object', 'nested']:
-                print('Warning: dropping field {}, already defined'.format(key))
-                continue
-            # reusable should only be found at the top level of a fieldset
-            if 'reusable' in b[key]:
-                a[key].setdefault('reusable', {})
-                a[key]['reusable']['top_level'] = a[key]['reusable'].get(
-                    'top_level', False) or b[key]['reusable']['top_level']
-                a[key]['reusable'].setdefault('expected', [])
-                a[key]['reusable']['expected'].extend(b[key]['reusable']['expected'])
-            if 'fields' in b[key]:
-                a[key].setdefault('fields', {})
-                merge_schema_fields(a[key]['fields'], b[key]['fields'])
-
-# Finalize the intermediate representation of all fields.
-
-
-def generate_nested_flat(fields_intermediate):
-    for field_name, field in fields_intermediate.items():
-        nestings = find_nestings(field['fields'], field_name + ".")
-        nestings.sort()
-        if len(nestings) > 0:
-            field['nestings'] = nestings
-    fields_nested = generate_partially_flattened_fields(fields_intermediate)
-    fields_flat = generate_fully_flattened_fields(fields_intermediate)
-    return (fields_nested, fields_flat)
-
-
-def assemble_reusables(fields_nested):
-    # This happens as a second pass, so that all fieldsets have their
-    # fields array replaced with a fields dictionary.
-    for schema_name in fields_nested:
-        schema = fields_nested[schema_name]
-        duplicate_reusable_fieldsets(schema, fields_nested)
-    cleanup_fields_recursive(fields_nested, "")
-
-
-def duplicate_reusable_fieldsets(schema, fields_nested):
-    """Copies reusable field definitions to their expected places"""
-    # Note: across this schema reader, functions are modifying dictionaries passed
-    # as arguments, which is usually a risk of unintended side effects.
-    # Here it simplifies the nesting of 'group' under 'user',
-    # which is in turn reusable in a few places.
-    if 'reusable' in schema:
-        for new_nesting in schema['reusable']['expected']:
-            split_flat_name = new_nesting.split('.')
-            top_level = split_flat_name[0]
-            # List field set names expected under another field set.
-            # E.g. host.nestings = [ 'geo', 'os', 'user' ]
-            nested_schema = fields_nested[top_level]['fields']
-            for level in split_flat_name[1:]:
-                nested_schema = nested_schema.get(level, None)
-                if not nested_schema:
-                    raise ValueError('Field {} in path {} not found in schema'.format(level, new_nesting))
-                if nested_schema.get('reusable', None):
-                    raise ValueError(
-                        'Reusable fields cannot be put inside other reusable fields except when the destination reusable is at the top level')
-                nested_schema = nested_schema.setdefault('fields', {})
-            nested_schema[schema['name']] = schema
-
-
-def cleanup_fields_recursive(fields, prefix, original_fieldset=None):
-    for (name, field) in fields.items():
-        # Copy field here so reusable field sets become unique copies instead of references to the original set
-        field = field.copy()
-        fields[name] = field
-        temp_original_fieldset = name if ('reusable' in field and prefix != "") else original_fieldset
-        if 'field_details' in field:
-            # Deep copy the field details so we can insert different flat names for each reusable fieldset
-            field_details = copy.deepcopy(field['field_details'])
-            new_flat_name = prefix + name
-            field_details['flat_name'] = new_flat_name
-            field_details['dashed_name'] = new_flat_name.replace('.', '-').replace('_', '-')
-            if temp_original_fieldset:
-                field_details['original_fieldset'] = temp_original_fieldset
-            ecs_helpers.dict_clean_string_values(field_details)
-            field_set_defaults(field_details)
-            field['field_details'] = field_details
-        if 'fields' in field:
-            field['fields'] = field['fields'].copy()
-            new_prefix = prefix + name + "."
-            if 'root' in field and field['root']:
-                new_prefix = ""
-            cleanup_fields_recursive(field['fields'], new_prefix, temp_original_fieldset)
-
-
-def field_set_defaults(field):
-    field.setdefault('normalize', [])
-    if field['type'] == 'keyword':
-        field.setdefault('ignore_above', 1024)
-    if field['type'] == 'text':
-        field.setdefault('norms', False)
-    if field['type'] == 'object':
-        field.setdefault('object_type', 'keyword')
-
-    field.setdefault('short', field['description'])
-    if "\n" in field['short']:
-        raise ValueError("Short descriptions must be single line.\nField: {}\n{}".format(field['flat_name'], field))
-        # print("  Short descriptions must be single line. Field: {}".format(field['flat_name']))
-
-    if 'index' in field and not field['index']:
-        field.setdefault('doc_values', False)
-    if 'multi_fields' in field:
-        field_set_multi_field_defaults(field)
-
-
-def field_set_multi_field_defaults(parent_field):
-    """Sets defaults for each nested field in the multi_fields array"""
-    for mf in parent_field['multi_fields']:
-        mf.setdefault('name', mf['type'])
-        if mf['type'] == 'text':
-            mf.setdefault('norms', False)
-        mf['flat_name'] = parent_field['flat_name'] + '.' + mf['name']
-
-
-def find_nestings(fields, prefix):
-    """Recursively finds all reusable fields in the fields dictionary."""
-    nestings = []
-    for field_name, field in fields.items():
-        if 'reusable' in field:
-            nestings.append(prefix + field_name)
-        if 'fields' in field:
-            nestings.extend(find_nestings(field['fields'], prefix + field_name + '.'))
-    return nestings
-
-
-def generate_partially_flattened_fields(fields_nested):
-    flat_fields = {}
-    for (name, field) in fields_nested.items():
-        # assigning field.copy() adds all the top level schema fields, has to be a copy since we're about
-        # to reassign the 'fields' key and we don't want to modify fields_nested
-        flat_fields[name] = field.copy()
-        flat_fields[name]['fields'] = flatten_fields(field['fields'], "")
-    return flat_fields
-
-
-def generate_fully_flattened_fields(fields_nested):
-    flattened = flatten_fields(remove_non_root_reusables(fields_nested), "")
-    return flattened
-
-
-def remove_non_root_reusables(fields_nested):
-    fields = {}
-    for (name, field) in fields_nested.items():
-        if 'reusable' not in field or ('reusable' in field and field['reusable']['top_level']):
-            fields[name] = field
-    return fields
-
-
-def flatten_fields(fields, key_prefix):
-    flat_fields = {}
-    for (name, field) in fields.items():
-        new_key = key_prefix + name
-        if 'field_details' in field:
-            flat_fields[new_key] = field['field_details'].copy()
-        if 'fields' in field:
-            new_prefix = new_key + "."
-            if 'root' in field and field['root']:
-                new_prefix = ""
-            flat_fields.update(flatten_fields(field['fields'], new_prefix))
-    return flat_fields
diff --git a/scripts/tests/test_ecs_helpers.py b/scripts/tests/test_ecs_helpers.py
index e3ad248cde..2eb5ff0254 100644
--- a/scripts/tests/test_ecs_helpers.py
+++ b/scripts/tests/test_ecs_helpers.py
@@ -8,6 +8,15 @@


 class TestECSHelpers(unittest.TestCase):
+
+    def test_is_intermediate_field(self):
+        pseudo_field = {'field_details': {}}
+        self.assertEqual(ecs_helpers.is_intermediate(pseudo_field), False)
+        pseudo_field['field_details']['intermediate'] = False
+        self.assertEqual(ecs_helpers.is_intermediate(pseudo_field), False)
+        pseudo_field['field_details']['intermediate'] = True
+        self.assertEqual(ecs_helpers.is_intermediate(pseudo_field), True)

     # dict_copy_existing_keys

     def test_dict_copy_existing_keys(self):
@@ -84,155 +93,11 @@ def test_clean_string_values(self):
         ecs_helpers.dict_clean_string_values(dict)
         self.assertEqual(dict, {'dirty': 'space, the final frontier', 'clean': 'val', 'int': 1})

-    def test_list_slit_by(self):
-        lst = ['ecs', 'has', 'a', 'meme', 'now']
-        split_list = ecs_helpers.list_split_by(lst, 3)
-        self.assertEqual(split_list, [['ecs', 'has', 'a'], ['meme', 'now']])
-
-    def test_recursive_subset_merge(self):
-        subset_a = {
-            'field1': {
-                'fields': {
-                    'subfield1': {
-                        'fields': {
-                            'subsubfield1': {
-                                'fields': '*'
-                            }
-                        }
-                    },
-                    'subfield2': {
-                        'fields': '*'
-                    }
-                }
-            },
-            'field2': {}
-        }
-        subset_b = {
-            'field1': {
-                'fields': {
-                    'subfield1': {
-                        'fields': '*'
-                    },
-                    'subfield3': {
-                        'fields': '*'
-                    }
-                }
-            },
-            'field2': {
-                'fields': {
-                    'subfield2': {
-                        'fields': '*'
-                    }
-                }
-            },
-            'field3': {
-                'fields': '*'
-            }
-        }
-        expected = {
-            'field1': {
-                'fields': {
-                    'subfield1': {
-                        'fields': '*'
-                    },
-                    'subfield2': {
-                        'fields': '*'
-                    },
-                    'subfield3': {
-                        'fields': '*'
-                    }
-                }
-            },
-            'field2': {
-                'fields': '*'
-            },
-            'field3': {
-                'fields': '*'
-            }
-        }
-        ecs_helpers.recursive_merge_subset_dicts(subset_a, subset_b)
-        self.assertEqual(subset_a, expected)
-
-    def test_fields_subset(self):
-        fields = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    },
-                    'test_field2': {
-                        'field_details': {
-                            'name': 'test_field2',
-                            'type': 'keyword',
-                            'description': 'Another test field'
-                        }
-                    }
-                }
-            }
-        }
-        subset = {
-            'test_fieldset': {
-                'fields': {
-                    'test_field1': {
-                        'fields': '*'
-                    }
-                }
-            }
-        }
-        expected = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    }
-                }
-            }
-        }
-        actual = ecs_helpers.fields_subset(subset, fields)
-        self.assertEqual(actual, expected)
-
-    def test_get_nested_field(self):
-        fields = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    },
-                    'test_field2': {
-                        'field_details': {
-                            'name': 'test_field2',
-                            'type': 'keyword',
-                            'description': 'Another test field'
-                        }
-                    }
-                }
-            }
-        }
-        nested_field_name = 'test_fieldset.test_field1'
-        expected = {
-            'field_details': {
-                'name': 'test_field1',
-                'type': 'keyword',
-                'description': 'A test field'
-            }
-        }
-        actual = ecs_helpers.get_nested_field(nested_field_name, fields)
-        self.assertEqual(actual, expected)
+    # List helper tests
+
+    def test_list_subtract(self):
+        self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a']), ['b'])
+        self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a', 'c']), ['b'])

     def test_get_tree_by_ref(self):
         ref = 'v1.5.0'
diff --git a/scripts/tests/test_ecs_spec.py b/scripts/tests/test_ecs_spec.py
index 5480c02acf..6ff6372579 100644
--- a/scripts/tests/test_ecs_spec.py
+++ b/scripts/tests/test_ecs_spec.py
@@ -4,25 +4,40 @@

 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

-from scripts import schema_reader
-from generators import ecs_helpers
+from scripts.schema import loader
+from scripts.schema import cleaner
+from scripts.schema import finalizer
+from scripts.generators import intermediate_files


 class TestEcsSpec(unittest.TestCase):
     """Sanity check for things that should be true in the ECS spec."""

+    @classmethod
+    def setUpClass(cls):
+        fields = loader.load_schemas()
+        cleaner.clean(fields)
+        finalizer.finalize(fields)
+        cls.ecs_nested = intermediate_files.generate_nested_fields(fields)
+        cls.ecs_fields = intermediate_files.generate_flat_fields(fields)
+
     def setUp(self):
-        schemas = schema_reader.load_schemas_from_files()
-        intermediate_schemas = schema_reader.create_schema_dicts(schemas)
-        schema_reader.assemble_reusables(intermediate_schemas)
-        (self.ecs_nested, self.ecs_fields) = schema_reader.generate_nested_flat(intermediate_schemas)
+        self.ecs_nested = TestEcsSpec.ecs_nested
+        self.ecs_fields = TestEcsSpec.ecs_fields

     def test_base_flat_name(self):
-        self.assertIsInstance(self.ecs_fields['@timestamp'], dict)
+        self.assertIn('@timestamp', self.ecs_fields)
+        self.assertIn('@timestamp', self.ecs_nested['base']['fields'])
         self.assertEqual(
             self.ecs_nested['base']['fields']['@timestamp']['flat_name'],
             '@timestamp')

+    def test_root_fieldsets_can_have_nested_keys(self):
+        self.assertIn('trace.id', self.ecs_fields)
+        self.assertIn('transaction.id', self.ecs_fields)
+        self.assertIn('trace.id', self.ecs_nested['tracing']['fields'])
+        self.assertIn('transaction.id', self.ecs_nested['tracing']['fields'])
+
     def test_flat_includes_reusable_fields(self):
         all_keys = sorted(self.ecs_fields.keys())

@@ -63,30 +78,30 @@ def test_nested_includes_reusable_fields(self):
         user_keys = sorted(self.ecs_nested['user']['fields'].keys())

         # geo
-        self.assertIn('geo.name', client_keys)
-        self.assertIn('geo.name', destination_keys)
-        self.assertIn('geo.name', host_keys)
-        self.assertIn('geo.name', observer_keys)
-        self.assertIn('geo.name', server_keys)
-        self.assertIn('geo.name', source_keys)
-
-        # group
-        self.assertIn('group.name', user_keys)
-        self.assertIn('user.group.id', client_keys)
-        self.assertIn('user.group.id', destination_keys)
-        self.assertIn('user.group.id', server_keys)
-        self.assertIn('user.group.id', source_keys)
+        self.assertIn('client.geo.name', client_keys)
+        self.assertIn('destination.geo.name', destination_keys)
+        self.assertIn('host.geo.name', host_keys)
+        self.assertIn('observer.geo.name', observer_keys)
+        self.assertIn('server.geo.name', server_keys)
+        self.assertIn('source.geo.name', source_keys)
+
+        # group (chained reuses)
+        self.assertIn('user.group.name', user_keys)
+        self.assertIn('client.user.group.id', client_keys)
+        self.assertIn('destination.user.group.id', destination_keys)
+        self.assertIn('server.user.group.id', server_keys)
+        self.assertIn('source.user.group.id', source_keys)

         # user
-        self.assertIn('user.id', client_keys)
-        self.assertIn('user.id', destination_keys)
-        self.assertIn('user.id', server_keys)
-        self.assertIn('user.id', source_keys)
+        self.assertIn('client.user.id', client_keys)
+        self.assertIn('destination.user.id', destination_keys)
+        self.assertIn('server.user.id', server_keys)
+        self.assertIn('source.user.id', source_keys)

         # os
-        self.assertIn('os.name', host_keys)
-        self.assertIn('os.name', observer_keys)
-        self.assertIn('os.name', user_agent_keys)
+        self.assertIn('host.os.name', host_keys)
+        self.assertIn('observer.os.name', observer_keys)
+        self.assertIn('user_agent.os.name', user_agent_keys)

     def test_related_fields_always_arrays(self):
         for (field_name, field) in self.ecs_nested['related']['fields'].items():
diff --git a/scripts/tests/test_schema_reader.py b/scripts/tests/test_schema_reader.py
deleted file mode 100644
index 6f08fa19d9..0000000000
--- a/scripts/tests/test_schema_reader.py
+++ /dev/null
@@ -1,480 +0,0 @@
-import os
-import sys
-import unittest
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-
-from scripts import schema_reader
-from generators import ecs_helpers
-
-
-class TestSchemaReader(unittest.TestCase):
-
-    # schemas
-
-    def test_schema_set_fieldset_prefix_at_root(self):
-        schema = {'root': True, 'name': 'myfieldset'}
-        schema_reader.schema_set_fieldset_prefix(schema)
-        self.assertEqual(schema,
-                         {'prefix': '', 'root': True, 'name': 'myfieldset'})
-
-    def test_schema_set_fieldset_prefix_root_unspecified(self):
-        schema = {'name': 'myfieldset'}
-        schema_reader.schema_set_fieldset_prefix(schema)
-        self.assertEqual(schema,
-                         {'prefix': 'myfieldset.', 'name': 'myfieldset'})
-
-    def test_schema_set_fieldset_prefix_not_at_root(self):
-        schema = {'root': False, 'name': 'myfieldset'}
-        schema_reader.schema_set_fieldset_prefix(schema)
-        self.assertEqual(schema,
-                         {'prefix': 'myfieldset.', 'root': False, 'name': 'myfieldset'})
-
-    def test_set_default_values_defaults(self):
-        schema = {'description': '...'}
-        schema_reader.schema_set_default_values(schema)
-        self.assertEqual(schema, {'group': 2, 'type': 'group', 'description': '...', 'short': '...'})
-
-    def test_set_default_values_no_overwrite(self):
-        schema = {'group': 1, 'description': '...'}
-        schema_reader.schema_set_default_values(schema)
-        self.assertEqual(schema, {'group': 1, 'type': 'group', 'description': '...', 'short': '...'})
-
-    # field definitions
-
-    def test_field_set_defaults_no_short(self):
-        field = {'description': 'a field', 'type': 'faketype'}
-        schema_reader.field_set_defaults(field)
-        self.assertEqual(field, {'description': 'a field', 'short': 'a field', 'type': 'faketype', 'normalize': []})
-
-    def test_field_set_multi_field_defaults_missing_name(self):
-        field = {
-            'name': 'myfield',
-            'flat_name': 'myfieldset.myfield',
-            'multi_fields': [
-                {'type': 'text'}
-            ]
-        }
-        schema_reader.field_set_multi_field_defaults(field)
-        expected = {
-            'name': 'myfield',
-            'flat_name': 'myfieldset.myfield',
-            'multi_fields': [{
-                'name': 'text',
-                'type': 'text',
-                'norms': False,
-                'flat_name': 'myfieldset.myfield.text',
-            }]
-        }
-        self.assertEqual(field, expected)
-
-    def test_load_schemas_with_empty_list_loads_nothing(self):
-        result = schema_reader.load_schemas_from_files([])
-        self.assertEqual(result, ([]))
-
-    def test_load_schemas_by_git_ref(self):
-        ref = 'v1.5.0'
-        tree = ecs_helpers.get_tree_by_ref(ref)
-        schemas = schema_reader.load_schemas_from_git(tree)
-        self.assertEqual(len(schemas), 42)
-
-    def test_flatten_fields(self):
-        fields = {
-            'top_level': {
-                'field_details': {
-                    'name': 'top_level'
-                },
-                'fields': {
-                    'nested_field': {
-                        'field_details': {
-                            'name': 'nested_field'
-                        },
-                        'fields': {
-                            'double_nested_field': {
-                                'field_details': {
-                                    'name': 'double_nested_field'
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        flat_fields = schema_reader.flatten_fields(fields, "")
-        expected = {
-            'top_level': {
-                'name': 'top_level'
-            },
-            'top_level.nested_field': {
-                'name': 'nested_field'
-            },
-            'top_level.nested_field.double_nested_field': {
-                'name': 'double_nested_field'
-            }
-        }
-        self.assertEqual(flat_fields, expected)
-
-    def test_cleanup_fields_recursive(self):
-        """Reuse a fieldset under two other fieldsets and check that the flat names are properly generated."""
-        reusable = {
-            'name': 'reusable_fieldset',
-            'reusable': {
-                'top_level': False,
-                'expected': [
-                    'test_fieldset'
-                ]
-            },
-            'fields': {
-                'reusable_field': {
-                    'field_details': {
-                        'name': 'reusable_field',
-                        'type': 'keyword',
-                        'description': 'A test field'
-                    }
-                }
-            }
-        }
-        fields = {
-            'base_set1': {
-                'name': 'base_set1',
-                'fields': {
-                    'reusable_fieldset': reusable
-                }
-            },
-            'base_set2': {
-                'name': 'base_set2',
-                'fields': {
-                    'reusable_fieldset': reusable
-                }
-            }
-        }
-        schema_reader.cleanup_fields_recursive(fields, "")
-        expected = {
-            'base_set1': {
-                'name': 'base_set1',
-                'fields': {
-                    'reusable_fieldset': {
-                        'name': 'reusable_fieldset',
-                        'reusable': {
-                            'top_level': False,
-                            'expected': [
-                                'test_fieldset'
-                            ]
-                        },
-                        'fields': {
-                            'reusable_field': {
-                                'field_details': {
-                                    'name': 'reusable_field',
-                                    'type': 'keyword',
-                                    'description': 'A test field',
-                                    'flat_name': 'base_set1.reusable_fieldset.reusable_field',
-                                    'dashed_name': 'base-set1-reusable-fieldset-reusable-field',
-                                    'ignore_above': 1024,
-                                    'short': 'A test field',
-                                    'normalize': [],
-                                    'original_fieldset': 'reusable_fieldset'
-                                }
-                            }
-                        }
-                    }
-                }
-            },
-            'base_set2': {
-                'name': 'base_set2',
-                'fields': {
-                    'reusable_fieldset': {
-                        'name': 'reusable_fieldset',
-                        'reusable': {
-                            'top_level': False,
-                            'expected': [
-                                'test_fieldset'
-                            ]
-                        },
-                        'fields': {
-                            'reusable_field': {
-                                'field_details': {
-                                    'name': 'reusable_field',
-                                    'type': 'keyword',
-                                    'description': 'A test field',
-                                    'flat_name': 'base_set2.reusable_fieldset.reusable_field',
-                                    'dashed_name': 'base-set2-reusable-fieldset-reusable-field',
-                                    'ignore_above': 1024,
-                                    'short': 'A test field',
-                                    'normalize': [],
-                                    'original_fieldset': 'reusable_fieldset'
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        self.assertEqual(fields, expected)
-
-    def test_merge_schema_fields(self):
-        fieldset1 = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'reusable': {
-                    'top_level': False,
-                    'expected': ['location1, location2']
-                },
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    },
-                    'test_field2': {
-                        'field_details': {
-                            'name': 'test_field2',
-                            'type': 'keyword',
-                            'description': 'Another test field'
-                        }
-                    }
-                }
-            }
-        }
-        fieldset2 = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'reusable': {
-                    'top_level': True,
-                    'expected': ['location3, location4']
-                },
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field with matching type but custom description'
-                        }
-                    },
-                    'test_field3': {
-                        'field_details': {
-                            'name': 'test_field3',
-                            'type': 'keyword',
-                            'description': 'A third test field'
-                        }
-                    }
-                }
-            }
-        }
-        expected = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'reusable': {
-                    'top_level': True,
-                    'expected': ['location1, location2', 'location3, location4']
-                },
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    },
-                    'test_field2': {
-                        'field_details': {
-                            'name': 'test_field2',
-                            'type': 'keyword',
-                            'description': 'Another test field'
-                        }
-                    },
-                    'test_field3': {
-                        'field_details': {
-                            'name': 'test_field3',
-                            'type': 'keyword',
-                            'description': 'A third test field'
-                        }
-                    }
-                }
-            }
-        }
-        schema_reader.merge_schema_fields(fieldset1, fieldset2)
-        self.assertEqual(fieldset1, expected)
-
-    def test_merge_schema_fields_fail(self):
-        fieldset1 = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    }
-                }
-            }
-        }
-        fieldset2 = {
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'test_field1': {
-                        'field_details': {
-                            'name': 'test_field1',
-                            'type': 'long',
-                            'description': 'A conflicting field'
-                        }
-                    }
-                }
-            }
-        }
-        with self.assertRaises(ValueError):
-            schema_reader.merge_schema_fields(fieldset1, fieldset2)
-
-    def test_reusable_dot_notation(self):
-        fieldset = {
-            'reusable_fieldset1': {
-                'name': 'reusable_fieldset1',
-                'reusable': {
-                    'top_level': False,
-                    'expected': [
-                        'test_fieldset.sub_field'
-                    ]
-                },
-                'fields': {
-                    'reusable_field': {
-                        'field_details': {
-                            'name': 'reusable_field',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    }
-                }
-            },
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {
-                    'sub_field': {
-                        'fields': {}
-                    }
-                }
-            }
-        }
-        expected = {
-            'sub_field': {
-                'fields': {
-                    'reusable_fieldset1': {
-                        'name': 'reusable_fieldset1',
-                        'reusable': {
-                            'top_level': False,
-                            'expected': [
-                                'test_fieldset.sub_field'
-                            ]
-                        },
-                        'fields': {
-                            'reusable_field': {
-                                'field_details': {
-                                    'name': 'reusable_field',
-                                    'type': 'keyword',
-                                    'description': 'A test field'
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset)
-        self.assertEqual(fieldset['test_fieldset']['fields'], expected)
-
-    def test_improper_reusable_fails(self):
-        fieldset = {
-            'reusable_fieldset1': {
-                'name': 'reusable_fieldset1',
-                'reusable': {
-                    'top_level': False,
-                    'expected': [
-                        'test_fieldset'
-                    ]
-                },
-                'fields': {
-                    'reusable_field': {
-                        'field_details': {
-                            'name': 'reusable_field',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    }
-                }
-            },
-            'reusable_fieldset2': {
-                'name': 'reusable_fieldset2',
-                'reusable': {
-                    'top_level': False,
-                    'expected': [
-                        'test_fieldset.reusable_fieldset1'
-                    ]
-                },
-                'fields': {
-                    'reusable_field': {
-                        'field_details': {
-                            'name': 'reusable_field',
-                            'type': 'keyword',
-                            'description': 'A test field'
-                        }
-                    }
-                }
-            },
-            'test_fieldset': {
-                'name': 'test_fieldset',
-                'fields': {}
-            }
-        }
-        # This should fail because test_fieldset.reusable_fieldset1 doesn't exist yet
-        with self.assertRaises(ValueError):
-            schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset)
-        schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset)
-        # Then this should fail because even though test_fieldset.reusable_fieldset1 now exists, test_fieldset.reusable_fieldset1 is not
-        # an allowed reusable location (it's the destination of another reusable)
-        with self.assertRaises(ValueError):
-            schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset)
-
-    def test_find_nestings(self):
-        field = {
-            'sub_field': {
-                'reusable': {
-                    'top_level': True,
-                    'expected': [
-                        'some_other_field'
-                    ]
-                },
-                'fields': {
-                    'reusable_fieldset1': {
-                        'name': 'reusable_fieldset1',
-                        'reusable': {
-                            'top_level': False,
-                            'expected': [
-                                'sub_field'
-                            ]
-                        },
-                        'fields': {
-                            'nested_reusable_field': {
-                                'reusable': {
-                                    'top_level': False,
-                                    'expected': 'sub_field.nested_reusable_field'
-                                },
-                                'field_details': {
-                                    'name': 'reusable_field',
-                                    'type': 'keyword',
-                                    'description': 'A test field'
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        expected = ['sub_field.reusable_fieldset1', 'sub_field.reusable_fieldset1.nested_reusable_field']
-        self.assertEqual(schema_reader.find_nestings(field['sub_field']['fields'], 'sub_field.'), expected)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/scripts/tests/unit/__init__.py b/scripts/tests/unit/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/scripts/tests/unit/test_schema_cleaner.py b/scripts/tests/unit/test_schema_cleaner.py
new file mode 100644
index 0000000000..56fa4d5926
--- /dev/null
+++ b/scripts/tests/unit/test_schema_cleaner.py
@@ -0,0 +1,271 @@
+import os
+import pprint
+import sys
+import unittest
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from schema import cleaner
+
+
+class TestSchemaCleaner(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+
+    def schema_process(self):
+        return {
+            'process': {
+                'schema_details': {'title': 'Process'},
+                'field_details': {
+                    'name': 'process',
+                    'description': 'Process details',
+                    'type': 'group',
+                },
+                'fields': {
+                    'pid': {
+                        'field_details': {
+                            'name': 'pid',
+                            'type': 'keyword',
+                            'description': 'The process ID',
+                            'level': 'core',
+                        }
+                    },
+                    'parent': {
+                        'field_details': {
+                            'name': 'parent',
+                            'type': 'object',
+                            'intermediate': True,
+                        },
+                        'fields': {
+                            'pid': {
+                                'field_details': {
+                                    'name': 'parent.pid',
+                                    'level': 'core',
+                                    'description': 'The process ID',
+                                    'type': 'keyword',
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+    # schemas
+
+    def test_schema_raises_on_missing_required_attributes(self):
+        schema = self.schema_process()['process']
+        schema['schema_details'].pop('title')
+        with self.assertRaisesRegex(ValueError, 'mandatory attributes: title'):
+            cleaner.schema_cleanup(schema)
+
+        schema = self.schema_process()['process']
+        schema['field_details'].pop('description')
+        with self.assertRaisesRegex(ValueError, 'mandatory attributes: description'):
+            cleaner.schema_cleanup(schema)
+
+    def test_reusable_schema_raises_on_missing_reuse_attributes(self):
+        schema = self.schema_process()['process']
+        schema['schema_details']['reusable'] = {}
+        with self.assertRaisesRegex(ValueError, 'reuse attributes: expected, top_level'):
+            cleaner.schema_mandatory_attributes(schema)
+
+        schema['schema_details']['reusable']['expected'] = ['somewhere']
+        with self.assertRaisesRegex(ValueError, 'reuse attributes: top_level'):
+            cleaner.schema_mandatory_attributes(schema)
+
+        schema['schema_details']['reusable'].pop('expected')
+        schema['schema_details']['reusable']['top_level'] = True
+        with self.assertRaisesRegex(ValueError, 'reuse attributes: expected'):
+            cleaner.schema_mandatory_attributes(schema)
+
+    def test_normalize_reuse_notation(self):
+        reuse_locations = ['source', 'destination']
+        pseudo_schema = {
+            'field_details': {'name': 'user'},
+            'schema_details': {'reusable': {'expected': reuse_locations}},
+        }
+        expected_reuse = [
+            {'at': 'source', 'as': 'user', 'full': 'source.user'},
+            {'at': 'destination', 'as': 'user', 'full': 'destination.user'},
+        ]
+        cleaner.normalize_reuse_notation(pseudo_schema)
+        self.assertEqual(pseudo_schema['schema_details']['reusable']['expected'], expected_reuse)
+
+    def test_already_normalized_reuse_notation(self):
+        reuse_locations = [{'at': 'process', 'as': 'parent'}]
+        pseudo_schema = {
+            'field_details': {'name': 'process'},
+            'schema_details': {'reusable': {'expected': reuse_locations}},
+        }
+        expected_reuse = [
+            {'at': 'process', 'as': 'parent', 'full': 'process.parent'},
+        ]
+        cleaner.normalize_reuse_notation(pseudo_schema)
+        self.assertEqual(pseudo_schema['schema_details']['reusable']['expected'], expected_reuse)
+
+    def test_schema_simple_cleanup(self):
+        my_schema = {
+            'schema_details': {
+                'title': "My Schema\n",
+                'type': "group\n ",
+                'group': 2,
+                'root': False
+            },
+            'field_details': {
+                'name': 'my_schema',
+                'short': "  a really short description\n\n",
+                'description': "\ta long\n\nmultiline description  ",
+            }
+        }
+        cleaner.schema_cleanup(my_schema)
+        self.assertEqual(my_schema['schema_details']['title'], 'My Schema')
+        self.assertEqual(my_schema['field_details']['type'], 'group')
+        self.assertEqual(my_schema['field_details']['short'], 'a really short description')
+        self.assertEqual(my_schema['field_details']['description'], "a long\n\nmultiline description")
+
+    def test_schema_cleanup_setting_defaults(self):
+        my_schema = {
+            'schema_details': {
+                'title': 'My Schema',
+                'reusable': {
+                    'top_level': True,
+                    'expected': ['foo']
+                }
+            },
+            'field_details': {
+                'name': 'my_schema',
+                'description': "a nice description ",
+            }
+        }
+        cleaner.schema_cleanup(my_schema)
+        self.assertEqual(my_schema['schema_details']['root'], False)
+        self.assertEqual(my_schema['schema_details']['group'], 2)
+        self.assertEqual(my_schema['schema_details']['prefix'], 'my_schema.')
+        self.assertEqual(my_schema['schema_details']['reusable']['order'], 2)
+        self.assertEqual(my_schema['field_details']['type'], 'group')
+        self.assertEqual(my_schema['field_details']['short'], 'a nice description')
+
+    # fields
+
+    def test_field_raises_on_missing_required_attributes(self):
+        for missing_attribute in ['name', 'description', 'type', 'level']:
+            field = self.schema_process()['process']['fields']['pid']
+            field['field_details'].pop(missing_attribute)
+            with self.assertRaisesRegex(ValueError,
+                                        "mandatory attributes: {}".format(missing_attribute)):
+                cleaner.field_mandatory_attributes(field)
+
+    def test_field_simple_cleanup(self):
+        my_field = {
+            'field_details': {
+                'name': "my_field\t",
+                'type': 'keyword',
+                'level': 'core',
+                'short': "  a really short description\n\n",
+                'description': "\ta long\n\nmultiline description  ",
+                'allowed_values': [
+                    {
+                        'name': "authentication\t",
+                        'description': "when can auth be used?\n\n",
+                    }
+                ]
+            }
+        }
+        cleaner.field_cleanup(my_field)
+        self.assertEqual(my_field['field_details']['name'], 'my_field')
+        self.assertEqual(my_field['field_details']['short'], 'a really short description')
+        self.assertEqual(my_field['field_details']['description'], "a long\n\nmultiline description")
+        # TODO Temporarily commented out to simplify initial rewrite review
+        # self.assertEqual(my_field['field_details']['allowed_values'][0]['name'], 'authentication')
+        # self.assertEqual(my_field['field_details']['allowed_values'][0]['description'], 'when can auth be used?')
+
+    def test_field_defaults(self):
+        field_min_details = {
+            'description': 'description',
+            'level': 'extended',
+            'name': 'my_field',
+            'type': 'unknown',
+        }
+        # Note: unknown datatypes simply don't have defaults (for future proofing)
+        field_details = field_min_details.copy()
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertEqual(field_details['short'], field_details['description'])
+        self.assertEqual(field_details['normalize'], [])
+
+        field_details = {**field_min_details, **{'type': 'keyword'}}
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertEqual(field_details['ignore_above'], 1024)
+
+        field_details = {**field_min_details, **{'type': 'text'}}
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertEqual(field_details['norms'], False)
+
+        field_details = {**field_min_details, **{'index': True}}
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertNotIn('doc_values', field_details)
+
+        field_details = {**field_min_details, **{'index': False}}
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertEqual(field_details['doc_values'], False)
+
+    def test_field_defaults_dont_override(self):
+        field_details = {
+            'description': 'description',
+            'level': 'extended',
+            'name': 'my_long_field',
+            'type': 'keyword',
+            'ignore_above': 8000,
+        }
+        cleaner.field_defaults({'field_details': field_details})
+        self.assertEqual(field_details['ignore_above'], 8000)
+
+    def test_multi_field_defaults_and_precalc(self):
+        field_details = {
+            'description': 'description',
+            'level': 'extended',
+            'name': 'my_field',
+            'type': 'unimportant',
+            'multi_fields': [
+                {
+                    'type': 'text'
+                },
+                {
+                    'type': 'keyword',
+                    'name': 'special_name'
+                },
+            ]
+        }
+        cleaner.field_defaults({'field_details': field_details})
+
+        mf = field_details['multi_fields'][0]
+        self.assertEqual(mf['name'], 'text')
+        self.assertEqual(mf['norms'], False)
+
+        mf = field_details['multi_fields'][1]
+        self.assertEqual(mf['name'], 'special_name')
+        self.assertEqual(mf['ignore_above'], 1024)
+
+    # common to schemas and fields
+
+    def test_multiline_short_description_raises(self):
+        schema = {'field_details': {
+            'name': 'fake_schema',
+            'short': "multiple\nlines"}}
+        with self.assertRaisesRegex(ValueError, 'single line'):
+            cleaner.single_line_short_description(schema)
+
+    def test_clean(self):
+        '''A high level sanity test'''
+        fields = self.schema_process()
+        cleaner.clean(fields)
+        # schemas are processed
+        self.assertEqual(fields['process']['schema_details']['prefix'], 'process.')
+        self.assertEqual(fields['process']['schema_details']['group'], 2)
+        # fields are processed
+        parent_pid = fields['process']['fields']['parent']['fields']['pid']
+        self.assertEqual(parent_pid['field_details']['name'], 'parent.pid')
+        self.assertEqual(parent_pid['field_details']['ignore_above'], 1024)
+        self.assertEqual(parent_pid['field_details']['short'],
+                         parent_pid['field_details']['description'])
diff --git a/scripts/tests/unit/test_schema_finalizer.py b/scripts/tests/unit/test_schema_finalizer.py
new file mode 100644
index 0000000000..573fc13054
--- /dev/null
+++ b/scripts/tests/unit/test_schema_finalizer.py
@@ -0,0 +1,297 @@
+import os
+import pprint
+import sys
+import unittest
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from schema import finalizer
+
+
+class TestSchemaFinalizer(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+
+    def schema_base(self):
+        return {
+            'base': {
+                'schema_details': {
+                    'title': 'Base',
+                    'root': True,
+                },
+                'field_details': {
+                    'name': 'base',
+                    'node_name': 'base',
+                    'short': 'short desc',
+                },
+                'fields': {
+                    '@timestamp': {
+                        'field_details': {
+                            'name': '@timestamp',
+                            'node_name': '@timestamp',
+                        }
+                    },
+                }
+            }
+        }
+
+    def schema_process(self):
+        return {
+            'process': {
+                'schema_details': {
+                    'title': 'Process',
+                    'root': False,
+                    'reusable': {
+                        'top_level': True,
+                        'order': 2,
+                        'expected': [
+                            {'full': 'process.parent', 'at': 'process', 'as': 'parent'},
+                        ]
+                    }
+                },
+                'field_details': {
+                    'name': 'process',
+                    'node_name': 'process',
+                    'short': 'short desc',
+                },
+                'fields': {
+                    'pid': {
+                        'field_details': {
+                            'name': 'pid',
+                            'node_name': 'pid',
+                        }
+                    },
+                    'thread': {
+                        'field_details': {
+                            'name': 'thread',
+                            'node_name': 'thread',
+                        },
+                        'fields': {
+                            'id': {
+                                'field_details': {
+                                    'name': 'thread.id',
+                                    'node_name': 'id',
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+    def schema_user(self):
+        return {
+            'user': {
+                'schema_details': {
+                    'root': False,
+                    'reusable': {
+                        'top_level': True,
+                        'order': 2,
+                        'expected': [
+                            {'full': 'server.user', 'at': 'server', 'as': 'user'},
+                            {'full': 'user.target', 'at': 'user', 'as': 'target'},
+                            {'full': 'user.effective', 'at': 'user', 'as': 'effective'},
+                        ]
+                    }
+                },
+                'field_details': {
+                    'name': 'user',
+                    'node_name': 'user',
+                    'type': 'group',
+                    'short': 'short desc',
+                },
+                'fields': {
+                    'name': {
+                        'field_details': {
+                            'name': 'name',
+                            'node_name': 'name',
+                        }
+                    },
+                    'full_name': {
+                        'field_details': {
+                            'name': 'full_name',
+                            'node_name': 'full_name',
+                            'multi_fields': [
+                                {'name': 'text', 'type': 'text'}
+                            ]
+                        }
+                    },
+                }
+            }
+        }
+
+    def schema_server(self):
+        return {
+            'server': {
+                'schema_details': {'root': False},
+                'field_details': {
+                    'name': 'server',
+                    'node_name': 'server',
+                    'type': 'group',
+                    'short': 'short desc',
+                },
+                'fields': {
+                    'ip': {
+                        'field_details': {
+                            'name': 'ip',
+                            'node_name': 'ip',
+                            'type': 'ip'
+                        }
+                    }
+                }
+            }
+        }
+
+    # perform_reuse
+
+    def test_perform_reuse_with_foreign_reuse_and_self_reuse(self):
+        fields = {**self.schema_user(), **self.schema_server(), **self.schema_process()}
+        # If the test had multiple foreign destinations for user fields, we could compare them together instead
+        finalizer.perform_reuse(fields)
+        process_fields = fields['process']['fields']
+        server_fields = fields['server']['fields']
+        user_fields = fields['user']['fields']
+        # Expected reuse
+        self.assertIn('parent', process_fields)
+        self.assertIn('user', server_fields)
+        self.assertIn('target', user_fields)
+        self.assertIn('effective', user_fields)
+        # Sanity check for presence of leaf fields, after performing reuse
+        self.assertIn('name', user_fields['target']['fields'])
+        self.assertIn('name', user_fields['effective']['fields'])
+        self.assertIn('name', server_fields['user']['fields'])
+        self.assertIn('pid', process_fields['parent']['fields'])
+        # Ensure the parent field of reused fields is marked as intermediate
+        self.assertTrue(server_fields['user']['field_details']['intermediate'])
+        self.assertTrue(process_fields['parent']['field_details']['intermediate'])
+        self.assertTrue(user_fields['target']['field_details']['intermediate'])
+        self.assertTrue(user_fields['effective']['field_details']['intermediate'])
+        # No unexpected cross-nesting
+        self.assertNotIn('target', user_fields['target']['fields'])
+        self.assertNotIn('target', user_fields['effective']['fields'])
+        self.assertNotIn('target', server_fields['user']['fields'])
+        # Legacy list of nestings, added to destination schema
+        self.assertIn('process.parent', fields['process']['schema_details']['nestings'])
+        self.assertIn('user.effective', fields['user']['schema_details']['nestings'])
+        self.assertIn('user.target', fields['user']['schema_details']['nestings'])
+        self.assertIn('server.user', fields['server']['schema_details']['nestings'])
+        # Attribute 'reused_here' lists nestings inside a destination schema
+        self.assertIn({'full': 'process.parent', 'schema_name': 'process', 'short': 'short desc'},
+                      fields['process']['schema_details']['reused_here'])
+        self.assertIn({'full': 'user.effective', 'schema_name': 'user', 'short': 'short desc'},
+                      fields['user']['schema_details']['reused_here'])
+        self.assertIn({'full': 'user.target', 'schema_name': 'user', 'short': 'short desc'},
+                      fields['user']['schema_details']['reused_here'])
+        self.assertIn({'full': 'server.user', 'schema_name': 'user', 'short': 'short desc'},
+                      fields['server']['schema_details']['reused_here'])
+        # Reused fields have an indication they're reused
+        self.assertEqual(process_fields['parent']['field_details']['original_fieldset'], 'process',
+                         "The parent field of reused fields should have 'original_fieldset' populated")
+        self.assertEqual(process_fields['parent']['fields']['pid']['field_details']['original_fieldset'], 'process',
+                         "Leaf fields of reused fields for self-nested fields should have 'original_fieldset'")
+        self.assertEqual(server_fields['user']['field_details']['original_fieldset'], 'user',
+                         "The parent field of foreign reused fields should have 'original_fieldset' populated")
+        self.assertEqual(server_fields['user']['fields']['name']['field_details']['original_fieldset'], 'user')
+        # Original fieldset's fields must not be marked with 'original_fieldset='
+        self.assertNotIn('original_fieldset', user_fields['name']['field_details'])
+        self.assertNotIn('original_fieldset', process_fields['pid']['field_details'])
+
+    def test_root_schema_cannot_be_reused_nor_have_field_set_reused_in_it(self):
+        reused_schema = {
+            'schema_details': {'reusable': {'expected': ['foo']}},
+            'field_details': {'name': 'reused_schema'}
+        }
+        destination_schema = {
+            'schema_details': {'reusable': {'expected': ['foo']}},
+            'field_details': {'name': 'destination_schema'}
+        }
+        # test root=true on reused
+        reused_schema['schema_details']['root'] = True
+        # foreign reuse
+        with self.assertRaisesRegex(ValueError, 'reused_schema.*root.*cannot be reused'):
+            finalizer.ensure_valid_reuse(reused_schema, destination_schema)
+        # self-nesting
+        with self.assertRaisesRegex(ValueError, 'reused_schema.*root.*cannot be reused'):
+            finalizer.ensure_valid_reuse(reused_schema)
+        # test root=true on destination
+        reused_schema['schema_details']['root'] = False
+        destination_schema['schema_details']['root'] = True
+        with self.assertRaisesRegex(ValueError, 'destination_schema.*root.*cannot'):
+            finalizer.ensure_valid_reuse(reused_schema, destination_schema)
+
+    # calculate_final_values
+
+    def test_calculate_final_values(self):
+        fields = {**self.schema_base(), **self.schema_user(), **self.schema_server()}
+        finalizer.perform_reuse(fields)
+        finalizer.calculate_final_values(fields)
+        base_fields = fields['base']['fields']
+        server_fields = fields['server']['fields']
+        user_fields = fields['user']['fields']
+        # Pre-calculated path-based values
+        # root=true
+        timestamp_details = base_fields['@timestamp']['field_details']
+        self.assertEqual(timestamp_details['flat_name'], '@timestamp',
+                         "Field sets with root=true must not namespace field names with the field set's name")
+        self.assertEqual(timestamp_details['dashed_name'], '@timestamp')
+        # root=false
+        self.assertEqual(server_fields['ip']['field_details']['flat_name'], 'server.ip',
+                         "Field sets with root=false must namespace field names with the field set's name")
+        self.assertEqual(server_fields['ip']['field_details']['dashed_name'], 'server-ip')
+        # reused
+        server_user_name_details = server_fields['user']['fields']['name']['field_details']
+        self.assertEqual(server_user_name_details['flat_name'], 'server.user.name')
+        self.assertEqual(server_user_name_details['dashed_name'], 'server-user-name')
+        # self-nestings
+        user_target_name_details = user_fields['target']['fields']['name']['field_details']
+        self.assertEqual(user_target_name_details['flat_name'], 'user.target.name')
+        self.assertEqual(user_target_name_details['dashed_name'], 'user-target-name')
+        # multi-fields flat_name
+        user_full_name_details = user_fields['full_name']['field_details']
+        self.assertEqual(user_full_name_details['multi_fields'][0]['flat_name'], 'user.full_name.text')
+
+    # field_group_at_path
+
+    def test_field_group_at_path_root_destination(self):
+        all_fields = self.schema_server()
+        fields = finalizer.field_group_at_path('server', all_fields)
+        self.assertIn('ip', fields.keys(),
+                      "should return the dictionary of server fields")
+
+    def test_field_group_at_path_find_nested_destination(self):
+        all_fields = self.schema_process()
+        fields = finalizer.field_group_at_path('process.thread', all_fields)
+        self.assertIn('id', fields.keys(),
+                      "should return the dictionary of process.thread fields")
+        self.assertEqual('thread.id', fields['id']['field_details']['name'])
+
+    def test_field_group_at_path_missing_nested_path(self):
+        all_fields = self.schema_server()
+        with self.assertRaisesRegex(ValueError, "Field server.nonexistent not found"):
+            finalizer.field_group_at_path('server.nonexistent', all_fields)
+
+    def test_field_group_at_path_leaf_field_not_field_group(self):
+        all_fields = self.schema_server()
+        with self.assertRaisesRegex(ValueError, r"Field server\.ip \(type ip\) already exists"):
+            finalizer.field_group_at_path('server.ip', all_fields)
+
+    def test_field_group_at_path_for_leaf_object_field_creates_the_section(self):
+        all_fields = {
+            'network': {
+                'field_details': {
+                    'name': 'network',
+                },
+                'fields': {
+                    'ingress': {
+                        'field_details': {
+                            'name': 'network.ingress',
+                            'type': 'object'
+                        }
+                    }
+                }
+            }
+        }
+        ingress_subfields = finalizer.field_group_at_path('network.ingress', all_fields)
+        self.assertEqual(ingress_subfields, {})
+        self.assertEqual(all_fields['network']['fields']['ingress']['fields'], {})
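The reuse mechanics exercised above can be summarized: each `reusable.expected` entry names where a field set is grafted (`at`), under what key (`as`), and the resulting path (`full`); the graft is a deep copy whose parent node is marked `intermediate` and whose fields carry `original_fieldset`, and the destination records both the legacy `nestings` list and the richer `reused_here` entries. A rough sketch of one such graft, assuming a flat field group (hypothetical helper, not the actual `schema.finalizer` code):

import copy

def sketch_reuse_one(fields, schema_name, entry):
    # entry is one 'reusable.expected' item,
    # e.g. {'at': 'server', 'as': 'user', 'full': 'server.user'}
    reused = fields[schema_name]
    grafted = copy.deepcopy(reused['fields'])  # deep copy prevents cross-nesting
    for leaf in grafted.values():  # one level only, for brevity
        leaf['field_details']['original_fieldset'] = schema_name
    destination = fields[entry['at']]
    destination['fields'][entry['as']] = {
        'field_details': {
            'name': entry['as'],
            'intermediate': True,  # parent node of the grafted fields
            'original_fieldset': schema_name,
        },
        'fields': grafted,
    }
    details = destination['schema_details']
    details.setdefault('nestings', []).append(entry['full'])
    details.setdefault('reused_here', []).append({
        'full': entry['full'],
        'schema_name': schema_name,
        'short': reused['field_details']['short'],
    })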
diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py
new file mode 100644
index 0000000000..edd585c011
--- /dev/null
+++ b/scripts/tests/unit/test_schema_loader.py
@@ -0,0 +1,599 @@
+import mock
+import os
+import pprint
+import sys
+import unittest
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from schema import loader
+
+
+class TestSchemaLoader(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+
+    # Pseudo-fixtures
+
+    def schema_base(self):
+        return {
+            'base': {
+                'schema_details': {'root': True},
+                'field_details': {'name': 'base', 'type': 'group'},
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'name': 'message',
+                            'type': 'keyword'
+                        }
+                    }
+                }
+            }
+        }
+
+    def schema_process(self):
+        return {
+            'process': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'process',
+                    'type': 'group'
+                },
+                'fields': {
+                    'pid': {
+                        'field_details': {
+                            'name': 'pid',
+                            'type': 'keyword'
+                        }
+                    },
+                    'parent': {
+                        'field_details': {'type': 'object'},
+                        'fields': {
+                            'pid': {
+                                'field_details': {
+                                    'name': 'parent.pid',
+                                    'type': 'keyword'
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+    # Schema loading
+
+    def test_load_schemas_no_custom(self):
+        fields = loader.load_schemas([])
+        self.assertEqual(
+            ['field_details', 'fields', 'schema_details'],
+            sorted(fields['process'].keys()),
+            "Schemas should have 'field_details', 'fields' and 'schema_details' subkeys")
+        self.assertEqual(
+            ['field_details'],
+            list(fields['process']['fields']['pid'].keys()),
+            "Leaf fields should have only the 'field_details' subkey")
+        self.assertIn(
+            'fields',
+            fields['process']['fields']['thread'].keys(),
+            "Fields containing nested fields should at least have the 'fields' subkey")
+
+    @mock.patch('schema.loader.read_schema_file')
+    def test_load_schemas_fail_on_accidental_fieldset_redefinition(self, mock_read_schema):
+        mock_read_schema.side_effect = [
+            {
+                'file': {
+                    'name': 'file',
+                    'type': 'keyword'
+                }
+            },
+            {
+                'file': {
+                    'name': 'file',
+                    'type': 'text'
+                }
+            }
+        ]
+        with self.assertRaises(ValueError):
+            loader.load_schema_files(['a.yml', 'b.yml'])
+
+    @mock.patch('schema.loader.read_schema_file')
+    def test_load_schemas_allows_unique_fieldsets(self, mock_read_schema):
+        file_map = {
+            'file': {
+                'name': 'file',
+                'type': 'keyword'
+            }
+        }
+        host_map = {
+            'host': {
+                'name': 'host',
+                'type': 'text'
+            }
+        }
+        mock_read_schema.side_effect = [file_map, host_map]
+        exp = {
+            'file': file_map['file'],
+            'host': host_map['host']
+        }
+        res = loader.load_schema_files(['a.yml', 'b.yml'])
+        self.assertEqual(res, exp)
+
+    def test_nest_schema_raises_on_missing_schema_name(self):
+        with self.assertRaisesRegex(ValueError, 'incomplete.yml'):
+            loader.nest_schema([{'description': 'just a description'}], 'incomplete.yml')
+
+    # nesting stuff
+
+    def test_nest_fields(self):
+        process_fields = [
+            {'name': 'pid'},
+            {'name': 'parent.pid'},
+        ]
+        expected_nested_fields = {
+            'fields': {
+                'pid': {
+                    'field_details': {
+                        'name': 'pid',
+                        'node_name': 'pid',
+                    }
+                },
+                'parent': {
+                    'field_details': {
+                        'name': 'parent',
+                        'node_name': 'parent',
+                        'type': 'object',
+                        'intermediate': True,
+                    },
+                    'fields': {
+                        'pid': {
+                            'field_details': {
+                                'name': 'parent.pid',
+                                'node_name': 'pid',
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        nested_fields = loader.nest_fields(process_fields)
+        self.assertEqual(nested_fields, expected_nested_fields)
+
+    def test_nest_fields_recognizes_explicitly_defined_object_fields(self):
+        dns_fields = [
+            {'name': 'question.name', 'type': 'keyword'},
+            {'name': 'answers', 'type': 'object'},
+            {'name': 'answers.data', 'type': 'keyword'},
+        ]
+        expected_nested_fields = {
+            'fields': {
+                'answers': {
+                    'field_details': {
+                        'name': 'answers',
+                        'node_name': 'answers',
+                        'type': 'object',
+                        'intermediate': False,
+                    },
+                    'fields': {
+                        'data': {
+                            'field_details': {
+                                'name': 'answers.data',
+                                'node_name': 'data',
+                                'type': 'keyword',
+                            }
+                        }
+                    }
+                },
+                'question': {
+                    'field_details': {
+                        'name': 'question',
+                        'node_name': 'question',
+                        'type': 'object',
+                        'intermediate': True,
+                    },
+                    'fields': {
+                        'name': {
+                            'field_details': {
+                                'name': 'question.name',
+                                'node_name': 'name',
+                                'type': 'keyword',
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        nested_fields = loader.nest_fields(dns_fields)
+        self.assertEqual(nested_fields, expected_nested_fields)
+
+    def test_nest_fields_multiple_intermediate_fields(self):
+        log_fields = [{'name': 'origin.file.name'}]
+        expected_nested_fields = {
+            'fields': {
+                'origin': {
+                    'field_details': {
+                        'name': 'origin',
+                        'node_name': 'origin',
+                        'type': 'object',
+                        'intermediate': True,
+                    },
+                    'fields': {
+                        'file': {
+                            'field_details': {
+                                'name': 'origin.file',
+                                'node_name': 'file',
+                                'type': 'object',
+                                'intermediate': True,
+                            },
+                            'fields': {
+                                'name': {
+                                    'field_details': {
+                                        'name': 'origin.file.name',
+                                        'node_name': 'name',
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        nested_log_fields = loader.nest_fields(log_fields)
+        self.assertEqual(nested_log_fields, expected_nested_fields)
+
+    def test_deep_nesting_representation(self):
+        all_schemas = {
+            'base': {
+                'name': 'base',
+                'title': 'Base',
+                'root': True,
+                'type': 'group',
+                'fields': [
+                    {'name': 'message', 'type': 'keyword'}
+                ]
+            },
+            'process': {
+                'name': 'process',
+                'type': 'group',
+                'fields': [
+                    {'name': 'pid', 'type': 'keyword'},
+                    {'name': 'parent.pid', 'type': 'keyword'},
+                ]
+            }
+        }
+        deeply_nested = loader.deep_nesting_representation(all_schemas)
+        expected_deeply_nested = {
+            'base': {
+                'schema_details': {
+                    'root': True,
+                    'title': 'Base',
+                },
+                'field_details': {
+                    'name': 'base',
+                    'node_name': 'base',
+                    'type': 'group',
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'name': 'message',
+                            'node_name': 'message',
+                            'type': 'keyword',
+                        }
+                    }
+                }
+            },
+            'process': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'process',
+                    'node_name': 'process',
+                    'type': 'group'
+                },
+                'fields': {
+                    'pid': {
+                        'field_details': {
+                            'name': 'pid',
+                            'node_name': 'pid',
+                            'type': 'keyword',
+                        }
+                    },
+                    'parent': {
+                        'field_details': {
+                            # These are made explicit for intermediate fields
+                            'name': 'parent',
+                            'node_name': 'parent',
+                            'type': 'object',
+                            'intermediate': True,
+                        },
+                        'fields': {
+                            'pid': {
+                                'field_details': {
+                                    'name': 'parent.pid',
+                                    'node_name': 'pid',
+                                    'type': 'keyword',
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        process_fields = deeply_nested['process']['fields']
+        self.assertEqual(process_fields['parent']['field_details']['intermediate'], True)
+        self.assertEqual(deeply_nested, expected_deeply_nested)
+
+    # Merging
+
+    def test_merge_new_schema(self):
+        custom = {
+            'custom': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'custom',
+                    'type': 'group'
+                },
+                'fields': {
+                    'my_field': {
+                        'field_details': {
+                            'name': 'my_field',
+                            'type': 'keyword'
+                        }
+                    }
+                }
+            }
+        }
+        expected_fields = {**self.schema_base(), **custom}
+        merged_fields = loader.merge_fields(self.schema_base(), custom)
+        self.assertEqual(expected_fields, merged_fields,
+                         "New schemas should just be a dictionary merge")
+
+    def test_merge_field_within_schema(self):
+        custom = {
+            'base': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'base'
+                },
+                'fields': {
+                    'my_field': {
+                        'field_details': {
+                            'name': 'my_field',
+                            'type': 'keyword'
+                        }
+                    }
+                }
+            }
+        }
+        expected_fields = {
+            'base': {
+                'schema_details': {'root': True},
+                'field_details': {
+                    'name': 'base',
+                    'type': 'group'
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'name': 'message',
+                            'type': 'keyword'
+                        }
+                    },
+                    'my_field': {
+                        'field_details': {
+                            'name': 'my_field',
+                            'type': 'keyword'
+                        }
+                    }
+                }
+            }
+        }
+        merged_fields = loader.merge_fields(self.schema_base(), custom)
+        self.assertEqual(['message', 'my_field'],
+                         sorted(expected_fields['base']['fields'].keys()))
+        self.assertEqual(expected_fields, merged_fields,
+                         "New fields being merged in existing schemas are merged in the 'fields' dict.")
+
+    def test_fields_with_subfields_mergeable(self):
+        custom = {
+            'process': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'process'
+                },
+                'fields': {
+                    'parent': {
+                        'field_details': {'type': 'object'},
+                        'fields': {
+                            'name': {
+                                'field_details': {
+                                    'name': 'parent.name',
+                                    'type': 'keyword'
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        merged_fields = loader.merge_fields(self.schema_process(), custom)
+        expected_fields = {
+            'process': {
+                'schema_details': {},
+                'field_details': {
+                    'name': 'process',
+                    'type': 'group'
+                },
+                'fields': {
+                    'pid': {
+                        'field_details': {
+                            'name': 'pid',
+                            'type': 'keyword'
+                        }
+                    },
+                    'parent': {
+                        'field_details': {'type': 'object'},
+                        'fields': {
+                            'pid': {
+                                'field_details': {
+                                    'name': 'parent.pid',
+                                    'type': 'keyword'
+                                }
+                            },
+                            'name': {
+                                'field_details': {
+                                    'name': 'parent.name',
+                                    'type': 'keyword'
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        self.assertEqual(merged_fields, expected_fields)
+
+    def test_merge_array_attributes(self):
+        # array attributes:
+        # - schema/reusable.expected
+        # - field/normalize
+        ecs = {
+            'foo': {
+                'schema_details': {
+                    'reusable': {
+                        'top_level': True,
+                        'expected': ['normal.location']
+                    }
+                },
+                'field_details': {'name': 'foo', 'type': 'group'},
+                'fields': {
+                    'normalized_field': {
+                        'field_details': {
+                            'name': 'normalized_field',
+                            'type': 'keyword',
+                            'normalize': ['lowercase']
+                        }
+                    },
+                    'not_initially_normalized': {
+                        'field_details': {
+                            'name': 'not_initially_normalized',
+                            'type': 'keyword'
+                        }
+                    }
+                }
+            }
+        }
+        custom = {
+            'foo': {
+                'schema_details': {
+                    'reusable': {
+                        'expected': ['a_new.location']
+                    }
+                },
+                'field_details': {'name': 'foo', 'type': 'group'},
+                'fields': {
+                    'normalized_field': {
+                        'field_details': {
+                            'name': 'normalized_field',
+                            'normalize': ['array']
+                        }
+                    },
+                    'not_initially_normalized': {
+                        'field_details': {
+                            'name': 'not_initially_normalized',
+                            'normalize': ['array']
+                        }
+                    }
+                }
+            }
+        }
+        merged_fields = loader.merge_fields(ecs, custom)
+        expected_fields = {
+            'foo': {
+                'schema_details': {
+                    'reusable': {
+                        'top_level': True,
+                        'expected': ['normal.location', 'a_new.location']
+                    }
+                },
+                'field_details': {'name': 'foo', 'type': 'group'},
+                'fields': {
+                    'normalized_field': {
+                        'field_details': {
+                            'name': 'normalized_field',
+                            'type': 'keyword',
+                            'normalize': ['lowercase', 'array']
+                        }
+                    },
+                    'not_initially_normalized': {
+                        'field_details': {
+                            'name': 'not_initially_normalized',
+                            'type': 'keyword',
+                            'normalize': ['array']
+                        }
+                    }
+                }
+            }
+        }
+        self.assertEqual(
+            merged_fields['foo']['schema_details']['reusable']['expected'],
+            ['normal.location', 'a_new.location'])
+        self.assertEqual(
+            merged_fields['foo']['fields']['normalized_field']['field_details']['normalize'],
+            ['lowercase', 'array'])
+        self.assertEqual(
+            merged_fields['foo']['fields']['not_initially_normalized']['field_details']['normalize'],
+            ['array'])
+        self.assertEqual(merged_fields, expected_fields)
+
+    def test_merge_non_array_attributes(self):
+        custom = {
+            'base': {
+                'schema_details': {
+                    'root': False,  # Override (not that I'd recommend overriding that)
+                    'group': 3  # New
+                },
+                'field_details': {
+                    'type': 'object',  # Override
+                    'example': 'foo'  # New
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'type': 'wildcard',  # Override
+                            'example': 'wild value'  # New
+                        }
+                    }
+                }
+            }
+        }
+        merged_fields = loader.merge_fields(self.schema_base(), custom)
+        expected_fields = {
+            'base': {
+                'schema_details': {
+                    'root': False,
+                    'group': 3
+                },
+                'field_details': {
+                    'name': 'base',
+                    'type': 'object',
+                    'example': 'foo'
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'name': 'message',
+                            'type': 'wildcard',
+                            'example': 'wild value'
+                        }
+                    }
+                }
+            }
+        }
+        self.assertEqual(merged_fields, expected_fields)
+
+
+if __name__ == '__main__':
+    unittest.main()
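The merge semantics these loader tests describe boil down to three rules: nested dicts merge recursively, array attributes (`reusable.expected`, `normalize`) concatenate, and scalar attributes are overridden by the custom value. A compact sketch of those rules (hypothetical helper, not the actual `schema.loader.merge_fields`):

def sketch_merge(ecs, custom):
    for key, value in custom.items():
        if isinstance(value, dict):
            sketch_merge(ecs.setdefault(key, {}), value)  # recurse into dicts
        elif isinstance(value, list):
            ecs[key] = ecs.get(key, []) + value  # arrays concatenate
        else:
            ecs[key] = value  # scalars: the custom value wins
    return ecs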
diff --git a/scripts/tests/unit/test_schema_subset_filter.py b/scripts/tests/unit/test_schema_subset_filter.py
new file mode 100644
index 0000000000..25e7c77024
--- /dev/null
+++ b/scripts/tests/unit/test_schema_subset_filter.py
@@ -0,0 +1,160 @@
+import mock
+import os
+import pprint
+import sys
+import unittest
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+from schema import subset_filter
+
+
+class TestSchemaSubsetFilter(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+
+    @mock.patch('schema.subset_filter.warn')
+    def test_eval_globs(self, mock_warn):
+        files = subset_filter.eval_globs(['schemas/*.yml', 'missing*'])
+        self.assertTrue(mock_warn.called, "a warning should have been printed for missing*")
+        self.assertIn('schemas/base.yml', files)
+        self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [],
+                         "The 'missing*' pattern should not show up in the resulting files")
+
+    @mock.patch('schema.subset_filter.warn')
+    def test_load_subset_definitions_raises_when_no_subset_found(self, mock_warn):
+        with self.assertRaisesRegex(ValueError,
+                                    r"--subset specified, but no subsets found in \['foo\*.yml'\]"):
+            subset_filter.load_subset_definitions(['foo*.yml'])
+
+    def test_basic_merging(self):
+        basics = {'base': {'fields': '*'}, 'event': {}}
+        network = {'network': {'fields': '*'}}
+        subsets = {}
+        subset_filter.merge_subsets(subsets, basics)
+        subset_filter.merge_subsets(subsets, network)
+        expected_subsets = {**basics, **network}
+        self.assertEqual(subsets, expected_subsets)
+
+    def test_merging_superset(self):
+        # 'log' is used to test superset with the explicit '{'fields': '*'}' notation
+        # 'process' is used to test superset with the shorthand '{}' notation
+        supersets = {'log': {'fields': '*'}, 'process': {}}
+        superseded = {
+            'log': {'fields': {'syslog': {'fields': '*'}}},
+            'process': {'fields': {'parent': {'fields': '*'}}},
+        }
+        subsets = {}
+        subset_filter.merge_subsets(subsets, supersets)
+        subset_filter.merge_subsets(subsets, superseded)
+        self.assertEqual(subsets, supersets)
+        # reverse order
+        subsets = {}
+        subset_filter.merge_subsets(subsets, superseded)
+        subset_filter.merge_subsets(subsets, supersets)
+        self.assertEqual(subsets, supersets)
+
+    def schema_log(self):
+        return {
+            'log': {
+                'schema_details': {'root': False},
+                'field_details': {
+                    'name': 'log',
+                    'type': 'group'
+                },
+                'fields': {
+                    'level': {
+                        'field_details': {
+                            'name': 'level',
+                            'type': 'keyword'
+                        }
+                    },
+
+                    'origin': {
+                        'fields': {
+                            'function': {
+                                'field_details': {
+                                    'name': 'function',
+                                    'type': 'keyword'
+                                }
+                            },
+                            'foo': {
+                                'field_details': {
+                                    'name': 'foo',
+                                    'type': 'keyword'
+                                }
+                            },
+                        }
+                    }
+                }
+            }
+        }
+
+    def test_extract_matching_fields_shorthand_notation(self):
+        subset = {'log': {}}
+        filtered_fields = subset_filter.extract_matching_fields(self.schema_log(), subset)
+        self.assertEqual(filtered_fields, self.schema_log())
+
+    def test_extract_matching_fields_explicit_all_fields_notation(self):
+        subset = {'log': {'fields': '*'}}
+        filtered_fields = subset_filter.extract_matching_fields(self.schema_log(), subset)
+        self.assertEqual(filtered_fields, self.schema_log())
+
+    def test_extract_matching_fields_subfields_only_notation(self):
+        subset = {'log': {'fields': {'origin': {}}}}
+        filtered_fields = subset_filter.extract_matching_fields(self.schema_log(), subset)
+        expected_fields = {
+            'log': {
+                'schema_details': {'root': False},
+                'field_details': {
+                    'name': 'log',
+                    'type': 'group'
+                },
+                'fields': {
+                    'origin': {
+                        'fields': {
+                            'function': {
+                                'field_details': {
+                                    'name': 'function',
+                                    'type': 'keyword'
+                                }
+                            },
+                            'foo': {
+                                'field_details': {
+                                    'name': 'foo',
+                                    'type': 'keyword'
+                                }
+                            },
+                        }
+                    }
+                }
+            }
+        }
+        self.assertEqual(filtered_fields, expected_fields)
+
+    def test_extract_matching_individual_field(self):
+        subset = {'log': {'fields': {'origin': {'fields': {'function': {}}}}}}
+        filtered_fields = subset_filter.extract_matching_fields(self.schema_log(), subset)
+        expected_fields = {
+            'log': {
+                'schema_details': {'root': False},
+                'field_details': {
+                    'name': 'log',
+                    'type': 'group'
+                },
+                'fields': {
+                    'origin': {
+                        'fields': {
+                            'function': {
+                                'field_details': {
+                                    'name': 'function',
+                                    'type': 'keyword'
+                                }
+                            },
+                        }
+                    }
+                }
+            }
+        }
+        self.assertEqual(filtered_fields, expected_fields)
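To recap the subset notation these tests cover: `{}` and `{'fields': '*'}` both select a field group wholesale, a nested `fields` dict recurses into specific subfields, and `merge_subsets` lets a broader selection absorb a more specific one. A sketch of the extraction rule (hypothetical helper, not the actual `schema.subset_filter` code):

def sketch_extract(fields, subset):
    out = {}
    for name, spec in subset.items():
        node = fields[name]
        if spec == {} or spec.get('fields') == '*':
            out[name] = node  # shorthand and explicit '*' take the whole group
        else:
            # keep the group's details, but filter its subfields recursively
            out[name] = {k: v for k, v in node.items() if k != 'fields'}
            out[name]['fields'] = sketch_extract(node['fields'], spec['fields'])
    return out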