From 3e76626147af6a76641eefb6b445b141b4f7c78c Mon Sep 17 00:00:00 2001 From: Ben Skelker Date: Sun, 6 Oct 2019 13:16:25 +0300 Subject: [PATCH] Update field reuse note and section structure --- docs/field-details.asciidoc | 408 ++++++++++++++++++++++---- scripts/generators/asciidoc_fields.py | 82 ++++-- 2 files changed, 405 insertions(+), 85 deletions(-) diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc index a2486bbba6..5913e43e53 100644 --- a/docs/field-details.asciidoc +++ b/docs/field-details.asciidoc @@ -4,7 +4,7 @@ The `base` field set contains all fields which are on the top level. These fields are common across all types of events. -NOTE: This field set is not reused. + ==== Base Field Details @@ -74,6 +74,12 @@ example: `["production", "env2"]` |===== +[[ecs-base-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-agent]] === Agent Fields @@ -81,7 +87,7 @@ The agent fields contain the data about the software entity, if any, that collec Examples include Beats. Agents may also run on observers. ECS agent.* fields shall be populated with details of the agent running on the host or observer where the event happened or the measurement was taken. -NOTE: This field set is not reused. + ==== Agent Field Details @@ -158,12 +164,18 @@ example: `6.0.0-rc2` |===== +[[ecs-agent-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-as]] === Autonomous System Fields An autonomous system (AS) is a collection of connected Internet Protocol (IP) routing prefixes under the control of one or more network operators on behalf of a single administrative entity or domain that presents a common, clearly defined routing policy to the internet. -NOTE: See <> information. +NOTE: See information on `as` <>. ==== Autonomous System Field Details @@ -200,9 +212,43 @@ example: `Google LLC` [[ecs-as-reuse]] ==== Field Reuse -The `as` fields are expected to be nested at: `client.as`, `destination.as`, `server.as`, `source.as`. 
-Note also that the `as` fields are not expected to be used directly at the top level. +[[ecs-as-parents]] +The `as` fields must be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields about the client side of a network connection, used with server. + +// =============================================================== + + +| <> +| Fields about the destination side of a network connection, used with source. + +// =============================================================== + + +| <> +| Fields about the server side of a network connection, used with client. + +// =============================================================== + + +| <> +| Fields about the source side of a network connection, used with destination. + +// =============================================================== + + +|===== +NOTE: The `as` fields should *not* be used directly as root fields. @@ -216,7 +262,7 @@ For TCP events, the client is the initiator of the TCP connection that sends the Client / server representations can add semantic context to an exchange, which is helpful to visualize the data in certain situations. If your context falls in that category, you should still ensure that source and destination are filled appropriately. -NOTE: See <> information. +NOTE: See information on `client` <>. ==== Client Field Details @@ -357,11 +403,11 @@ example: `google.com` [[ecs-client-nestings]] -===== Field sets that can be nested under Client +The `client` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -391,7 +437,7 @@ example: `google.com` Fields related to the cloud or infrastructure the events are coming from. -NOTE: This field set is not reused. 
+ ==== Cloud Field Details @@ -482,6 +528,12 @@ example: `us-east-1` |===== +[[ecs-cloud-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-container]] === Container Fields @@ -489,7 +541,7 @@ Container fields are used for meta information about the specific container that These fields help correlate data based containers from any runtime. -NOTE: This field set is not reused. + ==== Container Field Details @@ -567,6 +619,12 @@ example: `docker` |===== +[[ecs-container-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-destination]] === Destination Fields @@ -574,7 +632,7 @@ Destination fields describe details about the destination of a packet/event. Destination fields are usually populated in conjunction with source fields. -NOTE: See <> information. +NOTE: See information on `destination` <>. ==== Destination Field Details @@ -715,11 +773,11 @@ example: `google.com` [[ecs-destination-nestings]] -===== Field sets that can be nested under Destination +The `destination` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -751,7 +809,7 @@ Fields describing DNS queries and answers. DNS events should either represent a single DNS query prior to getting answers (`dns.type:query`) or they should represent a full exchange and contain the query details as well as all of the answers that were provided for this query (`dns.type:answer`). -NOTE: This field set is not reused. + ==== DNS Field Details @@ -961,12 +1019,18 @@ example: `answer` |===== +[[ecs-dns-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-ecs]] === ECS Fields Meta-information specific to ECS. -NOTE: This field set is not reused. + ==== ECS Field Details @@ -991,6 +1055,12 @@ example: `1.0.0` |===== +[[ecs-ecs-reuse]] +==== Field Reuse + +These fields are not reused. 
+ + [[ecs-error]] === Error Fields @@ -998,7 +1068,7 @@ These fields can represent errors of any kind. Use them for errors that happen while fetching events or in cases where the event itself contains an error. -NOTE: This field set is not reused. + ==== Error Field Details @@ -1065,6 +1135,12 @@ example: `java.lang.NullPointerException` |===== +[[ecs-error-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-event]] === Event Fields @@ -1072,7 +1148,7 @@ The event fields are used for context information about the log or metric event A log is defined as an event containing details of something that happened. Log events must include the time at which the thing happened. Examples of log events include a process starting on a host, a network packet being sent from a source to a destination, or a network connection between a client and a server being initiated or closed. A metric is defined as an event containing one or more numerical or categorical measurements and the time at which the measurement was taken. Examples of metric events include memory pressure measured on a host, or vulnerabilities measured on a scanned host. -NOTE: This field set is not reused. + ==== Event Field Details @@ -1351,6 +1427,12 @@ type: keyword |===== +[[ecs-event-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-file]] === File Fields @@ -1358,7 +1440,7 @@ A file is defined as a set of information that has been created on, or has exist File objects can be associated with host events, network events, and/or file events (e.g., those produced by File Integrity Monitoring [FIM] products or services). File fields provide details about the affected file associated with the event or metric. -NOTE: See <> information. +NOTE: See information on `file` <>. 
==== File Field Details @@ -1583,11 +1665,11 @@ example: `1001` [[ecs-file-nestings]] -===== Field sets that can be nested under File +The `file` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -1607,7 +1689,7 @@ Geo fields can carry data about a specific location related to an event. This geolocation information can be derived from techniques such as Geo IP, or be user-supplied. -NOTE: See <> information. +NOTE: See information on `geo` <>. ==== Geo Field Details @@ -1714,9 +1796,55 @@ example: `Quebec` [[ecs-geo-reuse]] ==== Field Reuse -The `geo` fields are expected to be nested at: `client.geo`, `destination.geo`, `host.geo`, `observer.geo`, `server.geo`, `source.geo`. -Note also that the `geo` fields are not expected to be used directly at the top level. +[[ecs-geo-parents]] +The `geo` fields must be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields about the client side of a network connection, used with server. + +// =============================================================== + + +| <> +| Fields about the destination side of a network connection, used with source. + +// =============================================================== + + +| <> +| Fields describing the relevant computing instance. + +// =============================================================== + + +| <> +| Fields describing an entity observing the event from outside the host. + +// =============================================================== + + +| <> +| Fields about the server side of a network connection, used with client. + +// =============================================================== + + +| <> +| Fields about the source side of a network connection, used with destination. 
+ +// =============================================================== + + +|===== +NOTE: The `geo` fields should *not* be used directly as root fields. @@ -1726,7 +1854,7 @@ Note also that the `geo` fields are not expected to be used directly at the top The group fields are meant to represent groups that are relevant to the event. -NOTE: See <> information. +NOTE: See information on `group` <>. ==== Group Field Details @@ -1763,9 +1891,25 @@ type: keyword [[ecs-group-reuse]] ==== Field Reuse -The `group` fields are expected to be nested at: `user.group`. -Note also that the `group` fields may be used directly at the top level. +[[ecs-group-parents]] +The `group` fields can be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields to describe the user relevant to the event. + +// =============================================================== + + +|===== +NOTE: The `group` fields can also be used directly as root fields. @@ -1777,7 +1921,7 @@ The hash fields represent different hash algorithms and their values. Field names for common hashes (e.g. MD5, SHA1) are predefined. Add fields for other hashes by lowercasing the hash algorithm name and using underscore separators as appropriate (snake case, e.g. sha3_512). -NOTE: See <> information. +NOTE: See information on `hash` <>. ==== Hash Field Details @@ -1836,9 +1980,31 @@ type: keyword [[ecs-hash-reuse]] ==== Field Reuse -The `hash` fields are expected to be nested at: `file.hash`, `process.hash`. -Note also that the `hash` fields are not expected to be used directly at the top level. +[[ecs-hash-parents]] +The `hash` fields must be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields describing files. 
+ +// =============================================================== + + +| <> +| These fields contain information about a process. + +// =============================================================== + + +|===== +NOTE: The `hash` fields should *not* be used directly as root fields. @@ -1850,7 +2016,7 @@ A host is defined as a general computing instance. ECS host.* fields should be populated with details about the host on which the event happened, or from which the measurement was taken. Host types include hardware, virtual machines, Docker containers, and Kubernetes nodes. -NOTE: See <> information. +NOTE: See information on `host` <>. ==== Host Field Details @@ -1967,11 +2133,11 @@ example: `1325` [[ecs-host-nestings]] -===== Field sets that can be nested under Host +The `host` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -2001,7 +2167,7 @@ example: `1325` Fields related to HTTP activity. Use the `url` field set to store the url of the request. -NOTE: This field set is not reused. + ==== HTTP Field Details @@ -2125,12 +2291,18 @@ example: `1.1` |===== +[[ecs-http-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-log]] === Log Fields Fields which are specific to log events. -NOTE: This field set is not reused. + ==== Log Field Details @@ -2214,6 +2386,12 @@ example: `Sep 19 08:26:10 localhost My log` |===== +[[ecs-log-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-network]] === Network Fields @@ -2221,7 +2399,7 @@ The network is defined as the communication path over which a host or network ev The network.* fields should be populated with details about the network activity associated with an event. -NOTE: This field set is not reused. + ==== Network Field Details @@ -2386,6 +2564,12 @@ example: `ipv4` |===== +[[ecs-network-reuse]] +==== Field Reuse + +These fields are not reused. 
+ + [[ecs-observer]] === Observer Fields @@ -2393,7 +2577,7 @@ An observer is defined as a special network, security, or application device use This could be a custom hardware appliance or a server that has been configured to run special network, security, or application software. Examples include firewalls, intrusion detection/prevention systems, network monitoring sensors, web application firewalls, data loss prevention systems, and APM servers. The observer.* fields shall be populated with details of the system, if any, that detects, observes and/or creates a network, security, or application event or metric. Message queues and ETL components used in processing events or metrics are not considered observers in ECS. -NOTE: See <> information. +NOTE: See information on `observer` <>. ==== Observer Field Details @@ -2491,11 +2675,11 @@ type: keyword [[ecs-observer-nestings]] -===== Field sets that can be nested under Observer +The `observer` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -2521,7 +2705,7 @@ The organization fields enrich data with information about the company or entity These fields help you arrange or filter data stored in an index by one or multiple organizations. -NOTE: This field set is not reused. + ==== Organization Field Details @@ -2555,12 +2739,18 @@ type: keyword |===== +[[ecs-organization-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-os]] === Operating System Fields The OS fields contain information about the operating system. -NOTE: See <> information. +NOTE: See information on `os` <>. ==== Operating System Field Details @@ -2641,9 +2831,37 @@ example: `10.14.1` [[ecs-os-reuse]] ==== Field Reuse -The `os` fields are expected to be nested at: `host.os`, `observer.os`, `user_agent.os`. -Note also that the `os` fields are not expected to be used directly at the top level. 
+[[ecs-os-parents]] +The `os` fields must be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields describing the relevant computing instance. + +// =============================================================== + + +| <> +| Fields describing an entity observing the event from outside the host. + +// =============================================================== + + +| <> +| Fields to describe a browser user_agent string. + +// =============================================================== + + +|===== +NOTE: The `os` fields should *not* be used directly as root fields. @@ -2655,7 +2873,7 @@ These fields contain information about a process. These fields can help you correlate metrics information with a process id/name from a log message. The `process.pid` often stays in the metric itself and is copied to the global field for correlation. -NOTE: See <> information. +NOTE: See information on `process` <>. ==== Process Field Details @@ -2812,11 +3030,11 @@ example: `/home/alice` [[ecs-process-nestings]] -===== Field sets that can be nested under Process +The `process` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -2838,7 +3056,7 @@ Some pieces of information can be seen in many places in an ECS event. To facili A concrete example is IP addresses, which can be under host, observer, source, destination, client, server, and network.forwarded_ip. If you append all IPs to `related.ip`, you can then search for a given IP trivially, no matter where it appeared, by querying `related.ip:a.b.c.d`. -NOTE: This field set is not reused. + ==== Related Field Details @@ -2861,6 +3079,12 @@ type: ip |===== +[[ecs-related-reuse]] +==== Field Reuse + +These fields are not reused. 
+ + [[ecs-server]] === Server Fields @@ -2870,7 +3094,7 @@ For TCP events, the server is the receiver of the initial SYN packet(s) of the T Client / server representations can add semantic context to an exchange, which is helpful to visualize the data in certain situations. If your context falls in that category, you should still ensure that source and destination are filled appropriately. -NOTE: See <> information. +NOTE: See information on `server` <>. ==== Server Field Details @@ -3011,11 +3235,11 @@ example: `google.com` [[ecs-server-nestings]] -===== Field sets that can be nested under Server +The `server` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -3047,7 +3271,7 @@ The service fields describe the service for or from which the data was collected These fields help you find and correlate logs for a specific service and version. -NOTE: This field set is not reused. + ==== Service Field Details @@ -3156,6 +3380,12 @@ example: `3.2.4` |===== +[[ecs-service-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-source]] === Source Fields @@ -3163,7 +3393,7 @@ Source fields describe details about the source of a packet/event. Source fields are usually populated in conjunction with destination fields. -NOTE: See <> information. +NOTE: See information on `source` <>. ==== Source Field Details @@ -3304,11 +3534,11 @@ example: `google.com` [[ecs-source-nestings]] -===== Field sets that can be nested under Source +The `source` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -3338,7 +3568,7 @@ example: `google.com` Distributed tracing makes it possible to analyze performance throughout a microservice architecture all in one view. 
This is accomplished by tracing all of the requests - from the initial web request in the front-end service - to queries made through multiple back-end services. -NOTE: This field set is not reused. + ==== Tracing Field Details @@ -3376,12 +3606,18 @@ example: `00f067aa0ba902b7` |===== +[[ecs-tracing-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-url]] === URL Fields URL fields provide support for complete or partial URLs, and supports the breaking down into scheme, domain, path, and so on. -NOTE: This field set is not reused. + ==== URL Field Details @@ -3530,6 +3766,12 @@ type: keyword |===== +[[ecs-url-reuse]] +==== Field Reuse + +These fields are not reused. + + [[ecs-user]] === User Fields @@ -3537,7 +3779,7 @@ The user fields describe information about the user that is relevant to the even Fields can have one entry or multiple entries. If a user has more than one id, provide an array that includes all of them. -NOTE: See <> information. +NOTE: See information on `user` <>. ==== User Field Details @@ -3622,19 +3864,59 @@ example: `albert` [[ecs-user-reuse]] ==== Field Reuse -The `user` fields are expected to be nested at: `client.user`, `destination.user`, `host.user`, `server.user`, `source.user`. -Note also that the `user` fields may be used directly at the top level. +[[ecs-user-parents]] +The `user` fields can be nested under: + +[options="header"] +|===== +| Parent fields | Description + +// =============================================================== + + +| <> +| Fields about the client side of a network connection, used with server. + +// =============================================================== + + +| <> +| Fields about the destination side of a network connection, used with source. + +// =============================================================== + + +| <> +| Fields describing the relevant computing instance. 
+ +// =============================================================== + + +| <> +| Fields about the server side of a network connection, used with client. + +// =============================================================== + + +| <> +| Fields about the source side of a network connection, used with destination. + +// =============================================================== + + +|===== +NOTE: The `user` fields can also be used directly as root fields. [[ecs-user-nestings]] -===== Field sets that can be nested under User +The `user` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== @@ -3654,7 +3936,7 @@ The user_agent fields normally come from a browser request. They often show up in web service logs coming from the parsed user agent string. -NOTE: See <> information. +NOTE: See information on `user_agent` <>. ==== User agent Field Details @@ -3717,11 +3999,11 @@ example: `12.0` [[ecs-user_agent-nestings]] -===== Field sets that can be nested under User agent +The `user_agent` field can be a parent of: [options="header"] |===== -| Nested fields | Description +| Child fields | Description // =============================================================== diff --git a/scripts/generators/asciidoc_fields.py b/scripts/generators/asciidoc_fields.py index d316610425..68df6126b2 100644 --- a/scripts/generators/asciidoc_fields.py +++ b/scripts/generators/asciidoc_fields.py @@ -50,13 +50,17 @@ def page_field_details(ecs_nested): def render_fieldset(fieldset, ecs_nested): - text = field_details_table_header().format( + text = field_set_title_description_para().format( fieldset_title=fieldset['title'], fieldset_name=fieldset['name'], fieldset_description=render_asciidoc_paragraphs(fieldset['description']), fieldset_reuse_links=render_fieldset_reuse_link(fieldset) ) + text += field_details_table_header().format( + 
fieldset_title=fieldset['title'] + ) + for field in ecs_helpers.dict_sorted_by_keys(fieldset['fields'], 'flat_name'): if 'original_fieldset' not in field: text += render_field_details_row(field) @@ -90,16 +94,19 @@ def render_field_details_row(field): def render_fieldset_reuse_section(fieldset, ecs_nested): '''Render the section on where field set can be nested, and which field sets can be nested here''' if not ('nestings' in fieldset or 'reusable' in fieldset): - return '' + text = field_reuse_section().format( + reuse_of_fieldset='These fields are not reused.', + fieldset_name=fieldset['name'] + ) + return text text = field_reuse_section().format( - reuse_of_fieldset=render_fieldset_reuses_text(fieldset), + reuse_of_fieldset=render_fieldset_reuses_text(fieldset, ecs_nested), fieldset_name=fieldset['name'] ) if 'nestings' in fieldset: text += nestings_table_header().format( - fieldset_name=fieldset['name'], - fieldset_title=fieldset['title'] + fieldset_name=fieldset['name'] ) nestings = [] for nested_fs_name in sorted(fieldset['nestings']): @@ -110,32 +117,44 @@ def render_fieldset_reuse_section(fieldset, ecs_nested): }) text += table_footer() return text - + + def render_fieldset_reuse_link(fieldset): '''Render a link to field reuse section, only when appropriate''' if ('nestings' in fieldset or 'reusable' in fieldset): - return 'NOTE: See <> information.'.format(fieldset['name']) + return 'NOTE: See information on `{field_name}` <>.'.format(field_name=fieldset['name']) else: - return 'NOTE: This field set is not reused.' 
+ return '' -def render_fieldset_reuses_text(fieldset): +def render_fieldset_reuses_text(fieldset, ecs_nested): '''Render where a given field set is expected to be reused''' if 'reusable' not in fieldset: return '' + if 'top_level' in fieldset['reusable'] and fieldset['reusable']['top_level']: + text = parent_table_header().format( + fieldset_name=fieldset['name'], + nested_condition='can' + ) + else: + text = parent_table_header().format( + fieldset_name=fieldset['name'], + nested_condition='must' + ) - section_name = fieldset['name'] - sorted_fields = sorted(fieldset['reusable']['expected']) - rendered_fields = map(lambda f: "`{}.{}`".format(f, section_name), sorted_fields) - text = "The `{}` fields are expected to be nested at: {}.\n\n".format( - section_name, ', '.join(rendered_fields)) + for parent_fs_name in sorted(fieldset['reusable']['expected']): + text += render_nesting_row({ + 'flat_nesting': "{}.{}.*".format(parent_fs_name, fieldset['name']), + 'name': parent_fs_name, + 'short': ecs_nested[parent_fs_name]['short'] + }) + text += table_footer() if 'top_level' in fieldset['reusable'] and fieldset['reusable']['top_level']: - template = "Note also that the `{}` fields may be used directly at the top level.\n\n" + text += "NOTE: The `{}` fields can also be used directly as root fields.\n\n".format(fieldset['name']) else: - template = "Note also that the `{}` fields are not expected to " + \ - "be used directly at the top level.\n\n" - text += template.format(section_name) + text += "NOTE: The `{}` fields should *not* be used directly as root fields.\n\n".format(fieldset['name']) + return text @@ -200,8 +219,7 @@ def index_footer(): # Main Fields Table - -def field_details_table_header(): +def field_set_title_description_para(): return ''' [[ecs-{fieldset_name}]] === {fieldset_title} Fields @@ -209,7 +227,11 @@ def field_details_table_header(): {fieldset_description} {fieldset_reuse_links} +''' + +def field_details_table_header(): + return ''' ==== 
{fieldset_title} Field Details [options="header"] @@ -252,11 +274,27 @@ def field_reuse_section(): def nestings_table_header(): return ''' [[ecs-{fieldset_name}-nestings]] -===== Field sets that can be nested under {fieldset_title} +The `{fieldset_name}` field can be a parent of: + +[options="header"] +|===== +| Child fields | Description + +// =============================================================== + +''' + +# Parent field table + + +def parent_table_header(): + return ''' +[[ecs-{fieldset_name}-parents]] +The `{fieldset_name}` fields {nested_condition} be nested under: [options="header"] |===== -| Nested fields | Description +| Parent fields | Description // ===============================================================