elastic · ebeahan · Jan 13, 2021 · Jan 7, 2021 · Jan 7, 2021 · Jan 7, 2021
diff --git a/experimental/generated/beats/fields.ecs.yml b/experimental/generated/beats/fields.ecs.yml
@@ -564,6 +564,53 @@
       ignore_above: 1024
       description: Runtime managing this container.
       example: docker
+  - name: data_stream
+    title: Data Stream
+    group: 2
+    description: 'The data_stream fields are part of defining the new data stream naming
+      scheme. In the new data stream naming scheme the value of the data stream fields
+      combine to the name of the actual data stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`.
+      This means the fields can only contain characters that are valid as part of
+      names of data streams. More details about this can be found in this blog post.
+      TODO: Add link to blog post Due to the fact that the values of the `data_stream`
+      fields make up the data stream name, the restrictions on data stream names also
+      apply to values for the `data_stream` fields. As an example, they cannot include
+      \, /, *, ?, ", <, >, |, ` `. Please see the Elasticsearch reference for [restrictions
+      on index/data stream names](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params).'
+    type: group
+    fields:
+    - name: dataset
+      level: extended
+      type: constant_keyword
+      description: 'The field can contain anything that makes sense to signify the
+        source of the data. Examples include `nginx.access`, `prometheus`, `endpoint`
+        etc. For data streams that otherwise fit, but that do not have dataset set
+        we use the value "generic" for the dataset value. `event.dataset` should have
+        the same value as `data_stream.dataset`. The dataset value has the following
+        restrictions: * Must not contain `-` * No longer than 100 chars'
+      example: nginx.access
+      default_field: false
+    - name: namespace
+      level: extended
+      type: constant_keyword
+      description: 'A user defined namespace. Namespaces are useful to allow grouping
+        of data. Many of our customers already organize their indices this way, and
+        now we are providing this best practice as a default. Many people will use
+        `default` as the value. The namespace value has the following restrictions:
+        * Must not contain `-` * No longer than 100 chars'
+      example: logs
+      default_field: false
+    - name: type
+      level: extended
+      type: constant_keyword
+      description: "An overarching type for the data stream. Currently allowed values\
+        \ include \"logs\", \"metrics\". We expect to also add \"traces\" and \"synthetics\"\
+        \ in the near future. Any future values for `data_stream.type` should also\
+        \ adhere to the following restrictions (these are derived from the Elasticsearch\
+        \ index restrictions):\n  * Must not contain `-`\n  * Must not start with\
+        \ `+` or `_`"
+      example: logs
+      default_field: false
   - name: destination
     title: Destination
     group: 2

diff --git a/experimental/generated/csv/fields.csv b/experimental/generated/csv/fields.csv
@@ -60,6 +60,9 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Normalization,Example,Description
 2.0.0-dev+exp,true,container,container.labels,object,extended,,,Image labels.
 2.0.0-dev+exp,true,container,container.name,keyword,extended,,,Container name.
 2.0.0-dev+exp,true,container,container.runtime,keyword,extended,,docker,Runtime managing this container.
+2.0.0-dev+exp,true,data_stream,data_stream.dataset,constant_keyword,extended,,nginx.access,The field can contain anything that makes sense to signify the source of the data.
+2.0.0-dev+exp,true,data_stream,data_stream.namespace,constant_keyword,extended,,logs,A user defined namespace. Namespaces are useful to allow grouping of data.
+2.0.0-dev+exp,true,data_stream,data_stream.type,constant_keyword,extended,,logs,An overarching type for the data stream.
 2.0.0-dev+exp,true,destination,destination.address,keyword,extended,,,Destination network address.
 2.0.0-dev+exp,true,destination,destination.as.number,long,extended,,15169,Unique number allocated to the autonomous system.
 2.0.0-dev+exp,true,destination,destination.as.organization.name,wildcard,extended,,Google LLC,Organization name.

diff --git a/experimental/generated/ecs/ecs_flat.yml b/experimental/generated/ecs/ecs_flat.yml
@@ -705,6 +705,50 @@ container.runtime:
   normalize: []
   short: Runtime managing this container.
   type: keyword
+data_stream.dataset:
+  dashed_name: data-stream-dataset
+  description: 'The field can contain anything that makes sense to signify the source
+    of the data. Examples include `nginx.access`, `prometheus`, `endpoint` etc. For
+    data streams that otherwise fit, but that do not have dataset set we use the value
+    "generic" for the dataset value. `event.dataset` should have the same value as
+    `data_stream.dataset`. The dataset value has the following restrictions: * Must
+    not contain `-` * No longer than 100 chars'
+  example: nginx.access
+  flat_name: data_stream.dataset
+  level: extended
+  name: dataset
+  normalize: []
+  short: The field can contain anything that makes sense to signify the source of
+    the data.
+  type: constant_keyword
+data_stream.namespace:
+  dashed_name: data-stream-namespace
+  description: 'A user defined namespace. Namespaces are useful to allow grouping
+    of data. Many of our customers already organize their indices this way, and now
+    we are providing this best practice as a default. Many people will use `default`
+    as the value. The namespace value has the following restrictions: * Must not
+    contain `-` * No longer than 100 chars'
+  example: logs
+  flat_name: data_stream.namespace
+  level: extended
+  name: namespace
+  normalize: []
+  short: A user defined namespace. Namespaces are useful to allow grouping of data.
+  type: constant_keyword
+data_stream.type:
+  dashed_name: data-stream-type
+  description: "An overarching type for the data stream. Currently allowed values\
+    \ include \"logs\", \"metrics\". We expect to also add \"traces\" and \"synthetics\"\
+    \ in the near future. Any future values for `data_stream.type` should also adhere\
+    \ to the following restrictions (these are derived from the Elasticsearch index\
+    \ restrictions):\n  * Must not contain `-`\n  * Must not start with `+` or `_`"
+  example: logs
+  flat_name: data_stream.type
+  level: extended
+  name: type
+  normalize: []
+  short: An overarching type for the data stream.
+  type: constant_keyword
 destination.address:
   dashed_name: destination-address
   description: 'Some event destination addresses are defined ambiguously. The event

diff --git a/experimental/generated/ecs/ecs_nested.yml b/experimental/generated/ecs/ecs_nested.yml
@@ -983,6 +983,70 @@ container:
   short: Fields describing the container that generated this event.
   title: Container
   type: group
+data_stream:
+  description: 'The data_stream fields are part of defining the new data stream naming
+    scheme. In the new data stream naming scheme the value of the data stream fields
+    combine to the name of the actual data stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`.
+    This means the fields can only contain characters that are valid as part of names
+    of data streams. More details about this can be found in this blog post. TODO:
+    Add link to blog post Due to the fact that the values of the `data_stream` fields
+    make up the data stream name, the restrictions on data stream names also apply
+    to values for the `data_stream` fields. As an example, they cannot include \,
+    /, *, ?, ", <, >, |, ` `. Please see the Elasticsearch reference for [restrictions
+    on index/data stream names](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params).'
+  fields:
+    data_stream.dataset:
+      dashed_name: data-stream-dataset
+      description: 'The field can contain anything that makes sense to signify the
+        source of the data. Examples include `nginx.access`, `prometheus`, `endpoint`
+        etc. For data streams that otherwise fit, but that do not have dataset set
+        we use the value "generic" for the dataset value. `event.dataset` should have
+        the same value as `data_stream.dataset`. The dataset value has the following
+        restrictions: * Must not contain `-` * No longer than 100 chars'
+      example: nginx.access
+      flat_name: data_stream.dataset
+      level: extended
+      name: dataset
+      normalize: []
+      short: The field can contain anything that makes sense to signify the source
+        of the data.
+      type: constant_keyword
+    data_stream.namespace:
+      dashed_name: data-stream-namespace
+      description: 'A user defined namespace. Namespaces are useful to allow grouping
+        of data. Many of our customers already organize their indices this way, and
+        now we are providing this best practice as a default. Many people will use
+        `default` as the value. The namespace value has the following restrictions:
+        * Must not contain `-` * No longer than 100 chars'
+      example: logs
+      flat_name: data_stream.namespace
+      level: extended
+      name: namespace
+      normalize: []
+      short: A user defined namespace. Namespaces are useful to allow grouping of
+        data.
+      type: constant_keyword
+    data_stream.type:
+      dashed_name: data-stream-type
+      description: "An overarching type for the data stream. Currently allowed values\
+        \ include \"logs\", \"metrics\". We expect to also add \"traces\" and \"synthetics\"\
+        \ in the near future. Any future values for `data_stream.type` should also\
+        \ adhere to the following restrictions (these are derived from the Elasticsearch\
+        \ index restrictions):\n  * Must not contain `-`\n  * Must not start with\
+        \ `+` or `_`"
+      example: logs
+      flat_name: data_stream.type
+      level: extended
+      name: type
+      normalize: []
+      short: An overarching type for the data stream.
+      type: constant_keyword
+  group: 2
+  name: data_stream
+  prefix: data_stream.
+  short: The data_stream fields are part of defining the new data stream naming scheme.
+  title: Data Stream
+  type: group
 destination:
   description: 'Destination fields capture details about the receiver of a network
     exchange/packet. These fields are populated from a network event, packet, or other

diff --git a/experimental/generated/elasticsearch/7/template.json b/experimental/generated/elasticsearch/7/template.json
@@ -303,6 +303,19 @@
           }
         }
       },
+      "data_stream": {
+        "properties": {
+          "dataset": {
+            "type": "constant_keyword"
+          },
+          "namespace": {
+            "type": "constant_keyword"
+          },
+          "type": {
+            "type": "constant_keyword"
+          }
+        }
+      },
       "destination": {
         "properties": {
           "address": {

diff --git a/experimental/generated/elasticsearch/component/data_stream.json b/experimental/generated/elasticsearch/component/data_stream.json
@@ -0,0 +1,25 @@
+{
+  "_meta": {
+    "documentation": "https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html",
+    "ecs_version": "2.0.0-dev+exp"
+  },
+  "template": {
+    "mappings": {
+      "properties": {
+        "data_stream": {
+          "properties": {
+            "dataset": {
+              "type": "constant_keyword"
+            },
+            "namespace": {
+              "type": "constant_keyword"
+            },
+            "type": {
+              "type": "constant_keyword"
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/experimental/generated/elasticsearch/template.json b/experimental/generated/elasticsearch/template.json
@@ -37,7 +37,8 @@
     "ecs_2.0.0-dev-exp_url",
     "ecs_2.0.0-dev-exp_user",
     "ecs_2.0.0-dev-exp_user_agent",
-    "ecs_2.0.0-dev-exp_vulnerability"
+    "ecs_2.0.0-dev-exp_vulnerability",
+    "ecs_2.0.0-dev-exp_data_stream"
   ],
   "index_patterns": [
     "try-ecs-*"

diff --git a/experimental/schemas/data_stream.yml b/experimental/schemas/data_stream.yml
@@ -0,0 +1,45 @@
+---
+- name: data_stream
+  title: Data Stream
+  short: The data_stream fields are part of defining the new data stream naming scheme.
+  description: >
+    The data_stream fields are part of defining the new data stream naming scheme.
+    In the new data stream naming scheme the value of the data stream fields combine to the name of the actual data stream in the following manner `{data_stream.type}-{data_stream.dataset}-{data_stream.namespace}`. This means the fields can only contain characters that are valid as part of names of data streams. More details about this can be found in this blog post. TODO: Add link to blog post
+    Due to the fact that the values of the `data_stream` fields make up the data stream name, the restrictions on data stream names also apply to values for the `data_stream` fields. As an example, they cannot include \, /, *, ?, ", <, >, |, ` `. Please see the Elasticsearch reference for [restrictions on index/data stream names](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html#indices-create-api-path-params).
+  fields:
+
+    - name: type
+      level: extended
+      type: constant_keyword
+      example: logs
+      description: >
+        An overarching type for the data stream.
+        Currently allowed values include "logs", "metrics". We expect to also add "traces" and "synthetics" in the near future.
+        Any future values for `data_stream.type` should also adhere to the following restrictions (these are derived from the Elasticsearch index restrictions):
+          * Must not contain `-`
+          * Must not start with `+` or `_`
+      short: An overarching type for the data stream.
+
+    - name: dataset
+      level: extended
+      type: constant_keyword
+      example: nginx.access
+      description: >
+        The field can contain anything that makes sense to signify the source of the data.
+        Examples include `nginx.access`, `prometheus`, `endpoint` etc. For data streams that otherwise fit, but that do not have dataset set we use the value "generic" for the dataset value. `event.dataset` should have the same value as `data_stream.dataset`.
+        The dataset value has the following restrictions:
+        * Must not contain `-`
+        * No longer than 100 chars
+      short: The field can contain anything that makes sense to signify the source of the data.
+
+    - name: namespace
+      level: extended
+      type: constant_keyword
+      example: logs
+      description: >
+        A user defined namespace. Namespaces are useful to allow grouping of data.
+        Many of our customers already organize their indices this way, and now we are providing this best practice as a default. Many people will use `default` as the value.
+        The namespace value has the following restrictions:
+        * Must not contain `-`
+        * No longer than 100 chars
+      short: A user defined namespace. Namespaces are useful to allow grouping of data.