From 7869bd5b62a944d9cecad542173ad211276a69d5 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Thu, 31 Mar 2022 16:41:05 +0200 Subject: [PATCH] Import ecs fields from the ecs_nested.yml source file (#766) Import ECS fields from `ecs/ecs_nested.yml` instead of from `beats/fields.ecs.yml`. Beats fields don't include all the information provided by ECS; for example, they don't include the `normalize` rules, which are required to make checks on these normalization rules. This can be expected, as there can be features of ECS that are not used or needed in Beats but could be used by other tools. `ecs_nested.yml` takes into account where nested objects can be reused. This fixes https://github.com/elastic/elastic-package/issues/750. --- internal/fields/dependency_manager.go | 30 +++++--- internal/fields/model.go | 76 ++++++++++++++++++- .../data_stream/first/fields/geo-fields.yml | 2 - .../data_stream/first/sample_event.json | 4 +- .../other/fields_tests/docs/README.md | 5 +- 5 files changed, 100 insertions(+), 17 deletions(-) diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index 3e5c11f11..8ae592c38 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -25,8 +25,8 @@ const ( ecsSchemaName = "ecs" gitReferencePrefix = "git@" - ecsSchemaFile = "fields.ecs.yml" - ecsSchemaURL = "https://raw.githubusercontent.com/elastic/ecs/%s/generated/beats/%s" + ecsSchemaFile = "ecs_nested.yml" + ecsSchemaURL = "https://raw.githubusercontent.com/elastic/ecs/%s/generated/ecs/%s" ) // DependencyManager is responsible for resolving external field dependencies. 
@@ -61,6 +61,15 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er return nil, nil } + content, err := readECSFieldsSchemaFile(dep) + if err != nil { + return nil, errors.Wrap(err, "error reading ECS fields schema file") + } + + return parseECSFieldsSchema(content) +} + +func readECSFieldsSchemaFile(dep buildmanifest.ECSDependency) ([]byte, error) { gitReference, err := asGitReference(dep.Reference) if err != nil { return nil, errors.Wrap(err, "can't process the value as Git reference") @@ -70,12 +79,8 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er if err != nil { return nil, errors.Wrap(err, "error fetching profile path") } - cachedSchemaPath := filepath.Join(loc.FieldsCacheDir(), ecsSchemaName, gitReference, ecsSchemaFile) content, err := os.ReadFile(cachedSchemaPath) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, errors.Wrapf(err, "can't read cached schema (path: %s)", cachedSchemaPath) - } if errors.Is(err, os.ErrNotExist) { logger.Debugf("Pulling ECS dependency using reference: %s", dep.Reference) @@ -109,14 +114,21 @@ func loadECSFieldsSchema(dep buildmanifest.ECSDependency) ([]FieldDefinition, er if err != nil { return nil, errors.Wrapf(err, "can't write cached schema (path: %s)", cachedSchemaPath) } + } else if err != nil { + return nil, errors.Wrapf(err, "can't read cached schema (path: %s)", cachedSchemaPath) } - var f []FieldDefinition - err = yaml.Unmarshal(content, &f) + return content, nil +} + +func parseECSFieldsSchema(content []byte) ([]FieldDefinition, error) { + var fields FieldDefinitions + err := yaml.Unmarshal(content, &fields) if err != nil { return nil, errors.Wrap(err, "unmarshalling field body failed") } - return f[0].Fields, nil + + return fields, nil } func asGitReference(reference string) (string, error) { diff --git a/internal/fields/model.go b/internal/fields/model.go index 7ac71e754..9b6a3d2cc 100644 --- a/internal/fields/model.go +++ 
b/internal/fields/model.go @@ -4,6 +4,13 @@ package fields +import ( + "fmt" + "strings" + + "gopkg.in/yaml.v3" +) + // FieldDefinition describes a single field with its properties. type FieldDefinition struct { Name string `yaml:"name"` @@ -16,7 +23,7 @@ type FieldDefinition struct { External string `yaml:"external"` Index *bool `yaml:"index"` DocValues *bool `yaml:"doc_values"` - Fields []FieldDefinition `yaml:"fields,omitempty"` + Fields FieldDefinitions `yaml:"fields,omitempty"` MultiFields []FieldDefinition `yaml:"multi_fields,omitempty"` } @@ -82,3 +89,70 @@ func updateFields(origFields, fields []FieldDefinition) []FieldDefinition { } return updatedFields } + +// FieldDefinitions is an array of FieldDefinition, this can be unmarshalled from +// a yaml list or a yaml map. +type FieldDefinitions []FieldDefinition + +func (fds *FieldDefinitions) UnmarshalYAML(value *yaml.Node) error { + nilNode := yaml.Kind(0) + switch value.Kind { + case yaml.SequenceNode: + // Fields are defined as a list, this happens in Beats fields files. + var fields []FieldDefinition + err := value.Decode(&fields) + if err != nil { + return err + } + *fds = fields + return nil + case yaml.MappingNode: + // Fields are defined as a map, this happens in ecs fields files. + if len(value.Content)%2 != 0 { + return fmt.Errorf("pairs of key-values expected in map") + } + var fields []FieldDefinition + for i := 0; i+1 < len(value.Content); i += 2 { + key := value.Content[i] + value := value.Content[i+1] + + var name string + err := key.Decode(&name) + if err != nil { + return err + } + + var field FieldDefinition + err = value.Decode(&field) + if err != nil { + return err + } + + // "base" group is used by convention in ECS to include + // fields that can appear in the root level of the document. + // Append its child fields directly instead. + if name == "base" { + fields = append(fields, field.Fields...) 
+ } else { + field.Name = name + cleanNestedNames(field.Name, field.Fields) + fields = append(fields, field) + } + } + *fds = fields + return nil + case nilNode: + *fds = nil + return nil + default: + return fmt.Errorf("expected map or sequence") + } +} + +func cleanNestedNames(parent string, fields []FieldDefinition) { + for i := range fields { + if strings.HasPrefix(fields[i].Name, parent+".") { + fields[i].Name = fields[i].Name[len(parent)+1:] + } + } +} diff --git a/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml b/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml index 056a08c6f..37fbc8fa0 100644 --- a/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml +++ b/test/packages/other/fields_tests/data_stream/first/fields/geo-fields.yml @@ -1,6 +1,4 @@ - name: destination.geo.location external: ecs -- name: geo.location - external: ecs - name: source.geo.location external: ecs diff --git a/test/packages/other/fields_tests/data_stream/first/sample_event.json b/test/packages/other/fields_tests/data_stream/first/sample_event.json index 97d1717c5..ec22a9351 100644 --- a/test/packages/other/fields_tests/data_stream/first/sample_event.json +++ b/test/packages/other/fields_tests/data_stream/first/sample_event.json @@ -3,6 +3,6 @@ "lat": 1.0, "lon": "2.0" }, - "geo.location.lat": 3.0, - "geo.location.lon": 4.0 + "destination.geo.location.lat": 3.0, + "destination.geo.location.lon": 4.0 } \ No newline at end of file diff --git a/test/packages/other/fields_tests/docs/README.md b/test/packages/other/fields_tests/docs/README.md index e1174d949..8fbce827f 100644 --- a/test/packages/other/fields_tests/docs/README.md +++ b/test/packages/other/fields_tests/docs/README.md @@ -8,8 +8,8 @@ An example event for `first` looks as following: "lat": 1.0, "lon": "2.0" }, - "geo.location.lat": 3.0, - "geo.location.lon": 4.0 + "destination.geo.location.lat": 3.0, + "destination.geo.location.lon": 4.0 } ``` @@ -22,5 +22,4 @@ 
An example event for `first` looks as following: | data_stream.namespace | Data stream namespace. | constant_keyword | | data_stream.type | Data stream type. | constant_keyword | | destination.geo.location | Longitude and latitude. | geo_point | -| geo.location | Longitude and latitude. | geo_point | | source.geo.location | Longitude and latitude. | geo_point |