Skip to content

Commit

Permalink
[1.7] Add --ref support for experimental artifacts (#1063) (#1102)
Browse files Browse the repository at this point in the history
Co-authored-by: Mathieu Martin <[email protected]>
  • Loading branch information
ebeahan and webmat authored Nov 10, 2020
1 parent e10c027 commit 21a9e7c
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ All notable changes to this project will be documented in this file based on the

* Field details Jinja2 template components have been consolidated into one template #897
* Add `[discrete]` marker before each section header in field details. #989
* `--ref` now loads `experimental/schemas` based on git ref in addition to `schemas`. #1063


## [1.6.0](https://github.com/elastic/ecs/compare/v1.5.0...v1.6.0)
Expand Down
18 changes: 17 additions & 1 deletion USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ And looking at a specific artifact, `../myprojects/out/generated/elasticsearch/7
...
```

The `--include` flag can be used together with the `--ref` flag to merge custom fields into a targeted ECS version. See [`Ref`](#ref).

> NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes.
#### Subset
Expand Down Expand Up @@ -235,12 +237,26 @@ It's also possible to combine `--include` and `--subset` together! Do note that

#### Ref

The `--ref` argument allows for passing a specific `git` tag (e.g. `v.1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.
The `--ref` argument allows for passing a specific `git` tag (e.g. `v1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.

```
$ python scripts/generator.py --ref v1.5.0
```
The `--ref` argument loads field definitions from the specified git reference (branch, tag, etc.). They are read from the [`./schemas`](./schemas) directory and, when `experimental/schemas` is passed via `--include`, from [`./experimental/schemas`](./experimental/schemas) as well.
Here's another example that loads both ECS fields and [experimental](experimental/README.md) changes *from branch "1.7"*, then adds custom fields on top.
```
$ python scripts/generator.py --ref 1.7 --include experimental/schemas ../myproject/fields/custom --out ../myproject/out
```
The command above will produce artifacts based on:
* main ECS field definitions as of branch 1.7
* experimental ECS changes as of branch 1.7
* custom fields in `../myproject/fields/custom` as they are on the filesystem
> Note: `--ref` depends on `git` being installed and on all expected commits/tags having been fetched from the ECS upstream repo. This is unlikely to be an issue unless you downloaded ECS as a zip archive from GitHub rather than cloning it.
#### Mapping & Template Settings
Expand Down
3 changes: 2 additions & 1 deletion scripts/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def main():

def argument_parser():
parser = argparse.ArgumentParser()
parser.add_argument('--ref', action='store', help='git reference to use when building schemas')
parser.add_argument('--ref', action='store', help='Loads fields definitions from `./schemas` subdirectory from specified git reference. \
Note that "--include experimental/schemas" will also respect this git ref.')
parser.add_argument('--include', nargs='+',
help='include user specified directory of custom field definitions')
parser.add_argument('--subset', nargs='+',
Expand Down
8 changes: 8 additions & 0 deletions scripts/generators/ecs_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ def get_tree_by_ref(ref):
return commit.tree


def path_exists_in_git_tree(tree, file_path):
    """Report whether `file_path` can be looked up in the git `tree`.

    The check subscripts `tree` with `file_path`. A `KeyError` raised by
    that lookup is taken to mean the path is absent; any other exception
    propagates to the caller.

    Returns:
        True if the lookup succeeds, False if it raises `KeyError`.
    """
    try:
        # Only the success or failure of the lookup matters, not its value.
        tree[file_path]
    except KeyError:
        return False
    else:
        return True


def usage_doc_files():
usage_docs_dir = os.path.join(os.path.dirname(__file__), '../../docs/usage')
usage_docs_path = pathlib.Path(usage_docs_dir)
Expand Down
26 changes: 20 additions & 6 deletions scripts/schema/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,18 @@ def load_schemas(ref=None, included_files=[]):
schema_files_raw = load_schema_files(ecs_helpers.ecs_files())
fields = deep_nesting_representation(schema_files_raw)

# Custom additional files (never from git ref)
EXPERIMENTAL_SCHEMA_DIR = 'experimental/schemas'

# Custom additional files
if included_files and len(included_files) > 0:
print('Loading user defined schemas: {0}'.format(included_files))
# If --ref provided and --include loading experimental schemas
if ref and EXPERIMENTAL_SCHEMA_DIR in included_files:
exp_schema_files_raw = load_schemas_from_git(ref, target_dir=EXPERIMENTAL_SCHEMA_DIR)
exp_fields = deep_nesting_representation(exp_schema_files_raw)
fields = merge_fields(fields, exp_fields)
included_files.remove(EXPERIMENTAL_SCHEMA_DIR)
# Remaining additional custom files (never from git ref)
custom_files = ecs_helpers.get_glob_files(included_files, ecs_helpers.YAML_EXT)
custom_fields = deep_nesting_representation(load_schema_files(custom_files))
fields = merge_fields(fields, custom_fields)
Expand All @@ -68,13 +77,18 @@ def load_schema_files(files):
return fields_nested


def load_schemas_from_git(ref):
def load_schemas_from_git(ref, target_dir='schemas'):
tree = ecs_helpers.get_tree_by_ref(ref)
fields_nested = {}
for blob in tree['schemas'].blobs:
if blob.name.endswith('.yml'):
new_fields = read_schema_blob(blob, ref)
fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)

# Handle the case where the target dir doesn't exist in the git ref
if ecs_helpers.path_exists_in_git_tree(tree, target_dir):
for blob in tree[target_dir].blobs:
if blob.name.endswith('.yml'):
new_fields = read_schema_blob(blob, ref)
fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)
else:
raise KeyError(f"Target directory './{target_dir}' not present in git ref '{ref}'!")
return fields_nested


Expand Down
8 changes: 8 additions & 0 deletions scripts/tests/test_ecs_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,19 @@ def test_list_subtract(self):
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a']), ['b'])
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a', 'c']), ['b'])

# git helper tests

def test_get_tree_by_ref(self):
ref = 'v1.5.0'
tree = ecs_helpers.get_tree_by_ref(ref)
self.assertEqual(tree.hexsha, '4449df245f6930d59bcd537a5958891261a9476b')

def test_path_exists_in_git_tree(self):
ref = 'v1.6.0'
tree = ecs_helpers.get_tree_by_ref(ref)
self.assertFalse(ecs_helpers.path_exists_in_git_tree(tree, 'nonexistant'))
self.assertTrue(ecs_helpers.path_exists_in_git_tree(tree, 'schemas'))


if __name__ == '__main__':
unittest.main()
52 changes: 52 additions & 0 deletions scripts/tests/unit/test_schema_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,21 @@ def test_load_schemas_no_custom(self):
fields['process']['fields']['thread'].keys(),
"Fields containing nested fields should at least have the 'fields' subkey")

def test_load_schemas_git_ref(self):
fields = loader.load_schemas(ref='v1.6.0')
self.assertEqual(
['field_details', 'fields', 'schema_details'],
sorted(fields['process'].keys()),
"Schemas should have 'field_details', 'fields' and 'schema_details' subkeys")
self.assertEqual(
['field_details'],
list(fields['process']['fields']['pid'].keys()),
"Leaf fields should have only the 'field_details' subkey")
self.assertIn(
'fields',
fields['process']['fields']['thread'].keys(),
"Fields containing nested fields should at least have the 'fields' subkey")

@mock.patch('schema.loader.read_schema_file')
def test_load_schemas_fail_on_accidental_fieldset_redefinition(self, mock_read_schema):
mock_read_schema.side_effect = [
Expand Down Expand Up @@ -124,6 +139,43 @@ def test_nest_schema_raises_on_missing_schema_name(self):
with self.assertRaisesRegex(ValueError, 'incomplete.yml'):
loader.nest_schema([{'description': 'just a description'}], 'incomplete.yml')

def test_load_schemas_from_git(self):
fields = loader.load_schemas_from_git('v1.0.0', target_dir='schemas')
self.assertEqual(
['agent',
'base',
'client',
'cloud',
'container',
'destination',
'ecs',
'error',
'event',
'file',
'geo',
'group',
'host',
'http',
'log',
'network',
'observer',
'organization',
'os',
'process',
'related',
'server',
'service',
'source',
'url',
'user',
'user_agent'],
sorted(fields.keys()),
"Raw schema fields should have expected fieldsets for v1.0.0")

def test_load_schemas_from_git_missing_target_directory(self):
with self.assertRaisesRegex(KeyError, "not present in git ref 'v1.5.0'"):
loader.load_schemas_from_git('v1.5.0', target_dir='experimental')

# nesting stuff

def test_nest_fields(self):
Expand Down

0 comments on commit 21a9e7c

Please sign in to comment.