Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.x] Add --ref support for experimental artifacts (#1063) #1101

Merged
merged 1 commit into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ All notable changes to this project will be documented in this file based on the

* Field details Jinja2 template components have been consolidated into one template #897
* Add `[discrete]` marker before each section header in field details. #989
* `--ref` now loads `experimental/schemas` based on git ref in addition to `schemas`. #1063


## [1.6.0](https://github.com/elastic/ecs/compare/v1.5.0...v1.6.0)
Expand Down
18 changes: 17 additions & 1 deletion USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ And looking at a specific artifact, `../myprojects/out/generated/elasticsearch/7
...
```

`--include` can be used together with the `--ref` flag to merge custom fields into a targeted ECS version. See [`Ref`](#ref).

> NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes.

#### Subset
Expand Down Expand Up @@ -235,12 +237,26 @@ It's also possible to combine `--include` and `--subset` together! Do note that

#### Ref

The `--ref` argument allows for passing a specific `git` tag (e.g. `v.1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.
The `--ref` argument allows for passing a specific `git` tag (e.g. `v1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts.

```
$ python scripts/generator.py --ref v1.5.0
```

The `--ref` argument loads field definitions from the specified git reference (branch, tag, etc.). Definitions are read from [`./schemas`](./schemas) and, when `experimental/schemas` is passed via `--include`, also from [`./experimental/schemas`](./experimental/schemas).

Here's another example that loads both ECS fields and [experimental](experimental/README.md) changes *from branch "1.7"*, then adds custom fields on top.

```
$ python scripts/generator.py --ref 1.7 --include experimental/schemas ../myproject/fields/custom --out ../myproject/out
```

The command above will produce artifacts based on:

* main ECS field definitions as of branch 1.7
* experimental ECS changes as of branch 1.7
* custom fields in `../myproject/fields/custom` as they are on the filesystem

> Note: `--ref` depends on `git` being installed and on all expected commits/tags having been fetched from the ECS upstream repo. This is unlikely to be an issue unless you downloaded ECS as a zip archive from GitHub instead of cloning it.

#### Mapping & Template Settings
Expand Down
3 changes: 2 additions & 1 deletion scripts/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def main():

def argument_parser():
parser = argparse.ArgumentParser()
parser.add_argument('--ref', action='store', help='git reference to use when building schemas')
parser.add_argument('--ref', action='store', help='Loads fields definitions from `./schemas` subdirectory from specified git reference. \
Note that "--include experimental/schemas" will also respect this git ref.')
parser.add_argument('--include', nargs='+',
help='include user specified directory of custom field definitions')
parser.add_argument('--subset', nargs='+',
Expand Down
8 changes: 8 additions & 0 deletions scripts/generators/ecs_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ def get_tree_by_ref(ref):
return commit.tree


def path_exists_in_git_tree(tree, file_path):
    """Report whether ``file_path`` can be looked up in ``tree``.

    Args:
        tree: Object supporting ``tree[path]`` lookups that raise ``KeyError``
            for unknown paths (callers pass the tree returned by
            ``get_tree_by_ref``).
        file_path: Path, relative to the tree root, to look up.

    Returns:
        ``True`` when the lookup succeeds, ``False`` when it raises
        ``KeyError``. Any other exception propagates to the caller.
    """
    try:
        # Only the success or failure of the lookup matters; the entry itself
        # is discarded.
        tree[file_path]
    except KeyError:
        return False
    return True


def usage_doc_files():
usage_docs_dir = os.path.join(os.path.dirname(__file__), '../../docs/usage')
usage_docs_path = pathlib.Path(usage_docs_dir)
Expand Down
26 changes: 20 additions & 6 deletions scripts/schema/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,18 @@ def load_schemas(ref=None, included_files=[]):
schema_files_raw = load_schema_files(ecs_helpers.ecs_files())
fields = deep_nesting_representation(schema_files_raw)

# Custom additional files (never from git ref)
EXPERIMENTAL_SCHEMA_DIR = 'experimental/schemas'

# Custom additional files
if included_files and len(included_files) > 0:
print('Loading user defined schemas: {0}'.format(included_files))
# If --ref provided and --include loading experimental schemas
if ref and EXPERIMENTAL_SCHEMA_DIR in included_files:
exp_schema_files_raw = load_schemas_from_git(ref, target_dir=EXPERIMENTAL_SCHEMA_DIR)
exp_fields = deep_nesting_representation(exp_schema_files_raw)
fields = merge_fields(fields, exp_fields)
included_files.remove(EXPERIMENTAL_SCHEMA_DIR)
# Remaining additional custom files (never from git ref)
custom_files = ecs_helpers.get_glob_files(included_files, ecs_helpers.YAML_EXT)
custom_fields = deep_nesting_representation(load_schema_files(custom_files))
fields = merge_fields(fields, custom_fields)
Expand All @@ -68,13 +77,18 @@ def load_schema_files(files):
return fields_nested


def load_schemas_from_git(ref, target_dir='schemas'):
    """Load and merge the ``.yml`` schema blobs of ``target_dir`` at git ``ref``.

    Args:
        ref: Git reference handed to ``ecs_helpers.get_tree_by_ref``.
        target_dir: Directory inside the git tree whose blobs are read.
            Defaults to ``'schemas'``, so existing single-argument callers
            keep their behaviour.

    Returns:
        A dict built by merging, via ``ecs_helpers.safe_merge_dicts``, the
        result of ``read_schema_blob`` for every blob whose name ends in
        ``.yml``.

    Raises:
        KeyError: If ``target_dir`` is not present in the tree for ``ref``.
    """
    tree = ecs_helpers.get_tree_by_ref(ref)

    # Fail with an explicit message rather than the bare KeyError that
    # indexing the tree would produce when the directory is missing at this ref.
    if not ecs_helpers.path_exists_in_git_tree(tree, target_dir):
        raise KeyError(f"Target directory './{target_dir}' not present in git ref '{ref}'!")

    fields_nested = {}
    for blob in tree[target_dir].blobs:
        if blob.name.endswith('.yml'):
            new_fields = read_schema_blob(blob, ref)
            fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields)
    return fields_nested


Expand Down
8 changes: 8 additions & 0 deletions scripts/tests/test_ecs_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,19 @@ def test_list_subtract(self):
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a']), ['b'])
self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a', 'c']), ['b'])

# git helper tests

def test_get_tree_by_ref(self):
    """The tree resolved for tag ``v1.5.0`` must carry the expected hexsha."""
    expected_hexsha = '4449df245f6930d59bcd537a5958891261a9476b'
    resolved_tree = ecs_helpers.get_tree_by_ref('v1.5.0')
    self.assertEqual(resolved_tree.hexsha, expected_hexsha)

def test_path_exists_in_git_tree(self):
    """An unknown path is reported absent and ``schemas`` present at ``v1.6.0``."""
    v160_tree = ecs_helpers.get_tree_by_ref('v1.6.0')

    missing_result = ecs_helpers.path_exists_in_git_tree(v160_tree, 'nonexistant')
    self.assertFalse(missing_result)

    present_result = ecs_helpers.path_exists_in_git_tree(v160_tree, 'schemas')
    self.assertTrue(present_result)


if __name__ == '__main__':
unittest.main()
52 changes: 52 additions & 0 deletions scripts/tests/unit/test_schema_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,21 @@ def test_load_schemas_no_custom(self):
fields['process']['fields']['thread'].keys(),
"Fields containing nested fields should at least have the 'fields' subkey")

def test_load_schemas_git_ref(self):
    """Schemas loaded from git ref ``v1.6.0`` expose the expected nested keys."""
    fields = loader.load_schemas(ref='v1.6.0')

    # Top-level fieldset entry.
    process_keys = sorted(fields['process'].keys())
    self.assertEqual(
        ['field_details', 'fields', 'schema_details'],
        process_keys,
        "Schemas should have 'field_details', 'fields' and 'schema_details' subkeys")

    # Leaf field.
    pid_keys = list(fields['process']['fields']['pid'].keys())
    self.assertEqual(
        ['field_details'],
        pid_keys,
        "Leaf fields should have only the 'field_details' subkey")

    # Field that itself contains nested fields.
    thread_keys = fields['process']['fields']['thread'].keys()
    self.assertIn(
        'fields',
        thread_keys,
        "Fields containing nested fields should at least have the 'fields' subkey")

@mock.patch('schema.loader.read_schema_file')
def test_load_schemas_fail_on_accidental_fieldset_redefinition(self, mock_read_schema):
mock_read_schema.side_effect = [
Expand Down Expand Up @@ -124,6 +139,43 @@ def test_nest_schema_raises_on_missing_schema_name(self):
with self.assertRaisesRegex(ValueError, 'incomplete.yml'):
loader.nest_schema([{'description': 'just a description'}], 'incomplete.yml')

def test_load_schemas_from_git(self):
    """Loading ``schemas`` at ``v1.0.0`` yields exactly the expected fieldsets."""
    expected_fieldsets = [
        'agent',
        'base',
        'client',
        'cloud',
        'container',
        'destination',
        'ecs',
        'error',
        'event',
        'file',
        'geo',
        'group',
        'host',
        'http',
        'log',
        'network',
        'observer',
        'organization',
        'os',
        'process',
        'related',
        'server',
        'service',
        'source',
        'url',
        'user',
        'user_agent',
    ]
    fields = loader.load_schemas_from_git('v1.0.0', target_dir='schemas')
    loaded_fieldsets = sorted(fields.keys())
    self.assertEqual(
        expected_fieldsets,
        loaded_fieldsets,
        "Raw schema fields should have expected fieldsets for v1.0.0")

def test_load_schemas_from_git_missing_target_directory(self):
    """Requesting ``experimental`` at ``v1.5.0`` is expected to raise ``KeyError``."""
    expected_message = "not present in git ref 'v1.5.0'"
    with self.assertRaisesRegex(KeyError, expected_message):
        loader.load_schemas_from_git('v1.5.0', target_dir='experimental')

# nesting stuff

def test_nest_fields(self):
Expand Down