From ac864e1af3a02c57ea2385f8f1ebd3c42b37e6b0 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 2 Jan 2019 17:09:41 +0100 Subject: [PATCH 01/13] creating function to process integrations --- local/bin/py/update_pre_build.py | 204 ++++++++++++++++--------------- 1 file changed, 108 insertions(+), 96 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index e0f7c176f54d0..a9b93afd1288a 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -8,6 +8,10 @@ import re import tempfile import shutil +import requests +import yaml +import pickle +from tqdm import * from collections import OrderedDict from functools import partial, wraps from itertools import chain, zip_longest @@ -15,10 +19,6 @@ from optparse import OptionParser from os import sep, makedirs, getenv, remove from os.path import exists, basename, curdir, join, abspath, normpath, dirname -import requests -import yaml -from tqdm import * -import pickle def cache_by_sha(func): @@ -241,14 +241,32 @@ def process(self): globs.append('{}{}'.format(self.options.extras, e_glob)) for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + self.process_filename(file_name) + + self.merge_integrations() + + def process_filename(self, file_name): + + if file_name.endswith('dd/utils/context/source.py'): self.process_source_attribute(file_name) + + elif file_name.endswith('.csv'): self.process_integration_metric(file_name) + + elif file_name.endswith('manifest.json'): self.process_integration_manifest(file_name) + + elif file_name.endswith('service_checks.json'): self.process_service_checks(file_name) - self.process_integration_readme(file_name) + + elif ('/integrations-core/docs/dev/' in file_name and file_name.endswith('.md')): self.dev_doc_integrations_core(file_name) - self.merge_integrations() + elif file_name.endswith('.md'): + self.process_integration_readme(file_name) + else: + print("Processing of {} was unsuccessful".format(file_name)) + def merge_integrations(self): """ Merges integrations that come under one """ @@ -298,39 +316,37 @@ def process_source_attribute(self, file_name): and inserts them into the file something.md :param file_name: path to a source.py file """ - if file_name.endswith('dd/utils/context/source.py'): - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: - result = f.read() - m = re.search(self.regex_source, result) - result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') - if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) - template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) - f.truncate(0) - f.seek(0) - f.write(new_content) + out = '|Integration name | API source attribute|\n' + out += '|:---|:---|\n' + with open(file_name, 'r') as f: + result = f.read() + m = re.search(self.regex_source, result) + result = m.group(2) if m else result + result = re.sub(r'[^0-9A-Za-z:, ]', '', result) + for line in result.split(','): + pair = line.split(':') + if len(pair) > 1: + out += 
'|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) + with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: + boundary = re.compile(r'^-{3,}$', re.MULTILINE) + _, fm, content = boundary.split(f.read(), 2) + template = "---\n{front_matter}\n---\n\n{content}\n" + new_content = template.format(front_matter=fm.strip(), content=out) + f.truncate(0) + f.seek(0) + f.write(new_content) def process_integration_metric(self, file_name): """ Take a single metadata csv file and convert it to yaml :param file_name: path to a metadata csv file """ - if file_name.endswith('.csv'): - if file_name.endswith('/metadata.csv'): - key_name = basename(dirname(normpath(file_name))) - else: - key_name = basename(file_name.replace('_metadata.csv', '')) - new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) - self.csv_to_yaml(key_name, file_name, new_file_name) + if file_name.endswith('/metadata.csv'): + key_name = basename(dirname(normpath(file_name))) + else: + key_name = basename(file_name.replace('_metadata.csv', '')) + new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) + self.csv_to_yaml(key_name, file_name, new_file_name) def dev_doc_integrations_core(self, file_name): """ @@ -338,28 +354,25 @@ def dev_doc_integrations_core(self, file_name): and transform it to be displayed on the doc in the /developers/integrations section :param file_name: path to a file """ - relative_path_on_github = '/integrations-core/docs/dev/' doc_directory = '/developers/integrations/' - if (relative_path_on_github in file_name and file_name.endswith('.md')): - - with open(file_name, mode='r+') as f: - content = f.read() + with open(file_name, mode='r+') as f: + content = f.read() - # Replacing the master README.md by _index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' + # Replacing the master README.md by _index.md to follow Hugo logic + if file_name.endswith('README.md'): + file_name = '_index.md' - #Replacing links that point to the Github folder by link that point to the doc. - new_link = doc_directory +'\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') - content = re.sub(regex_github_link, new_link, content, count=0) + #Replacing links that point to the Github folder by link that point to the doc. 
+ new_link = doc_directory +'\\2' + regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') + content = re.sub(regex_github_link, new_link, content, count=0) - # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) - makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: - f.write(content) + # Writing the new content to the documentation file + dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) + makedirs(dirp, exist_ok=True) + with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + f.write(content) def process_integration_manifest(self, file_name): """ @@ -367,19 +380,19 @@ def process_integration_manifest(self, file_name): set is_public to false to hide integrations we merge later :param file_name: path to a manifest json file """ - if file_name.endswith('manifest.json'): - names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] - with open(file_name) as f: - data = json.load(f) - data_name = data.get('name', '').lower() - if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - data['is_public'] = False - if data_name in names: - item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] - if len(item) > 0: - item[0].update(data) - else: - self.datafile_json.append(data) + + names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] + with open(file_name) as f: + data = json.load(f) + data_name = data.get('name', '').lower() + if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: + data['is_public'] = False + if data_name in names: + item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] + if len(item) > 0: + item[0].update(data) + else: + self.datafile_json.append(data) def process_service_checks(self, file_name): """ @@ -387,9 +400,8 @@ def process_service_checks(self, file_name): as the integration name it came from e.g /data/service_checks/docker.json :param file_name: path to a service_checks json file """ - if file_name.endswith('service_checks.json'): - new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) - shutil.copy(file_name, self.data_service_checks_dir + new_file_name) + new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) + shutil.copy(file_name, self.data_service_checks_dir + new_file_name) def process_integration_readme(self, file_name): """ @@ -401,36 +413,36 @@ def process_integration_readme(self, file_name): 5. 
write out file to content/integrations with filename changed to integrationname.md :param file_name: path to a readme md file """ - if file_name.endswith('.md'): - dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) - metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) - metrics = metrics[0] if len(metrics) > 0 else None - metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) - service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) - service_check = service_check[0] if len(service_check) > 0 else None - service_check_exist = service_check and exists(service_check) - manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') - manifest_json = json.load(open(manifest)) if exists(manifest) else {} - new_file_name = '{}.md'.format(basename(dirname(file_name))) - exist_already = exists(self.content_integrations_dir + new_file_name) - with open(file_name, 'r') as f: - result = f.read() - title = manifest_json.get('name', '').lower() - if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - result = re.sub(self.regex_h1, '', result, 1) - if metrics_exist: - result = re.sub(self.regex_metrics, r'\1{{< get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) - if service_check_exist: - result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) - result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') - result = self.add_integration_frontmatter(new_file_name, result, dependencies) - if not exist_already: - with open(self.content_integrations_dir + new_file_name, 'w') as out: - out.write(result) + + dependencies = [] + if file_name.startswith(self.options.integrations): + dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + elif file_name.startswith(self.options.extras): + dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) + metrics = metrics[0] if len(metrics) > 0 else None + metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) + service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) + service_check = service_check[0] if len(service_check) > 0 else None + service_check_exist = service_check and exists(service_check) + manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') + manifest_json = json.load(open(manifest)) if exists(manifest) else {} + new_file_name = '{}.md'.format(basename(dirname(file_name))) + exist_already = exists(self.content_integrations_dir + new_file_name) + with open(file_name, 'r') as f: + result = f.read() + title = manifest_json.get('name', '').lower() + if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: + result = re.sub(self.regex_h1, '', result, 1) + if metrics_exist: + result = re.sub(self.regex_metrics, r'\1{{< 
get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) + if service_check_exist: + result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) + result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') + result = self.add_integration_frontmatter(new_file_name, result, dependencies) + if not exist_already: + with open(self.content_integrations_dir + new_file_name, 'w') as out: + out.write(result) def add_integration_frontmatter(self, file_name, content, dependencies=[]): """ From cc8b35b5a8c2827356fd14ab68511f1267e8fa39 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Fri, 4 Jan 2019 13:49:10 +0100 Subject: [PATCH 02/13] bumping libs for new build strat --- local/etc/requirements3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/local/etc/requirements3.txt b/local/etc/requirements3.txt index 3430bbb32bea1..41ad3d6dff076 100644 --- a/local/etc/requirements3.txt +++ b/local/etc/requirements3.txt @@ -7,9 +7,9 @@ cffi==1.5.2 cssutils>=1.0.0 htmlmin>=0.1.10 pycparser==2.14 -awscli==1.11.182 +awscli==1.16.82 requests==2.20.0 -PyYAML==3.12 +PyYAML==3.13 tqdm==4.14.0 Pygments==2.2.0 datadog==0.16.0 From c8a4bae58550820add3eed690de8747ba723fc25 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Fri, 4 Jan 2019 13:49:32 +0100 Subject: [PATCH 03/13] extracting all variables from script to config file --- local/bin/py/update_pre_build.py | 139 +++++++++++++++++++++---------- local/etc/pull_config.yaml | 57 +++++++++++++ 2 files changed, 154 insertions(+), 42 deletions(-) create mode 100644 local/etc/pull_config.yaml diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index a9b93afd1288a..db07da4393336 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import csv import fnmatch import glob @@ -20,6 +21,7 @@ from os import sep, makedirs, getenv, remove from os.path import exists, basename, curdir, join, abspath, normpath, dirname +CONFIGURATION_FILE = './local/etc/pull_config.yaml' def cache_by_sha(func): """ only downloads fresh file, if we don't have one or we do and the sha has changed """ @@ -123,6 +125,12 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep + + self.list_of_orgs = [] + self.list_of_repos = [] + self.list_of_files = [] + self.list_of_contents = [] + self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) @@ -183,7 +191,7 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): with open(file=yml_filename, mode='w', encoding='utf-8') as f: f.write(yaml.dump(yaml_data, default_flow_style=False)) - def download_from_repo(self, org, repo, branch, globs): + def download_from_repo(self, org, repo, branch, globs=None): """ Takes github info and file globs and downloads files from github using multiple processes :param org: github organization or person @@ -208,43 +216,85 @@ def process(self): """ print('Processing') - dogweb_globs = ['integration/**/*_metadata.csv', 'integration/**/manifest.json', - 'integration/**/service_checks.json', 'integration/**/README.md', - 'dd/utils/context/source.py'] - integrations_globs = 
['*[!}]/metadata.csv', '*[!}]/manifest.json', '*[!}]/service_checks.json', '*[!}]/README.md', 'docs/**'] - extras_globs = ['**/metadata.csv', '**/manifest.json', '**/service_checks.json', '**/README.md'] - - # sync from dogweb, download if we don't have it (token required) - if not self.options.dogweb: - if self.options.token: - self.download_from_repo('DataDog', 'dogweb', 'prod', dogweb_globs) - self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, 'dogweb', sep) - - # sync from integrations-core, download if we don't have it (public repo so no token needed) - if not options.integrations: - self.download_from_repo('DataDog', 'integrations-core', 'master', integrations_globs) - self.options.integrations = '{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) - - # sync from integrations-extras, download if we don't have it (public repo so no token needed) - if not options.extras: - self.download_from_repo('DataDog', 'integrations-extras', 'master', extras_globs) - self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) - - globs = [] - - for d_glob, i_glob, e_glob in zip_longest(dogweb_globs, integrations_globs, extras_globs): - if d_glob: - globs.append('{}{}'.format(self.options.dogweb, d_glob)) - if i_glob: - globs.append('{}{}'.format(self.options.integrations, i_glob)) - if e_glob: - globs.append('{}{}'.format(self.options.extras, e_glob)) - - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + # TO DO Check first whether or not it's to do a local or a remote build + # then use the config to build the doc + + self.extract_config() + + self.local_or_upstream() + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) self.merge_integrations() + def extract_config(self): + + print('Loading {} configuration file'.format(CONFIGURATION_FILE)) + configuration = yaml.load(open(CONFIGURATION_FILE)) + + for org in configuration: + self.list_of_orgs.append(org['org_name']) + for repo in org['repos']: + self.list_of_repos.append(repo['repo_name']) + for content in repo['contents']: + content_temp = {\ + "org_name":org['org_name'],\ + "repo_name":repo['repo_name'],\ + "branch":content['branch'],\ + "globs":content['globs']} + self.list_of_contents.append(content_temp) + print('Adding content {} '.format(content_temp)) + + def local_or_upstream(self): + + for content in self.list_of_contents: + + if content['repo_name']=='dogweb': + if not self.options.dogweb: + if self.options.token: + print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.dogweb,content['globs']) + + # sync from integrations-core, download if we don't have it (public repo so no token needed) + elif content['repo_name']== 'integrations-core': + if not options.integrations: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.integrations = 
'{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.integrations,content['globs']) + + # sync from integrations-extras, download if we don't have it (public repo so no token needed) + elif content['repo_name']=='integrations-extras': + if not options.extras: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.extras,content['globs']) + + else: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + + # Adding the final globs to a global list of globs + self.list_of_files += content['globs'] + + def update_globs(self, new_path, globs): + new_globs = [] + for item in globs: + new_globs.append('{}{}'.format(new_path, item)) + + return new_globs + def process_filename(self, file_name): if file_name.endswith('dd/utils/context/source.py'): @@ -363,7 +413,7 @@ def dev_doc_integrations_core(self, file_name): if file_name.endswith('README.md'): file_name = '_index.md' - #Replacing links that point to the Github folder by link that point to the doc. + # Replacing links that point to the Github folder by link that point to the doc. new_link = doc_directory +'\\2' regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') content = re.sub(regex_github_link, new_link, content, count=0) @@ -380,7 +430,7 @@ def process_integration_manifest(self, file_name): set is_public to false to hide integrations we merge later :param file_name: path to a manifest json file """ - + names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] with open(file_name) as f: data = json.load(f) @@ -413,12 +463,7 @@ def process_integration_readme(self, file_name): 5. 
write out file to content/integrations with filename changed to integrationname.md :param file_name: path to a readme md file """ - - dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) metrics = metrics[0] if len(metrics) > 0 else None metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) @@ -427,6 +472,7 @@ def process_integration_readme(self, file_name): service_check_exist = service_check and exists(service_check) manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') manifest_json = json.load(open(manifest)) if exists(manifest) else {} + dependencies = self.add_dependencies(file_name) new_file_name = '{}.md'.format(basename(dirname(file_name))) exist_already = exists(self.content_integrations_dir + new_file_name) with open(file_name, 'r') as f: @@ -469,6 +515,15 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): fm = {'kind': 'integration'} return template.format(front_matter=fm, content=content) + def add_dependencies(self, file_name): + dependencies = [] + if file_name.startswith(self.options.integrations): + dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + + elif file_name.startswith(self.options.extras): + dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + + return dependencies if __name__ == '__main__': parser = OptionParser(usage="usage: %prog [options] link_type") diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml new file mode 100644 index 0000000000000..1e3cb812ba129 --- /dev/null +++ b/local/etc/pull_config.yaml @@ -0,0 +1,57 @@ +--- +- org_name: DataDog + + repos: + - repo_name: dogweb + + contents: + + - content_name: source + branch: prod + globs: + - dd/utils/context/source.py + path_to_remove: '' + dest_dir: '' + + - content_name: integrations + branch: prod + globs: + - integration/**/*_metadata.csv + - integration/**/manifest.json + - integration/**/service_checks.json + - integration/**/README.md + path_to_remove: '' + dest_dir: '' + + - repo_name: integrations-core + contents: + + - content_name: integrations + branch: master + globs: + - "*[!}]/metadata.csv" + - "*[!}]/manifest.json" + - "*[!}]/service_checks.json" + - "*[!}]/README.md" + dest_dir: '' + path_to_remove: '' + + - content_name: integrations-core-doc + branch: master + globs: + - docs/** + dest_dir: '' + path_to_remove: '' + + - repo_name: integrations-extras + contents: + + - content_name: integrations + branch: master + globs: + - "**/metadata.csv" + - "**/manifest.json" + - "**/service_checks.json" + - "**/README.md" + path_to_remove: '' + dest_dir: '' From 43280bc292b9139a3074d360a5cdcd66256276dd Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Mon, 7 Jan 2019 08:53:18 +0100 Subject: [PATCH 04/13] clean-up --- local/bin/py/update_pre_build.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index db07da4393336..b25ce391a0fad 100755 --- a/local/bin/py/update_pre_build.py +++ 
b/local/bin/py/update_pre_build.py @@ -125,12 +125,10 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep - self.list_of_orgs = [] self.list_of_repos = [] self.list_of_files = [] self.list_of_contents = [] - self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) @@ -216,16 +214,10 @@ def process(self): """ print('Processing') - # TO DO Check first whether or not it's to do a local or a remote build - # then use the config to build the doc - self.extract_config() - self.local_or_upstream() - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) - self.merge_integrations() def extract_config(self): From 2a89b706bf5cad9c7fb14833d5a1efda4feb2a99 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 13:44:36 -0500 Subject: [PATCH 05/13] fixing build --- ja/layouts/shortcodes/get-metrics-from-git.html | 2 ++ layouts/shortcodes/get-metrics-from-git.html | 4 +++- local/bin/py/placehold_translations.py | 3 +-- local/bin/py/update_pre_build.py | 11 ++++++----- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ja/layouts/shortcodes/get-metrics-from-git.html b/ja/layouts/shortcodes/get-metrics-from-git.html index eb950303487a6..26658e7c81ce4 100644 --- a/ja/layouts/shortcodes/get-metrics-from-git.html +++ b/ja/layouts/shortcodes/get-metrics-from-git.html @@ -18,6 +18,7 @@ {{ $data := index $.Page.Site.Data.integrations $integration }} +{{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -71,5 +72,6 @@ {{ end }} +{{ end }} {{ end }} \ No newline at end of file diff --git a/layouts/shortcodes/get-metrics-from-git.html b/layouts/shortcodes/get-metrics-from-git.html index 4f6886970fbc6..4aa23607fa380 100644 --- a/layouts/shortcodes/get-metrics-from-git.html +++ b/layouts/shortcodes/get-metrics-from-git.html @@ -29,6 +29,7 @@ {{ end }} {{ $data := ($.Scratch.Get "data") }} + {{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -82,5 +83,6 @@ {{ end }} + {{ end }} -{{ end }} \ No newline at end of file +{{ end }} diff --git a/local/bin/py/placehold_translations.py b/local/bin/py/placehold_translations.py index bcd8aba5a4d50..75890a790f70e 100755 --- a/local/bin/py/placehold_translations.py +++ b/local/bin/py/placehold_translations.py @@ -67,8 +67,7 @@ def create_placeholder_file(template, new_glob): content=new_content.strip()) with open(new_dest, 'w') as o_file: - o_file.write(content) - print("creating placeholder for {0} at {1}".format(template, new_dest)) + o_file.write(content) return new_dest diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index b25ce391a0fad..318553460c349 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -189,7 +189,7 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): with open(file=yml_filename, mode='w', encoding='utf-8') as f: f.write(yaml.dump(yaml_data, default_flow_style=False)) - def download_from_repo(self, org, repo, branch, globs=None): + def download_from_repo(self, org, repo, branch, 
globs): """ Takes github info and file globs and downloads files from github using multiple processes :param org: github organization or person @@ -216,8 +216,10 @@ def process(self): self.extract_config() self.local_or_upstream() + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) + self.merge_integrations() def extract_config(self): @@ -241,16 +243,15 @@ def extract_config(self): def local_or_upstream(self): for content in self.list_of_contents: - if content['repo_name']=='dogweb': if not self.options.dogweb: if self.options.token: print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep) print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.dogweb,content['globs']) + + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': @@ -276,7 +277,7 @@ def local_or_upstream(self): else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - + # Adding the final globs to a global list of globs self.list_of_files += content['globs'] From 8e29d01ecd4e21b0edbb116db91d67103ef5f8c0 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 14:10:06 -0500 Subject: [PATCH 06/13] fixing multi content import --- local/bin/py/update_pre_build.py | 20 ++++++++------------ local/etc/pull_config.yaml | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 318553460c349..2af3843d60062 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -152,7 +152,6 @@ def __init__(self, opts): 'cassandra_nodetool': {'action': 'merge', 'target': 'cassandra', 'remove_header': False}, 'datadog_checks_base': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'datadog_checks_tests_helper': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'dev': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'docs': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'gitlab_runner': {'action': 'merge', 'target': 'gitlab', 'remove_header': False}, 'hdfs_datanode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, @@ -255,24 +254,21 @@ def local_or_upstream(self): # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': - if not options.integrations: + if not self.options.integrations: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.integrations = '{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) print("Updating globs for new local version or {} 
repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.integrations,content['globs']) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep),content['globs']) # sync from integrations-extras, download if we don't have it (public repo so no token needed) elif content['repo_name']=='integrations-extras': - if not options.extras: + if not self.options.extras: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.extras,content['globs']) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep),content['globs']) else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) @@ -510,11 +506,11 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): def add_dependencies(self, file_name): dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + if file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep)): + dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep), "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + elif file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep)): + dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep), "https://github.com/DataDog/integrations-extras/blob/master/")) return dependencies diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index 1e3cb812ba129..c2c9e1c8e4afe 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -39,7 +39,7 @@ - content_name: integrations-core-doc branch: master globs: - - docs/** + - docs/dev/*.md dest_dir: '' path_to_remove: '' From f8b920901af647f8c577750a39ac6652b4fd6db7 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:30:09 -0500 Subject: [PATCH 07/13] abstracting file processing --- local/bin/py/update_pre_build.py | 154 +++++++++++++++++-------------- local/etc/pull_config.yaml | 21 ++--- 2 files changed, 93 insertions(+), 82 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 2af3843d60062..e0a5b53769076 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -214,10 +214,10 @@ def process(self): print('Processing') self.extract_config() + self.local_or_upstream() - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): - self.process_filename(file_name) + self.process_filenames() self.merge_integrations() @@ -225,17 +225,21 @@ def extract_config(self): print('Loading {} configuration file'.format(CONFIGURATION_FILE)) configuration = 
yaml.load(open(CONFIGURATION_FILE)) - for org in configuration: self.list_of_orgs.append(org['org_name']) for repo in org['repos']: self.list_of_repos.append(repo['repo_name']) for content in repo['contents']: - content_temp = {\ - "org_name":org['org_name'],\ - "repo_name":repo['repo_name'],\ - "branch":content['branch'],\ - "globs":content['globs']} + content_temp = {} + content_temp['org_name'] = org['org_name'] + content_temp['repo_name'] = repo['repo_name'] + content_temp['branch'] = content['branch'] + content_temp['action']= content['action'] + content_temp['globs'] = content['globs'] + + if content['action'] == 'pull-and-push': + content_temp['options'] = content['options'] + self.list_of_contents.append(content_temp) print('Adding content {} '.format(content_temp)) @@ -284,28 +288,64 @@ def update_globs(self, new_path, globs): return new_globs - def process_filename(self, file_name): + def process_filenames(self): - if file_name.endswith('dd/utils/context/source.py'): - self.process_source_attribute(file_name) + for content in self.list_of_contents: + print("Processing content: {}".format(content)) + if content['action'] == 'integrations': + self.process_integrations(content['globs']) - elif file_name.endswith('.csv'): - self.process_integration_metric(file_name) + elif content['action'] == 'source': + + self.process_source_attribute(content['globs']) + + elif content['action'] == 'pull-and-push': + + self.pull_and_push(content) + else: + print("[ERROR] Unsuccessful Processing of {}".format(content)) + + def process_integrations(self,globs): + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + if file_name.endswith('.csv'): + self.process_integration_metric(file_name) - elif file_name.endswith('manifest.json'): - self.process_integration_manifest(file_name) + elif file_name.endswith('manifest.json'): + self.process_integration_manifest(file_name) - elif file_name.endswith('service_checks.json'): - self.process_service_checks(file_name) + elif file_name.endswith('service_checks.json'): + self.process_service_checks(file_name) - elif ('/integrations-core/docs/dev/' in file_name and file_name.endswith('.md')): - self.dev_doc_integrations_core(file_name) + elif file_name.endswith('.md'): + self.process_integration_readme(file_name) - elif file_name.endswith('.md'): - self.process_integration_readme(file_name) - else: - print("Processing of {} was unsuccessful".format(file_name)) + def pull_and_push(self, content): + """ + Take the content from a folder following github logic + and transform it to be displayed in the doc in dest_dir folder + :param globs: folder to pull + :param dest_dir: folder to push the data to in the doc repo + """ + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in content['globs'])): + with open(file_name, mode='r+') as f: + file_content = f.read() + + # Replacing the master README.md by _index.md to follow Hugo logic + if file_name.endswith('README.md'): + file_name = '_index.md' + + # Replacing links that point to the Github folder by link that point to the doc. 
+ new_link = content['options']['dest_dir'] + '\\2' + regex_github_link = re.compile(r'(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md'.format(content['org_name'],content['repo_name'],content['branch'],content['options']['path_to_remove'])) + file_content = re.sub(regex_github_link, new_link, file_content, count=0) + # Writing the new content to the documentation file + dirp = '{}{}'.format(self.content_dir, content['options']['dest_dir'][1:]) + makedirs(dirp, exist_ok=True) + with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + f.write(file_content) def merge_integrations(self): """ Merges integrations that come under one """ @@ -349,31 +389,33 @@ def merge_integrations(self): data = '---\n{0}\n---\n'.format(fm) f.write(data) - def process_source_attribute(self, file_name): + def process_source_attribute(self, globs): """ Take a single source.py file extracts the FROM_DISPLAY_NAME dict values and inserts them into the file something.md :param file_name: path to a source.py file """ - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: - result = f.read() - m = re.search(self.regex_source, result) - result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') - if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) - template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) - f.truncate(0) - f.seek(0) - f.write(new_content) + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + if file_name.endswith('dd/utils/context/source.py'): + out = '|Integration name | API source attribute|\n' + out += '|:---|:---|\n' + with open(file_name, 'r') as f: + result = f.read() + m = re.search(self.regex_source, result) + result = m.group(2) if m else result + result = re.sub(r'[^0-9A-Za-z:, ]', '', result) + for line in result.split(','): + pair = line.split(':') + if len(pair) > 1: + out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) + with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: + boundary = re.compile(r'^-{3,}$', re.MULTILINE) + _, fm, content = boundary.split(f.read(), 2) + template = "---\n{front_matter}\n---\n\n{content}\n" + new_content = template.format(front_matter=fm.strip(), content=out) + f.truncate(0) + f.seek(0) + f.write(new_content) def process_integration_metric(self, file_name): """ @@ -387,32 +429,6 @@ def process_integration_metric(self, file_name): new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) self.csv_to_yaml(key_name, file_name, new_file_name) - def dev_doc_integrations_core(self, file_name): - """ - Take the content from https://github.com/DataDog/integrations-core/tree/master/docs/dev - and transform it to be displayed on the doc in the /developers/integrations section - :param file_name: path to a file - """ - doc_directory = '/developers/integrations/' - - with open(file_name, mode='r+') as f: - content = f.read() - - # Replacing the master README.md by 
_index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' - - # Replacing links that point to the Github folder by link that point to the doc. - new_link = doc_directory +'\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') - content = re.sub(regex_github_link, new_link, content, count=0) - - # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) - makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: - f.write(content) - def process_integration_manifest(self, file_name): """ Take a single manifest json file and upsert to integrations.json data diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index c2c9e1c8e4afe..2c02f67470f42 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -6,47 +6,42 @@ contents: - - content_name: source + - action: source branch: prod globs: - dd/utils/context/source.py - path_to_remove: '' - dest_dir: '' - - content_name: integrations + - action: integrations branch: prod globs: - integration/**/*_metadata.csv - integration/**/manifest.json - integration/**/service_checks.json - integration/**/README.md - path_to_remove: '' - dest_dir: '' - repo_name: integrations-core contents: - - content_name: integrations + - action: integrations branch: master globs: - "*[!}]/metadata.csv" - "*[!}]/manifest.json" - "*[!}]/service_checks.json" - "*[!}]/README.md" - dest_dir: '' - path_to_remove: '' - - content_name: integrations-core-doc + - action: pull-and-push branch: master globs: - docs/dev/*.md - dest_dir: '' - path_to_remove: '' + options: + dest_dir: '/developers/integrations/' + path_to_remove: 'docs/dev/' - repo_name: integrations-extras contents: - - content_name: integrations + - action: integrations branch: master globs: - "**/metadata.csv" From 8045d65faa96eeca6613a210b3483d22fac2a0da Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:38:38 -0500 Subject: [PATCH 08/13] removing useless params --- local/bin/py/update_pre_build.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index e0a5b53769076..07069dc3858b8 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -125,9 +125,6 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep - self.list_of_orgs = [] - self.list_of_repos = [] - self.list_of_files = [] self.list_of_contents = [] self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) @@ -226,9 +223,7 @@ def extract_config(self): print('Loading {} configuration file'.format(CONFIGURATION_FILE)) configuration = yaml.load(open(CONFIGURATION_FILE)) for org in configuration: - self.list_of_orgs.append(org['org_name']) for repo in org['repos']: - self.list_of_repos.append(repo['repo_name']) for content in repo['contents']: content_temp = {} content_temp['org_name'] = org['org_name'] @@ -277,9 +272,6 @@ def local_or_upstream(self): else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], 
content['repo_name'], content['branch'], content['globs']) - - # Adding the final globs to a global list of globs - self.list_of_files += content['globs'] def update_globs(self, new_path, globs): new_globs = [] From 27e5a1fbb0d226083f4338c6c30408291c90f697 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:52:28 -0500 Subject: [PATCH 09/13] factorising lines --- local/bin/py/update_pre_build.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 07069dc3858b8..703d02f52c06c 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -245,20 +245,14 @@ def local_or_upstream(self): if not self.options.dogweb: if self.options.token: print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': if not self.options.integrations: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep),content['globs']) + # sync from integrations-extras, download if we don't have it (public repo so no token needed) elif content['repo_name']=='integrations-extras': @@ -266,13 +260,13 @@ def local_or_upstream(self): print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep),content['globs']) - else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print("Updating globs for new local version of repo {}".format(content['repo_name'])) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + def update_globs(self, new_path, globs): new_globs = [] for item in globs: From 2ca68cee6dc6225effa3a17bde6cda1fcac28765 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 08:56:02 -0500 Subject: [PATCH 10/13] formating code and adding comments --- local/bin/py/update_pre_build.py | 1054 +++++++++++++++++++++++------- 1 file changed, 820 insertions(+), 234 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 703d02f52c06c..9eb33d06b4171 100755 --- 
a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -19,33 +19,52 @@ from multiprocessing.pool import ThreadPool as Pool from optparse import OptionParser from os import sep, makedirs, getenv, remove -from os.path import exists, basename, curdir, join, abspath, normpath, dirname +from os.path import ( + exists, + basename, + curdir, + join, + abspath, + normpath, + dirname, +) + +CONFIGURATION_FILE = "./local/etc/pull_config.yaml" -CONFIGURATION_FILE = './local/etc/pull_config.yaml' def cache_by_sha(func): """ only downloads fresh file, if we don't have one or we do and the sha has changed """ + @wraps(func) def cached_func(*args, **kwargs): cache = {} list_item = args[1] - dest_dir = kwargs.get('dest_dir') - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) - p_file_out = '{}{}.pickle'.format(dest_dir, path_to_file) + dest_dir = kwargs.get("dest_dir") + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) + p_file_out = "{}{}.pickle".format( + dest_dir, path_to_file + ) makedirs(dirname(file_out), exist_ok=True) if exists(p_file_out) and exists(file_out): - with open(p_file_out, 'rb') as pf: + with open(p_file_out, "rb") as pf: cache = pickle.load(pf) - cache_sha = cache.get('sha', False) - input_sha = list_item.get('sha', False) - if cache_sha and input_sha and cache_sha == input_sha: + cache_sha = cache.get("sha", False) + input_sha = list_item.get("sha", False) + if ( + cache_sha + and input_sha + and cache_sha == input_sha + ): # do nothing as we have the up to date file already return None else: - with open(p_file_out, mode='wb+') as pf: - pickle.dump(list_item, pf, pickle.HIGHEST_PROTOCOL) + with open(p_file_out, mode="wb+") as pf: + pickle.dump( + list_item, pf, pickle.HIGHEST_PROTOCOL + ) return func(*args, **kwargs) + return cached_func @@ -60,39 +79,68 @@ def __exit__(self, *exc): return False def headers(self): - return {'Authorization': 'token {}'.format(self.token)} if self.token else {} + return ( + {"Authorization": "token {}".format(self.token)} + if self.token + else {} + ) def extract(self, data): out = [] - for item in data.get('tree', []): - out.append({'path': item.get('path', ''), 'url': item.get('url', ''), 'type': item.get('type', ''), - 'sha': item.get('sha', '')}) - if item.get('tree', None): - out.append(self.extract(item.get('tree'))) + for item in data.get("tree", []): + out.append( + { + "path": item.get("path", ""), + "url": item.get("url", ""), + "type": item.get("type", ""), + "sha": item.get("sha", ""), + } + ) + if item.get("tree", None): + out.append(self.extract(item.get("tree"))) return out def list(self, org, repo, branch, globs=None): globs = [] if globs is None else globs listing = [] # get the latest sha - url = 'https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}'.format(org, repo, branch) + url = "https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}".format( + org, repo, branch + ) headers = self.headers() - print('Getting latest sha from {}/{}..'.format(repo, branch)) + print( + "Getting latest sha from {}/{}..".format( + repo, branch + ) + ) sha_response = requests.get(url, headers=headers) if sha_response.status_code == requests.codes.ok: - sha = sha_response.json().get('object', {}).get('sha', None) + sha = ( + sha_response.json() + .get("object", {}) + .get("sha", None) + ) if sha: - print('Getting tree from {}/{} @ {}'.format(repo, branch, sha)) + print( + "Getting tree from {}/{} @ {}".format( + repo, branch, sha + ) + ) 
tree_response = requests.get( - 'https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1'.format(org, repo, sha), - headers=headers) + "https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1".format( + org, repo, sha + ), + headers=headers, + ) if tree_response.status_code == 200: - listing = self.extract(tree_response.json()) + listing = self.extract( + tree_response.json() + ) if globs: filtered_listing = [] for item in listing: - path = item.get('path', '') + path = item.get("path", "") for glob_string in globs: if fnmatch.fnmatch(path, glob_string): filtered_listing.append(item) @@ -101,17 +149,27 @@ def list(self, org, repo, branch, globs=None): return listing @cache_by_sha - def raw(self, list_item, request_session, org, repo, branch, dest_dir): + def raw( + self, + list_item, + request_session, + org, + repo, + branch, + dest_dir, + ): headers = self.headers() - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) raw_response = request_session.get( - 'https://raw.githubusercontent.com/{0}/{1}/{2}/{3}'.format(org, repo, branch, path_to_file), - headers=headers + "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( + org, repo, branch, path_to_file + ), + headers=headers, ) if raw_response.status_code == requests.codes.ok: makedirs(dirname(file_out), exist_ok=True) - with open(file_out, mode='wb+') as f: + with open(file_out, mode="wb+") as f: f.write(raw_response.content) @@ -119,54 +177,204 @@ class PreBuild: def __init__(self, opts): super().__init__() self.options = opts - if self.options.dogweb and not self.options.dogweb.endswith(sep): + if ( + self.options.dogweb + and not self.options.dogweb.endswith(sep) + ): self.options.dogweb = self.options.dogweb + sep - if self.options.integrations and not self.options.integrations.endswith(sep): - self.options.integrations = self.options.integrations + sep - if self.options.extras and not self.options.extras.endswith(sep): + if ( + self.options.integrations + and not self.options.integrations.endswith(sep) + ): + self.options.integrations = ( + self.options.integrations + sep + ) + if ( + self.options.extras + and not self.options.extras.endswith(sep) + ): self.options.extras = self.options.extras + sep self.list_of_contents = [] - self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() - self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) - self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) - self.data_integrations_dir = join(self.data_dir, 'integrations') + sep - self.data_service_checks_dir = join(self.data_dir, 'service_checks') + sep - self.content_integrations_dir = join(self.content_dir, 'integrations') + sep - self.extract_dir = '{0}'.format(join(self.tempdir, "extracted") + sep) - self.integration_datafile = '{0}{1}{2}'.format(abspath(normpath(self.options.source)), sep, "integrations.json") - self.regex_h1 = re.compile(r'^#{1}(?!#)(.*)', re.MULTILINE) - self.regex_h1_replace = re.compile(r'^(#{1})(?!#)(.*)', re.MULTILINE) - self.regex_metrics = re.compile(r'(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)', re.DOTALL) - self.regex_service_check = re.compile(r'(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)', re.DOTALL) - self.regex_fm = 
re.compile(r'(?:-{3})(.*?)(?:-{3})(.*)', re.DOTALL) - self.regex_source = re.compile(r'(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}', re.DOTALL) + self.tempdir = ( + "/tmp" + if platform.system() == "Darwin" + else tempfile.gettempdir() + ) + self.data_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "data" + sep, + ) + self.content_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "content" + sep, + ) + self.data_integrations_dir = ( + join(self.data_dir, "integrations") + sep + ) + self.data_service_checks_dir = ( + join(self.data_dir, "service_checks") + sep + ) + self.content_integrations_dir = ( + join(self.content_dir, "integrations") + sep + ) + self.extract_dir = "{0}".format( + join(self.tempdir, "extracted") + sep + ) + self.integration_datafile = "{0}{1}{2}".format( + abspath(normpath(self.options.source)), + sep, + "integrations.json", + ) + self.regex_h1 = re.compile( + r"^#{1}(?!#)(.*)", re.MULTILINE + ) + self.regex_h1_replace = re.compile( + r"^(#{1})(?!#)(.*)", re.MULTILINE + ) + self.regex_metrics = re.compile( + r"(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)", + re.DOTALL, + ) + self.regex_service_check = re.compile( + r"(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)", + re.DOTALL, + ) + self.regex_fm = re.compile( + r"(?:-{3})(.*?)(?:-{3})(.*)", re.DOTALL + ) + self.regex_source = re.compile( + r"(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}", + re.DOTALL, + ) self.datafile_json = [] self.pool_size = 5 - self.integration_mutations = OrderedDict({ - 'hdfs': {'action': 'create', 'target': 'hdfs', 'remove_header': False, 'fm': {'is_public': True, 'kind': 'integration', 'integration_title': 'Hdfs', 'short_description': 'Track cluster disk usage, volume failures, dead DataNodes, and more.'}}, - 'mesos': {'action': 'create', 'target': 'mesos', 'remove_header': False, 'fm': {'aliases': ['/integrations/mesos_master/','/integrations/mesos_slave/'], 'is_public': True, 'kind': 'integration', 'integration_title': 'Mesos', 'short_description': 'Track cluster resource usage, master and slave counts, tasks statuses, and more.'}}, - 'activemq_xml': {'action': 'merge', 'target': 'activemq', 'remove_header': False}, - 'cassandra_nodetool': {'action': 'merge', 'target': 'cassandra', 'remove_header': False}, - 'datadog_checks_base': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'datadog_checks_tests_helper': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'docs': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'gitlab_runner': {'action': 'merge', 'target': 'gitlab', 'remove_header': False}, - 'hdfs_datanode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'hdfs_namenode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'mesos_master': {'action': 'merge', 'target': 'mesos', 'remove_header': True}, - 'mesos_slave': {'action': 'merge', 'target': 'mesos', 'remove_header': False}, - 'kafka_consumer': {'action': 'merge', 'target': 'kafka', 'remove_header': False}, - 'kube_dns': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kube_proxy': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kubernetes_state': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'system_core': {'action': 'discard', 'target': 'system', 'remove_header': False}, - 'system_swap': {'action': 'discard', 'target': 'system', 'remove_header': 
False}, - 'hbase_regionserver': {'action': 'merge', 'target': 'hbase_master', 'remove_header': False}, - }) - self.initial_integration_files = glob.glob('{}*.md'.format(self.content_integrations_dir)) + self.integration_mutations = OrderedDict( + { + "hdfs": { + "action": "create", + "target": "hdfs", + "remove_header": False, + "fm": { + "is_public": True, + "kind": "integration", + "integration_title": "Hdfs", + "short_description": "Track cluster disk usage, volume failures, dead DataNodes, and more.", + }, + }, + "mesos": { + "action": "create", + "target": "mesos", + "remove_header": False, + "fm": { + "aliases": [ + "/integrations/mesos_master/", + "/integrations/mesos_slave/", + ], + "is_public": True, + "kind": "integration", + "integration_title": "Mesos", + "short_description": "Track cluster resource usage, master and slave counts, tasks statuses, and more.", + }, + }, + "activemq_xml": { + "action": "merge", + "target": "activemq", + "remove_header": False, + }, + "cassandra_nodetool": { + "action": "merge", + "target": "cassandra", + "remove_header": False, + }, + "datadog_checks_base": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "datadog_checks_tests_helper": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "docs": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "gitlab_runner": { + "action": "merge", + "target": "gitlab", + "remove_header": False, + }, + "hdfs_datanode": { + "action": "merge", + "target": "hdfs", + "remove_header": False, + }, + "hdfs_namenode": { + "action": "merge", + "target": "hdfs", + "remove_header": False, + }, + "mesos_master": { + "action": "merge", + "target": "mesos", + "remove_header": True, + }, + "mesos_slave": { + "action": "merge", + "target": "mesos", + "remove_header": False, + }, + "kafka_consumer": { + "action": "merge", + "target": "kafka", + "remove_header": False, + }, + "kube_dns": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kube_proxy": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kubernetes_state": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "system_core": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "system_swap": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "hbase_regionserver": { + "action": "merge", + "target": "hbase_master", + "remove_header": False, + }, + } + ) + self.initial_integration_files = glob.glob( + "{}*.md".format(self.content_integrations_dir) + ) makedirs(self.data_integrations_dir, exist_ok=True) - makedirs(self.data_service_checks_dir, exist_ok=True) - makedirs(self.content_integrations_dir, exist_ok=True) + makedirs( + self.data_service_checks_dir, exist_ok=True + ) + makedirs( + self.content_integrations_dir, exist_ok=True + ) @staticmethod def csv_to_yaml(key_name, csv_filename, yml_filename): @@ -179,11 +387,21 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): """ yaml_data = {key_name: []} with open(csv_filename) as csv_file: - reader = csv.DictReader(csv_file, delimiter=',') - yaml_data[key_name] = [dict(line) for line in reader] + reader = csv.DictReader(csv_file, delimiter=",") + yaml_data[key_name] = [ + dict(line) for line in reader + ] if yaml_data[key_name]: - with open(file=yml_filename, mode='w', encoding='utf-8') as f: - f.write(yaml.dump(yaml_data, default_flow_style=False)) + with open( + file=yml_filename, + mode="w", + 
encoding="utf-8", + ) as f: + f.write( + yaml.dump( + yaml_data, default_flow_style=False + ) + ) def download_from_repo(self, org, repo, branch, globs): """ @@ -196,114 +414,228 @@ def download_from_repo(self, org, repo, branch, globs): """ with GitHub(self.options.token) as gh: listing = gh.list(org, repo, branch, globs) - dest = '{0}{1}{2}'.format(self.extract_dir, repo, sep) + dest = "{0}{1}{2}".format( + self.extract_dir, repo, sep + ) with Pool(processes=self.pool_size) as pool: with requests.Session() as s: - r = [x for x in tqdm( - pool.imap_unordered(partial(gh.raw, request_session=s, org=org, repo=repo, branch=branch, dest_dir=dest), listing))] + r = [ + x + for x in tqdm( + pool.imap_unordered( + partial( + gh.raw, + request_session=s, + org=org, + repo=repo, + branch=branch, + dest_dir=dest, + ), + listing, + ) + ) + ] def process(self): """ - 1. If we did not specify local dogweb directory and there is a token download dogweb repo files we need - 2. If we did not specify local integrations-core directory download with or without token as its public repo - 3. Process all files we have dogweb first integrations-core second with the latter taking precedence + This represent the overall workflow of the build of the documentation """ - print('Processing') + print("Processing") self.extract_config() self.local_or_upstream() - + self.process_filenames() - + self.merge_integrations() def extract_config(self): - - print('Loading {} configuration file'.format(CONFIGURATION_FILE)) + """ + This pull the content from the configuration file at CONFIGURATION_FILE location + then parse it to populate the list_of_content variable that contain all contents + that needs to be pulled and processed. + """ + print( + "Loading {} configuration file".format( + CONFIGURATION_FILE + ) + ) configuration = yaml.load(open(CONFIGURATION_FILE)) for org in configuration: - for repo in org['repos']: - for content in repo['contents']: + for repo in org["repos"]: + for content in repo["contents"]: content_temp = {} - content_temp['org_name'] = org['org_name'] - content_temp['repo_name'] = repo['repo_name'] - content_temp['branch'] = content['branch'] - content_temp['action']= content['action'] - content_temp['globs'] = content['globs'] + content_temp["org_name"] = org[ + "org_name" + ] + content_temp["repo_name"] = repo[ + "repo_name" + ] + content_temp["branch"] = content[ + "branch" + ] + content_temp["action"] = content[ + "action" + ] + content_temp["globs"] = content["globs"] - if content['action'] == 'pull-and-push': - content_temp['options'] = content['options'] + if content["action"] == "pull-and-push": + content_temp["options"] = content[ + "options" + ] - self.list_of_contents.append(content_temp) - print('Adding content {} '.format(content_temp)) + self.list_of_contents.append( + content_temp + ) + print( + "Adding content {} ".format( + content_temp + ) + ) def local_or_upstream(self): - + """ + This goes through the list_of_contents and check for each repo specified + If a local version exists otherwise we download it from the upstream repo on Github + """ for content in self.list_of_contents: - if content['repo_name']=='dogweb': + if content["repo_name"] == "dogweb": if not self.options.dogweb: if self.options.token: - print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading content from 
upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - # sync from integrations-core, download if we don't have it (public repo so no token needed) - elif content['repo_name']== 'integrations-core': + elif ( + content["repo_name"] == "integrations-core" + ): if not self.options.integrations: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - # sync from integrations-extras, download if we don't have it (public repo so no token needed) - elif content['repo_name']=='integrations-extras': + elif ( + content["repo_name"] + == "integrations-extras" + ): if not self.options.extras: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) else: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - print("Updating globs for new local version of repo {}".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + print( + "Updating globs for new local version of repo {}".format( + content["repo_name"] + ) + ) + content["globs"] = self.update_globs( + "{0}{1}{2}".format( + self.extract_dir, + content["repo_name"], + sep, + ), + content["globs"], + ) def update_globs(self, new_path, globs): + """ + Depending if the repo is local or we downloaded it we need to update the globs to match + the final version of the repo to use + :param new_path: new_path to update the globs with + :param globs: list of globs to update + """ new_globs = [] for item in globs: - new_globs.append('{}{}'.format(new_path, item)) + new_globs.append("{}{}".format(new_path, item)) return new_globs def process_filenames(self): - + """ + Goes through the list_of_contents and for each content + triggers the right action to apply. 
+ """ for content in self.list_of_contents: print("Processing content: {}".format(content)) - if content['action'] == 'integrations': - self.process_integrations(content['globs']) - - elif content['action'] == 'source': - - self.process_source_attribute(content['globs']) - - elif content['action'] == 'pull-and-push': - + if content["action"] == "integrations": + self.process_integrations(content["globs"]) + + elif content["action"] == "source": + + self.process_source_attribute( + content["globs"] + ) + + elif content["action"] == "pull-and-push": + self.pull_and_push(content) else: - print("[ERROR] Unsuccessful Processing of {}".format(content)) + print( + "[ERROR] Unsuccessful Processing of {}".format( + content + ) + ) - def process_integrations(self,globs): - - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): - if file_name.endswith('.csv'): + def process_integrations(self, globs): + """ + Go through all files needed for integrations build + and triggers the right function for the right type of file. + :param globs: list of globs for integrations. + """ + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in globs + ) + ): + if file_name.endswith(".csv"): self.process_integration_metric(file_name) - elif file_name.endswith('manifest.json'): + elif file_name.endswith("manifest.json"): self.process_integration_manifest(file_name) - elif file_name.endswith('service_checks.json'): + elif file_name.endswith("service_checks.json"): self.process_service_checks(file_name) - elif file_name.endswith('.md'): + elif file_name.endswith(".md"): self.process_integration_readme(file_name) def pull_and_push(self, content): @@ -314,65 +646,139 @@ def pull_and_push(self, content): :param dest_dir: folder to push the data to in the doc repo """ - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in content['globs'])): - with open(file_name, mode='r+') as f: + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in content["globs"] + ) + ): + with open(file_name, mode="r+") as f: file_content = f.read() # Replacing the master README.md by _index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' + if file_name.endswith("README.md"): + file_name = "_index.md" # Replacing links that point to the Github folder by link that point to the doc. 
- new_link = content['options']['dest_dir'] + '\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md'.format(content['org_name'],content['repo_name'],content['branch'],content['options']['path_to_remove'])) - file_content = re.sub(regex_github_link, new_link, file_content, count=0) + new_link = ( + content["options"]["dest_dir"] + "\\2" + ) + regex_github_link = re.compile( + r"(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md".format( + content["org_name"], + content["repo_name"], + content["branch"], + content["options"][ + "path_to_remove" + ], + ) + ) + file_content = re.sub( + regex_github_link, + new_link, + file_content, + count=0, + ) # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, content['options']['dest_dir'][1:]) + dirp = "{}{}".format( + self.content_dir, + content["options"]["dest_dir"][1:], + ) makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + with open( + "{}{}".format(dirp, basename(file_name)), + mode="w+", + encoding="utf-8", + ) as f: f.write(file_content) def merge_integrations(self): """ Merges integrations that come under one """ - for name, action_obj in self.integration_mutations.items(): + for ( + name, + action_obj, + ) in self.integration_mutations.items(): if name not in self.initial_integration_files: - action = action_obj.get('action') - target = action_obj.get('target') - input_file = '{}{}.md'.format(self.content_integrations_dir, name) - output_file = '{}{}.md'.format(self.content_integrations_dir, target) - if action == 'merge': - with open(input_file, 'r') as content_file, open(output_file, 'a') as target_file: + action = action_obj.get("action") + target = action_obj.get("target") + input_file = "{}{}.md".format( + self.content_integrations_dir, name + ) + output_file = "{}{}.md".format( + self.content_integrations_dir, target + ) + if action == "merge": + with open( + input_file, "r" + ) as content_file, open( + output_file, "a" + ) as target_file: content = content_file.read() - content = re.sub(self.regex_fm, r'\2', content, count=0) - if action_obj.get('remove_header', False): - content = re.sub(self.regex_h1, '', content, count=0) + content = re.sub( + self.regex_fm, + r"\2", + content, + count=0, + ) + if action_obj.get( + "remove_header", False + ): + content = re.sub( + self.regex_h1, + "", + content, + count=0, + ) else: - content = re.sub(self.regex_h1_replace, r'##\2', content, count=0) + content = re.sub( + self.regex_h1_replace, + r"##\2", + content, + count=0, + ) target_file.write(content) try: remove(input_file) except OSError: - print('the file {} was not found and could not be removed during merge action'.format(input_file)) - elif action == 'truncate': + print( + "the file {} was not found and could not be removed during merge action".format( + input_file + ) + ) + elif action == "truncate": if exists(output_file): - with open(output_file, 'r+') as target_file: + with open( + output_file, "r+" + ) as target_file: content = target_file.read() - content = re.sub(self.regex_fm, r'---\n\1\n---\n', content, count=0) + content = re.sub( + self.regex_fm, + r"---\n\1\n---\n", + content, + count=0, + ) target_file.truncate(0) target_file.seek(0) target_file.write(content) else: - open(output_file, 'w').close() - elif action == 'discard': + open(output_file, "w").close() + elif action == "discard": try: remove(input_file) except OSError: - print('the file {} was not found and could not 
be removed during discard action'.format(input_file)) - elif action == 'create': - with open(output_file, 'w+') as f: - fm = yaml.dump(action_obj.get('fm'), default_flow_style=False).rstrip() - data = '---\n{0}\n---\n'.format(fm) + print( + "the file {} was not found and could not be removed during discard action".format( + input_file + ) + ) + elif action == "create": + with open(output_file, "w+") as f: + fm = yaml.dump( + action_obj.get("fm"), + default_flow_style=False, + ).rstrip() + data = "---\n{0}\n---\n".format(fm) f.write(data) def process_source_attribute(self, globs): @@ -381,24 +787,49 @@ def process_source_attribute(self, globs): and inserts them into the file something.md :param file_name: path to a source.py file """ - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): - if file_name.endswith('dd/utils/context/source.py'): - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in globs + ) + ): + if file_name.endswith( + "dd/utils/context/source.py" + ): + out = "|Integration name | API source attribute|\n" + out += "|:---|:---|\n" + with open(file_name, "r") as f: result = f.read() m = re.search(self.regex_source, result) result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') + result = re.sub( + r"[^0-9A-Za-z:, ]", "", result + ) + for line in result.split(","): + pair = line.split(":") if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) + out += "|{0}|{1}|\n".format( + pair[0].strip().title(), + pair[1].strip(), + ) + with open( + "{}{}".format( + self.options.source, + "/content/integrations/faq/list-of-api-source-attribute-value.md", + ), + mode="r+", + encoding="utf-8", + ) as f: + boundary = re.compile( + r"^-{3,}$", re.MULTILINE + ) + _, fm, content = boundary.split( + f.read(), 2 + ) template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) + new_content = template.format( + front_matter=fm.strip(), content=out + ) f.truncate(0) f.seek(0) f.write(new_content) @@ -408,11 +839,17 @@ def process_integration_metric(self, file_name): Take a single metadata csv file and convert it to yaml :param file_name: path to a metadata csv file """ - if file_name.endswith('/metadata.csv'): - key_name = basename(dirname(normpath(file_name))) + if file_name.endswith("/metadata.csv"): + key_name = basename( + dirname(normpath(file_name)) + ) else: - key_name = basename(file_name.replace('_metadata.csv', '')) - new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) + key_name = basename( + file_name.replace("_metadata.csv", "") + ) + new_file_name = "{}{}.yaml".format( + self.data_integrations_dir, key_name + ) self.csv_to_yaml(key_name, file_name, new_file_name) def process_integration_manifest(self, file_name): @@ -422,14 +859,27 @@ def process_integration_manifest(self, file_name): :param file_name: path to a manifest json file """ - names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] + names = [ + 
d.get("name", "").lower() + for d in self.datafile_json + if "name" in d + ] with open(file_name) as f: data = json.load(f) - data_name = data.get('name', '').lower() - if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - data['is_public'] = False + data_name = data.get("name", "").lower() + if data_name in [ + k + for k, v in self.integration_mutations.items() + if v.get("action") == "merge" + ]: + data["is_public"] = False if data_name in names: - item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == data_name + ] if len(item) > 0: item[0].update(data) else: @@ -441,8 +891,13 @@ def process_service_checks(self, file_name): as the integration name it came from e.g /data/service_checks/docker.json :param file_name: path to a service_checks json file """ - new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) - shutil.copy(file_name, self.data_service_checks_dir + new_file_name) + new_file_name = "{}.json".format( + basename(dirname(normpath(file_name))) + ) + shutil.copy( + file_name, + self.data_service_checks_dir + new_file_name, + ) def process_integration_readme(self, file_name): """ @@ -455,33 +910,90 @@ def process_integration_readme(self, file_name): :param file_name: path to a readme md file """ - metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) + metrics = glob.glob( + "{path}{sep}*metadata.csv".format( + path=dirname(file_name), sep=sep + ) + ) metrics = metrics[0] if len(metrics) > 0 else None - metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) - service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) - service_check = service_check[0] if len(service_check) > 0 else None - service_check_exist = service_check and exists(service_check) - manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') - manifest_json = json.load(open(manifest)) if exists(manifest) else {} + metrics_exist = ( + metrics + and exists(metrics) + and linecache.getline(metrics, 2) + ) + service_check = glob.glob( + "{file}.json".format( + file=self.data_service_checks_dir + + basename(dirname(file_name)) + ) + ) + service_check = ( + service_check[0] + if len(service_check) > 0 + else None + ) + service_check_exist = service_check and exists( + service_check + ) + manifest = "{0}{1}{2}".format( + dirname(file_name), sep, "manifest.json" + ) + manifest_json = ( + json.load(open(manifest)) + if exists(manifest) + else {} + ) dependencies = self.add_dependencies(file_name) - new_file_name = '{}.md'.format(basename(dirname(file_name))) - exist_already = exists(self.content_integrations_dir + new_file_name) - with open(file_name, 'r') as f: + new_file_name = "{}.md".format( + basename(dirname(file_name)) + ) + exist_already = exists( + self.content_integrations_dir + new_file_name + ) + with open(file_name, "r") as f: result = f.read() - title = manifest_json.get('name', '').lower() - if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - result = re.sub(self.regex_h1, '', result, 1) + title = manifest_json.get("name", "").lower() + if title not in [ + k + for k, v in self.integration_mutations.items() + if v.get("action") == "merge" + ]: + result = re.sub( + self.regex_h1, "", result, 1 + ) if metrics_exist: - result = re.sub(self.regex_metrics, r'\1{{< 
get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) + result = re.sub( + self.regex_metrics, + r'\1{{< get-metrics-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) if service_check_exist: - result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) - result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') - result = self.add_integration_frontmatter(new_file_name, result, dependencies) + result = re.sub( + self.regex_service_check, + r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) + result = "{0}\n\n{1}".format( + result, "{{< get-dependencies >}}" + ) + result = self.add_integration_frontmatter( + new_file_name, result, dependencies + ) if not exist_already: - with open(self.content_integrations_dir + new_file_name, 'w') as out: + with open( + self.content_integrations_dir + + new_file_name, + "w", + ) as out: out.write(result) - def add_integration_frontmatter(self, file_name, content, dependencies=[]): + def add_integration_frontmatter( + self, file_name, content, dependencies=[] + ): """ Takes an integration README.md and injects front matter yaml based on manifest.json data of the same integration :param file_name: new integration markdown filename e.g airbrake.md @@ -491,41 +1003,115 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): fm = {} template = "---\n{front_matter}\n---\n\n{content}\n" if file_name not in self.initial_integration_files: - item = [d for d in self.datafile_json if d.get('name', '').lower() == basename(file_name).replace('.md', '')] + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == basename(file_name).replace(".md", "") + ] if item and len(item) > 0: - item[0]['kind'] = 'integration' - item[0]['integration_title'] = item[0].get('public_title', '').replace('Datadog-', '').replace( - 'Integration', '').strip() - item[0]['git_integration_title'] = item[0].get('name', '').lower() - if item[0].get('type', None): - item[0]['ddtype'] = item[0].get('type') - del item[0]['type'] - item[0]['dependencies'] = dependencies - fm = yaml.dump(item[0], default_flow_style=False).rstrip() + item[0]["kind"] = "integration" + item[0]["integration_title"] = ( + item[0] + .get("public_title", "") + .replace("Datadog-", "") + .replace("Integration", "") + .strip() + ) + item[0]["git_integration_title"] = ( + item[0].get("name", "").lower() + ) + if item[0].get("type", None): + item[0]["ddtype"] = item[0].get("type") + del item[0]["type"] + item[0]["dependencies"] = dependencies + fm = yaml.dump( + item[0], default_flow_style=False + ).rstrip() else: - fm = {'kind': 'integration'} - return template.format(front_matter=fm, content=content) + fm = {"kind": "integration"} + return template.format( + front_matter=fm, content=content + ) def add_dependencies(self, file_name): dependencies = [] - if file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep)): - dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep), "https://github.com/DataDog/integrations-core/blob/master/")) + if file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-core", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-core", + sep, + ), + "https://github.com/DataDog/integrations-core/blob/master/", + ) + ) - elif file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 
'integrations-extras', sep)): - dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep), "https://github.com/DataDog/integrations-extras/blob/master/")) + elif file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-extras", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-extras", + sep, + ), + "https://github.com/DataDog/integrations-extras/blob/master/", + ) + ) return dependencies -if __name__ == '__main__': - parser = OptionParser(usage="usage: %prog [options] link_type") - parser.add_option("-t", "--token", help="github access token", default=None) - parser.add_option("-w", "--dogweb", help="path to dogweb local folder", default=None) - parser.add_option("-i", "--integrations", help="path to integrations-core local folder", default=None) - parser.add_option("-e", "--extras", help="path to integrations-extras local folder", default=None) - parser.add_option("-s", "--source", help="location of src files", default=curdir) + +if __name__ == "__main__": + parser = OptionParser( + usage="usage: %prog [options] link_type" + ) + parser.add_option( + "-t", + "--token", + help="github access token", + default=None, + ) + parser.add_option( + "-w", + "--dogweb", + help="path to dogweb local folder", + default=None, + ) + parser.add_option( + "-i", + "--integrations", + help="path to integrations-core local folder", + default=None, + ) + parser.add_option( + "-e", + "--extras", + help="path to integrations-extras local folder", + default=None, + ) + parser.add_option( + "-s", + "--source", + help="location of src files", + default=curdir, + ) options, args = parser.parse_args() - options.token = getenv('GITHUB_TOKEN', options.token) if not options.token else options.token + options.token = ( + getenv("GITHUB_TOKEN", options.token) + if not options.token + else options.token + ) pre = PreBuild(options) pre.process() From eff597171622d40f3062bd193e40594dcd74d6bc Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:45:00 -0500 Subject: [PATCH 11/13] using same content logic for all processing function --- local/bin/py/update_pre_build.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 9eb33d06b4171..236524c07288f 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -596,13 +596,11 @@ def process_filenames(self): for content in self.list_of_contents: print("Processing content: {}".format(content)) if content["action"] == "integrations": - self.process_integrations(content["globs"]) + self.process_integrations(content) elif content["action"] == "source": - self.process_source_attribute( - content["globs"] - ) + self.process_source_attribute(content) elif content["action"] == "pull-and-push": @@ -614,16 +612,16 @@ def process_filenames(self): ) ) - def process_integrations(self, globs): + def process_integrations(self, content): """ Go through all files needed for integrations build and triggers the right function for the right type of file. - :param globs: list of globs for integrations. 
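The path-to-URL mapping in add_dependencies above reduces to a small pure function. Below is a hedged, table-driven rework of that idea; collapsing the if/elif chain into a dictionary is an editorial choice, and the extract_dir value plus the sample path are invented for the demonstration, not values produced by the script.

```python
# Standalone rework of the add_dependencies idea: a file extracted under
# <extract_dir>/<repo>/... maps back to its blob URL on GitHub master.
# The dictionary-driven loop replaces the original if/elif chain; extract_dir
# and the sample path are invented, and a POSIX os.sep is assumed to match them.
from os import sep

REPO_URLS = {
    "integrations-core": "https://github.com/DataDog/integrations-core/blob/master/",
    "integrations-extras": "https://github.com/DataDog/integrations-extras/blob/master/",
}

def add_dependencies(file_name, extract_dir):
    dependencies = []
    for repo, url in REPO_URLS.items():
        prefix = "{0}{1}{2}".format(extract_dir, repo, sep)
        if file_name.startswith(prefix):
            dependencies.append(file_name.replace(prefix, url))
    return dependencies

print(add_dependencies(
    "/tmp/extracted/integrations-core/some_check/README.md",
    "/tmp/extracted/",
))
# -> ['https://github.com/DataDog/integrations-core/blob/master/some_check/README.md']
```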
+ :param content: integrations content to process """ for file_name in tqdm( chain.from_iterable( glob.iglob(pattern, recursive=True) - for pattern in globs + for pattern in content["globs"] ) ): if file_name.endswith(".csv"): @@ -642,8 +640,7 @@ def pull_and_push(self, content): """ Take the content from a folder following github logic and transform it to be displayed in the doc in dest_dir folder - :param globs: folder to pull - :param dest_dir: folder to push the data to in the doc repo + :param content: content to process """ for file_name in tqdm( @@ -781,7 +778,7 @@ def merge_integrations(self): data = "---\n{0}\n---\n".format(fm) f.write(data) - def process_source_attribute(self, globs): + def process_source_attribute(self, content): """ Take a single source.py file extracts the FROM_DISPLAY_NAME dict values and inserts them into the file something.md @@ -790,7 +787,7 @@ def process_source_attribute(self, globs): for file_name in tqdm( chain.from_iterable( glob.iglob(pattern, recursive=True) - for pattern in globs + for pattern in content["globs"] ) ): if file_name.endswith( From 3d37cb25fa138ac1dc616e3789c0fb0344e0ab20 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:48:41 -0500 Subject: [PATCH 12/13] comment update --- local/bin/py/update_pre_build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 236524c07288f..9260fcdf37719 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -438,7 +438,7 @@ def download_from_repo(self, org, repo, branch, globs): def process(self): """ - This represent the overall workflow of the build of the documentation + This represents the overall workflow of the build of the documentation """ print("Processing") @@ -452,8 +452,8 @@ def process(self): def extract_config(self): """ - This pull the content from the configuration file at CONFIGURATION_FILE location - then parse it to populate the list_of_content variable that contain all contents + This pulls the content from the configuration file at CONFIGURATION_FILE location + then parses it to populate the list_of_content variable that contains all contents that needs to be pulled and processed. """ print( @@ -614,7 +614,7 @@ def process_filenames(self): def process_integrations(self, content): """ - Go through all files needed for integrations build + Goes through all files needed for integrations build and triggers the right function for the right type of file. :param content: integrations content to process """ From fa878deaa1a743fab0c5611848a112c010e2ef0a Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:49:59 -0500 Subject: [PATCH 13/13] removing unsused param in config file --- local/etc/pull_config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index 2c02f67470f42..829c485472749 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -48,5 +48,3 @@ - "**/manifest.json" - "**/service_checks.json" - "**/README.md" - path_to_remove: '' - dest_dir: ''
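The last patch trims path_to_remove and dest_dir from a configuration entry that, per its commit message, does not use them; in the script, only pull-and-push contents ever read an options block. A small consistency check along those lines is sketched below. The YAML snippet is invented, and the check itself is a suggestion inferred from the script's behaviour rather than something the patch adds.

```python
# Optional sanity check consistent with the final patch: entries whose action
# is "pull-and-push" must carry an options block (dest_dir, path_to_remove),
# since pull_and_push reads both; other actions can omit it entirely.
# The snippet is an invented example, not the repository's real pull_config.yaml.
import yaml

SNIPPET = """
- org_name: DataDog
  repos:
    - repo_name: integrations-extras
      contents:
        - branch: master
          action: integrations
          globs:
            - "**/metadata.csv"
            - "**/manifest.json"
            - "**/service_checks.json"
            - "**/README.md"
"""

def check_contents(raw_yaml):
    problems = []
    for org in yaml.safe_load(raw_yaml):
        for repo in org["repos"]:
            for content in repo["contents"]:
                if content["action"] == "pull-and-push" and "options" not in content:
                    problems.append(
                        "{}/{}: pull-and-push entry has no options block".format(
                            org["org_name"], repo["repo_name"]
                        )
                    )
    return problems

if __name__ == "__main__":
    print(check_contents(SNIPPET) or "configuration looks consistent")
```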