From ac864e1af3a02c57ea2385f8f1ebd3c42b37e6b0 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 2 Jan 2019 17:09:41 +0100 Subject: [PATCH 01/13] creating function to process integrations --- local/bin/py/update_pre_build.py | 204 ++++++++++++++++--------------- 1 file changed, 108 insertions(+), 96 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index e0f7c176f54d0..a9b93afd1288a 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -8,6 +8,10 @@ import re import tempfile import shutil +import requests +import yaml +import pickle +from tqdm import * from collections import OrderedDict from functools import partial, wraps from itertools import chain, zip_longest @@ -15,10 +19,6 @@ from optparse import OptionParser from os import sep, makedirs, getenv, remove from os.path import exists, basename, curdir, join, abspath, normpath, dirname -import requests -import yaml -from tqdm import * -import pickle def cache_by_sha(func): @@ -241,14 +241,32 @@ def process(self): globs.append('{}{}'.format(self.options.extras, e_glob)) for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + self.process_filename(file_name) + + self.merge_integrations() + + def process_filename(self, file_name): + + if file_name.endswith('dd/utils/context/source.py'): self.process_source_attribute(file_name) + + elif file_name.endswith('.csv'): self.process_integration_metric(file_name) + + elif file_name.endswith('manifest.json'): self.process_integration_manifest(file_name) + + elif file_name.endswith('service_checks.json'): self.process_service_checks(file_name) - self.process_integration_readme(file_name) + + elif ('/integrations-core/docs/dev/' in file_name and file_name.endswith('.md')): self.dev_doc_integrations_core(file_name) - self.merge_integrations() + elif file_name.endswith('.md'): + self.process_integration_readme(file_name) + else: + print("Processing of {} was unsuccessful".format(file_name)) + def merge_integrations(self): """ Merges integrations that come under one """ @@ -298,39 +316,37 @@ def process_source_attribute(self, file_name): and inserts them into the file something.md :param file_name: path to a source.py file """ - if file_name.endswith('dd/utils/context/source.py'): - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: - result = f.read() - m = re.search(self.regex_source, result) - result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') - if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) - template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) - f.truncate(0) - f.seek(0) - f.write(new_content) + out = '|Integration name | API source attribute|\n' + out += '|:---|:---|\n' + with open(file_name, 'r') as f: + result = f.read() + m = re.search(self.regex_source, result) + result = m.group(2) if m else result + result = re.sub(r'[^0-9A-Za-z:, ]', '', result) + for line in result.split(','): + pair = line.split(':') + if len(pair) > 1: + out += 
'|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) + with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: + boundary = re.compile(r'^-{3,}$', re.MULTILINE) + _, fm, content = boundary.split(f.read(), 2) + template = "---\n{front_matter}\n---\n\n{content}\n" + new_content = template.format(front_matter=fm.strip(), content=out) + f.truncate(0) + f.seek(0) + f.write(new_content) def process_integration_metric(self, file_name): """ Take a single metadata csv file and convert it to yaml :param file_name: path to a metadata csv file """ - if file_name.endswith('.csv'): - if file_name.endswith('/metadata.csv'): - key_name = basename(dirname(normpath(file_name))) - else: - key_name = basename(file_name.replace('_metadata.csv', '')) - new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) - self.csv_to_yaml(key_name, file_name, new_file_name) + if file_name.endswith('/metadata.csv'): + key_name = basename(dirname(normpath(file_name))) + else: + key_name = basename(file_name.replace('_metadata.csv', '')) + new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) + self.csv_to_yaml(key_name, file_name, new_file_name) def dev_doc_integrations_core(self, file_name): """ @@ -338,28 +354,25 @@ def dev_doc_integrations_core(self, file_name): and transform it to be displayed on the doc in the /developers/integrations section :param file_name: path to a file """ - relative_path_on_github = '/integrations-core/docs/dev/' doc_directory = '/developers/integrations/' - if (relative_path_on_github in file_name and file_name.endswith('.md')): - - with open(file_name, mode='r+') as f: - content = f.read() + with open(file_name, mode='r+') as f: + content = f.read() - # Replacing the master README.md by _index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' + # Replacing the master README.md by _index.md to follow Hugo logic + if file_name.endswith('README.md'): + file_name = '_index.md' - #Replacing links that point to the Github folder by link that point to the doc. - new_link = doc_directory +'\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') - content = re.sub(regex_github_link, new_link, content, count=0) + #Replacing links that point to the Github folder by link that point to the doc. 
+ new_link = doc_directory +'\\2' + regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') + content = re.sub(regex_github_link, new_link, content, count=0) - # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) - makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: - f.write(content) + # Writing the new content to the documentation file + dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) + makedirs(dirp, exist_ok=True) + with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + f.write(content) def process_integration_manifest(self, file_name): """ @@ -367,19 +380,19 @@ def process_integration_manifest(self, file_name): set is_public to false to hide integrations we merge later :param file_name: path to a manifest json file """ - if file_name.endswith('manifest.json'): - names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] - with open(file_name) as f: - data = json.load(f) - data_name = data.get('name', '').lower() - if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - data['is_public'] = False - if data_name in names: - item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] - if len(item) > 0: - item[0].update(data) - else: - self.datafile_json.append(data) + + names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] + with open(file_name) as f: + data = json.load(f) + data_name = data.get('name', '').lower() + if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: + data['is_public'] = False + if data_name in names: + item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] + if len(item) > 0: + item[0].update(data) + else: + self.datafile_json.append(data) def process_service_checks(self, file_name): """ @@ -387,9 +400,8 @@ def process_service_checks(self, file_name): as the integration name it came from e.g /data/service_checks/docker.json :param file_name: path to a service_checks json file """ - if file_name.endswith('service_checks.json'): - new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) - shutil.copy(file_name, self.data_service_checks_dir + new_file_name) + new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) + shutil.copy(file_name, self.data_service_checks_dir + new_file_name) def process_integration_readme(self, file_name): """ @@ -401,36 +413,36 @@ def process_integration_readme(self, file_name): 5. 
write out file to content/integrations with filename changed to integrationname.md :param file_name: path to a readme md file """ - if file_name.endswith('.md'): - dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) - metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) - metrics = metrics[0] if len(metrics) > 0 else None - metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) - service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) - service_check = service_check[0] if len(service_check) > 0 else None - service_check_exist = service_check and exists(service_check) - manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') - manifest_json = json.load(open(manifest)) if exists(manifest) else {} - new_file_name = '{}.md'.format(basename(dirname(file_name))) - exist_already = exists(self.content_integrations_dir + new_file_name) - with open(file_name, 'r') as f: - result = f.read() - title = manifest_json.get('name', '').lower() - if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - result = re.sub(self.regex_h1, '', result, 1) - if metrics_exist: - result = re.sub(self.regex_metrics, r'\1{{< get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) - if service_check_exist: - result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) - result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') - result = self.add_integration_frontmatter(new_file_name, result, dependencies) - if not exist_already: - with open(self.content_integrations_dir + new_file_name, 'w') as out: - out.write(result) + + dependencies = [] + if file_name.startswith(self.options.integrations): + dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + elif file_name.startswith(self.options.extras): + dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) + metrics = metrics[0] if len(metrics) > 0 else None + metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) + service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) + service_check = service_check[0] if len(service_check) > 0 else None + service_check_exist = service_check and exists(service_check) + manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') + manifest_json = json.load(open(manifest)) if exists(manifest) else {} + new_file_name = '{}.md'.format(basename(dirname(file_name))) + exist_already = exists(self.content_integrations_dir + new_file_name) + with open(file_name, 'r') as f: + result = f.read() + title = manifest_json.get('name', '').lower() + if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: + result = re.sub(self.regex_h1, '', result, 1) + if metrics_exist: + result = re.sub(self.regex_metrics, r'\1{{< 
get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) + if service_check_exist: + result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) + result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') + result = self.add_integration_frontmatter(new_file_name, result, dependencies) + if not exist_already: + with open(self.content_integrations_dir + new_file_name, 'w') as out: + out.write(result) def add_integration_frontmatter(self, file_name, content, dependencies=[]): """ From cc8b35b5a8c2827356fd14ab68511f1267e8fa39 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Fri, 4 Jan 2019 13:49:10 +0100 Subject: [PATCH 02/13] bumping libs for new build strat --- local/etc/requirements3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/local/etc/requirements3.txt b/local/etc/requirements3.txt index 3430bbb32bea1..41ad3d6dff076 100644 --- a/local/etc/requirements3.txt +++ b/local/etc/requirements3.txt @@ -7,9 +7,9 @@ cffi==1.5.2 cssutils>=1.0.0 htmlmin>=0.1.10 pycparser==2.14 -awscli==1.11.182 +awscli==1.16.82 requests==2.20.0 -PyYAML==3.12 +PyYAML==3.13 tqdm==4.14.0 Pygments==2.2.0 datadog==0.16.0 From c8a4bae58550820add3eed690de8747ba723fc25 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Fri, 4 Jan 2019 13:49:32 +0100 Subject: [PATCH 03/13] extracting all variables from script to config file --- local/bin/py/update_pre_build.py | 139 +++++++++++++++++++++---------- local/etc/pull_config.yaml | 57 +++++++++++++ 2 files changed, 154 insertions(+), 42 deletions(-) create mode 100644 local/etc/pull_config.yaml diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index a9b93afd1288a..db07da4393336 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import csv import fnmatch import glob @@ -20,6 +21,7 @@ from os import sep, makedirs, getenv, remove from os.path import exists, basename, curdir, join, abspath, normpath, dirname +CONFIGURATION_FILE = './local/etc/pull_config.yaml' def cache_by_sha(func): """ only downloads fresh file, if we don't have one or we do and the sha has changed """ @@ -123,6 +125,12 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep + + self.list_of_orgs = [] + self.list_of_repos = [] + self.list_of_files = [] + self.list_of_contents = [] + self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) @@ -183,7 +191,7 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): with open(file=yml_filename, mode='w', encoding='utf-8') as f: f.write(yaml.dump(yaml_data, default_flow_style=False)) - def download_from_repo(self, org, repo, branch, globs): + def download_from_repo(self, org, repo, branch, globs=None): """ Takes github info and file globs and downloads files from github using multiple processes :param org: github organization or person @@ -208,43 +216,85 @@ def process(self): """ print('Processing') - dogweb_globs = ['integration/**/*_metadata.csv', 'integration/**/manifest.json', - 'integration/**/service_checks.json', 'integration/**/README.md', - 'dd/utils/context/source.py'] - integrations_globs = 
['*[!}]/metadata.csv', '*[!}]/manifest.json', '*[!}]/service_checks.json', '*[!}]/README.md', 'docs/**'] - extras_globs = ['**/metadata.csv', '**/manifest.json', '**/service_checks.json', '**/README.md'] - - # sync from dogweb, download if we don't have it (token required) - if not self.options.dogweb: - if self.options.token: - self.download_from_repo('DataDog', 'dogweb', 'prod', dogweb_globs) - self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, 'dogweb', sep) - - # sync from integrations-core, download if we don't have it (public repo so no token needed) - if not options.integrations: - self.download_from_repo('DataDog', 'integrations-core', 'master', integrations_globs) - self.options.integrations = '{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) - - # sync from integrations-extras, download if we don't have it (public repo so no token needed) - if not options.extras: - self.download_from_repo('DataDog', 'integrations-extras', 'master', extras_globs) - self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) - - globs = [] - - for d_glob, i_glob, e_glob in zip_longest(dogweb_globs, integrations_globs, extras_globs): - if d_glob: - globs.append('{}{}'.format(self.options.dogweb, d_glob)) - if i_glob: - globs.append('{}{}'.format(self.options.integrations, i_glob)) - if e_glob: - globs.append('{}{}'.format(self.options.extras, e_glob)) - - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + # TO DO Check first whether or not it's to do a local or a remote build + # then use the config to build the doc + + self.extract_config() + + self.local_or_upstream() + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) self.merge_integrations() + def extract_config(self): + + print('Loading {} configuration file'.format(CONFIGURATION_FILE)) + configuration = yaml.load(open(CONFIGURATION_FILE)) + + for org in configuration: + self.list_of_orgs.append(org['org_name']) + for repo in org['repos']: + self.list_of_repos.append(repo['repo_name']) + for content in repo['contents']: + content_temp = {\ + "org_name":org['org_name'],\ + "repo_name":repo['repo_name'],\ + "branch":content['branch'],\ + "globs":content['globs']} + self.list_of_contents.append(content_temp) + print('Adding content {} '.format(content_temp)) + + def local_or_upstream(self): + + for content in self.list_of_contents: + + if content['repo_name']=='dogweb': + if not self.options.dogweb: + if self.options.token: + print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.dogweb,content['globs']) + + # sync from integrations-core, download if we don't have it (public repo so no token needed) + elif content['repo_name']== 'integrations-core': + if not options.integrations: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.integrations = 
'{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.integrations,content['globs']) + + # sync from integrations-extras, download if we don't have it (public repo so no token needed) + elif content['repo_name']=='integrations-extras': + if not options.extras: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) + + print("Updating globs for new local version or {} repo".format(content['repo_name'])) + content['globs'] = self.update_globs(self.options.extras,content['globs']) + + else: + print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + + # Adding the final globs to a global list of globs + self.list_of_files += content['globs'] + + def update_globs(self, new_path, globs): + new_globs = [] + for item in globs: + new_globs.append('{}{}'.format(new_path, item)) + + return new_globs + def process_filename(self, file_name): if file_name.endswith('dd/utils/context/source.py'): @@ -363,7 +413,7 @@ def dev_doc_integrations_core(self, file_name): if file_name.endswith('README.md'): file_name = '_index.md' - #Replacing links that point to the Github folder by link that point to the doc. + # Replacing links that point to the Github folder by link that point to the doc. new_link = doc_directory +'\\2' regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') content = re.sub(regex_github_link, new_link, content, count=0) @@ -380,7 +430,7 @@ def process_integration_manifest(self, file_name): set is_public to false to hide integrations we merge later :param file_name: path to a manifest json file """ - + names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] with open(file_name) as f: data = json.load(f) @@ -413,12 +463,7 @@ def process_integration_readme(self, file_name): 5. 
write out file to content/integrations with filename changed to integrationname.md :param file_name: path to a readme md file """ - - dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) metrics = metrics[0] if len(metrics) > 0 else None metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) @@ -427,6 +472,7 @@ def process_integration_readme(self, file_name): service_check_exist = service_check and exists(service_check) manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') manifest_json = json.load(open(manifest)) if exists(manifest) else {} + dependencies = self.add_dependencies(file_name) new_file_name = '{}.md'.format(basename(dirname(file_name))) exist_already = exists(self.content_integrations_dir + new_file_name) with open(file_name, 'r') as f: @@ -469,6 +515,15 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): fm = {'kind': 'integration'} return template.format(front_matter=fm, content=content) + def add_dependencies(self, file_name): + dependencies = [] + if file_name.startswith(self.options.integrations): + dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + + elif file_name.startswith(self.options.extras): + dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + + return dependencies if __name__ == '__main__': parser = OptionParser(usage="usage: %prog [options] link_type") diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml new file mode 100644 index 0000000000000..1e3cb812ba129 --- /dev/null +++ b/local/etc/pull_config.yaml @@ -0,0 +1,57 @@ +--- +- org_name: DataDog + + repos: + - repo_name: dogweb + + contents: + + - content_name: source + branch: prod + globs: + - dd/utils/context/source.py + path_to_remove: '' + dest_dir: '' + + - content_name: integrations + branch: prod + globs: + - integration/**/*_metadata.csv + - integration/**/manifest.json + - integration/**/service_checks.json + - integration/**/README.md + path_to_remove: '' + dest_dir: '' + + - repo_name: integrations-core + contents: + + - content_name: integrations + branch: master + globs: + - "*[!}]/metadata.csv" + - "*[!}]/manifest.json" + - "*[!}]/service_checks.json" + - "*[!}]/README.md" + dest_dir: '' + path_to_remove: '' + + - content_name: integrations-core-doc + branch: master + globs: + - docs/** + dest_dir: '' + path_to_remove: '' + + - repo_name: integrations-extras + contents: + + - content_name: integrations + branch: master + globs: + - "**/metadata.csv" + - "**/manifest.json" + - "**/service_checks.json" + - "**/README.md" + path_to_remove: '' + dest_dir: '' From 43280bc292b9139a3074d360a5cdcd66256276dd Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Mon, 7 Jan 2019 08:53:18 +0100 Subject: [PATCH 04/13] clean-up --- local/bin/py/update_pre_build.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index db07da4393336..b25ce391a0fad 100755 --- a/local/bin/py/update_pre_build.py +++ 
b/local/bin/py/update_pre_build.py @@ -125,12 +125,10 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep - self.list_of_orgs = [] self.list_of_repos = [] self.list_of_files = [] self.list_of_contents = [] - self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) @@ -216,16 +214,10 @@ def process(self): """ print('Processing') - # TO DO Check first whether or not it's to do a local or a remote build - # then use the config to build the doc - self.extract_config() - self.local_or_upstream() - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) - self.merge_integrations() def extract_config(self): From 2a89b706bf5cad9c7fb14833d5a1efda4feb2a99 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 13:44:36 -0500 Subject: [PATCH 05/13] fixing build --- ja/layouts/shortcodes/get-metrics-from-git.html | 2 ++ layouts/shortcodes/get-metrics-from-git.html | 4 +++- local/bin/py/placehold_translations.py | 3 +-- local/bin/py/update_pre_build.py | 11 ++++++----- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ja/layouts/shortcodes/get-metrics-from-git.html b/ja/layouts/shortcodes/get-metrics-from-git.html index eb950303487a6..26658e7c81ce4 100644 --- a/ja/layouts/shortcodes/get-metrics-from-git.html +++ b/ja/layouts/shortcodes/get-metrics-from-git.html @@ -18,6 +18,7 @@ {{ $data := index $.Page.Site.Data.integrations $integration }} +{{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -71,5 +72,6 @@ {{ end }} +{{ end }} {{ end }} \ No newline at end of file diff --git a/layouts/shortcodes/get-metrics-from-git.html b/layouts/shortcodes/get-metrics-from-git.html index 4f6886970fbc6..4aa23607fa380 100644 --- a/layouts/shortcodes/get-metrics-from-git.html +++ b/layouts/shortcodes/get-metrics-from-git.html @@ -29,6 +29,7 @@ {{ end }} {{ $data := ($.Scratch.Get "data") }} + {{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -82,5 +83,6 @@ {{ end }} + {{ end }} -{{ end }} \ No newline at end of file +{{ end }} diff --git a/local/bin/py/placehold_translations.py b/local/bin/py/placehold_translations.py index bcd8aba5a4d50..75890a790f70e 100755 --- a/local/bin/py/placehold_translations.py +++ b/local/bin/py/placehold_translations.py @@ -67,8 +67,7 @@ def create_placeholder_file(template, new_glob): content=new_content.strip()) with open(new_dest, 'w') as o_file: - o_file.write(content) - print("creating placeholder for {0} at {1}".format(template, new_dest)) + o_file.write(content) return new_dest diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index b25ce391a0fad..318553460c349 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -189,7 +189,7 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): with open(file=yml_filename, mode='w', encoding='utf-8') as f: f.write(yaml.dump(yaml_data, default_flow_style=False)) - def download_from_repo(self, org, repo, branch, globs=None): + def download_from_repo(self, org, repo, branch, 
globs): """ Takes github info and file globs and downloads files from github using multiple processes :param org: github organization or person @@ -216,8 +216,10 @@ def process(self): self.extract_config() self.local_or_upstream() + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): self.process_filename(file_name) + self.merge_integrations() def extract_config(self): @@ -241,16 +243,15 @@ def extract_config(self): def local_or_upstream(self): for content in self.list_of_contents: - if content['repo_name']=='dogweb': if not self.options.dogweb: if self.options.token: print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep) print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.dogweb,content['globs']) + + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': @@ -276,7 +277,7 @@ def local_or_upstream(self): else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - + # Adding the final globs to a global list of globs self.list_of_files += content['globs'] From 8e29d01ecd4e21b0edbb116db91d67103ef5f8c0 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 14:10:06 -0500 Subject: [PATCH 06/13] fixing multi content import --- local/bin/py/update_pre_build.py | 20 ++++++++------------ local/etc/pull_config.yaml | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 318553460c349..2af3843d60062 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -152,7 +152,6 @@ def __init__(self, opts): 'cassandra_nodetool': {'action': 'merge', 'target': 'cassandra', 'remove_header': False}, 'datadog_checks_base': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'datadog_checks_tests_helper': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'dev': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'docs': {'action': 'discard', 'target': 'none', 'remove_header': False}, 'gitlab_runner': {'action': 'merge', 'target': 'gitlab', 'remove_header': False}, 'hdfs_datanode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, @@ -255,24 +254,21 @@ def local_or_upstream(self): # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': - if not options.integrations: + if not self.options.integrations: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.integrations = '{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep) print("Updating globs for new local version or {} 
repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.integrations,content['globs']) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep),content['globs']) # sync from integrations-extras, download if we don't have it (public repo so no token needed) elif content['repo_name']=='integrations-extras': - if not options.extras: + if not self.options.extras: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep) print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs(self.options.extras,content['globs']) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep),content['globs']) else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) @@ -510,11 +506,11 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): def add_dependencies(self, file_name): dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) + if file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep)): + dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep), "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) + elif file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep)): + dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep), "https://github.com/DataDog/integrations-extras/blob/master/")) return dependencies diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index 1e3cb812ba129..c2c9e1c8e4afe 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -39,7 +39,7 @@ - content_name: integrations-core-doc branch: master globs: - - docs/** + - docs/dev/*.md dest_dir: '' path_to_remove: '' From f8b920901af647f8c577750a39ac6652b4fd6db7 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:30:09 -0500 Subject: [PATCH 07/13] abstracting file processing --- local/bin/py/update_pre_build.py | 154 +++++++++++++++++-------------- local/etc/pull_config.yaml | 21 ++--- 2 files changed, 93 insertions(+), 82 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 2af3843d60062..e0a5b53769076 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -214,10 +214,10 @@ def process(self): print('Processing') self.extract_config() + self.local_or_upstream() - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in self.list_of_files)): - self.process_filename(file_name) + self.process_filenames() self.merge_integrations() @@ -225,17 +225,21 @@ def extract_config(self): print('Loading {} configuration file'.format(CONFIGURATION_FILE)) configuration = 
yaml.load(open(CONFIGURATION_FILE)) - for org in configuration: self.list_of_orgs.append(org['org_name']) for repo in org['repos']: self.list_of_repos.append(repo['repo_name']) for content in repo['contents']: - content_temp = {\ - "org_name":org['org_name'],\ - "repo_name":repo['repo_name'],\ - "branch":content['branch'],\ - "globs":content['globs']} + content_temp = {} + content_temp['org_name'] = org['org_name'] + content_temp['repo_name'] = repo['repo_name'] + content_temp['branch'] = content['branch'] + content_temp['action']= content['action'] + content_temp['globs'] = content['globs'] + + if content['action'] == 'pull-and-push': + content_temp['options'] = content['options'] + self.list_of_contents.append(content_temp) print('Adding content {} '.format(content_temp)) @@ -284,28 +288,64 @@ def update_globs(self, new_path, globs): return new_globs - def process_filename(self, file_name): + def process_filenames(self): - if file_name.endswith('dd/utils/context/source.py'): - self.process_source_attribute(file_name) + for content in self.list_of_contents: + print("Processing content: {}".format(content)) + if content['action'] == 'integrations': + self.process_integrations(content['globs']) - elif file_name.endswith('.csv'): - self.process_integration_metric(file_name) + elif content['action'] == 'source': + + self.process_source_attribute(content['globs']) + + elif content['action'] == 'pull-and-push': + + self.pull_and_push(content) + else: + print("[ERROR] Unsuccessful Processing of {}".format(content)) + + def process_integrations(self,globs): + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + if file_name.endswith('.csv'): + self.process_integration_metric(file_name) - elif file_name.endswith('manifest.json'): - self.process_integration_manifest(file_name) + elif file_name.endswith('manifest.json'): + self.process_integration_manifest(file_name) - elif file_name.endswith('service_checks.json'): - self.process_service_checks(file_name) + elif file_name.endswith('service_checks.json'): + self.process_service_checks(file_name) - elif ('/integrations-core/docs/dev/' in file_name and file_name.endswith('.md')): - self.dev_doc_integrations_core(file_name) + elif file_name.endswith('.md'): + self.process_integration_readme(file_name) - elif file_name.endswith('.md'): - self.process_integration_readme(file_name) - else: - print("Processing of {} was unsuccessful".format(file_name)) + def pull_and_push(self, content): + """ + Take the content from a folder following github logic + and transform it to be displayed in the doc in dest_dir folder + :param globs: folder to pull + :param dest_dir: folder to push the data to in the doc repo + """ + + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in content['globs'])): + with open(file_name, mode='r+') as f: + file_content = f.read() + + # Replacing the master README.md by _index.md to follow Hugo logic + if file_name.endswith('README.md'): + file_name = '_index.md' + + # Replacing links that point to the Github folder by link that point to the doc. 
+ new_link = content['options']['dest_dir'] + '\\2' + regex_github_link = re.compile(r'(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md'.format(content['org_name'],content['repo_name'],content['branch'],content['options']['path_to_remove'])) + file_content = re.sub(regex_github_link, new_link, file_content, count=0) + # Writing the new content to the documentation file + dirp = '{}{}'.format(self.content_dir, content['options']['dest_dir'][1:]) + makedirs(dirp, exist_ok=True) + with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + f.write(file_content) def merge_integrations(self): """ Merges integrations that come under one """ @@ -349,31 +389,33 @@ def merge_integrations(self): data = '---\n{0}\n---\n'.format(fm) f.write(data) - def process_source_attribute(self, file_name): + def process_source_attribute(self, globs): """ Take a single source.py file extracts the FROM_DISPLAY_NAME dict values and inserts them into the file something.md :param file_name: path to a source.py file """ - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: - result = f.read() - m = re.search(self.regex_source, result) - result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') - if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) - template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) - f.truncate(0) - f.seek(0) - f.write(new_content) + for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): + if file_name.endswith('dd/utils/context/source.py'): + out = '|Integration name | API source attribute|\n' + out += '|:---|:---|\n' + with open(file_name, 'r') as f: + result = f.read() + m = re.search(self.regex_source, result) + result = m.group(2) if m else result + result = re.sub(r'[^0-9A-Za-z:, ]', '', result) + for line in result.split(','): + pair = line.split(':') + if len(pair) > 1: + out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) + with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: + boundary = re.compile(r'^-{3,}$', re.MULTILINE) + _, fm, content = boundary.split(f.read(), 2) + template = "---\n{front_matter}\n---\n\n{content}\n" + new_content = template.format(front_matter=fm.strip(), content=out) + f.truncate(0) + f.seek(0) + f.write(new_content) def process_integration_metric(self, file_name): """ @@ -387,32 +429,6 @@ def process_integration_metric(self, file_name): new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) self.csv_to_yaml(key_name, file_name, new_file_name) - def dev_doc_integrations_core(self, file_name): - """ - Take the content from https://github.com/DataDog/integrations-core/tree/master/docs/dev - and transform it to be displayed on the doc in the /developers/integrations section - :param file_name: path to a file - """ - doc_directory = '/developers/integrations/' - - with open(file_name, mode='r+') as f: - content = f.read() - - # Replacing the master README.md by 
_index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' - - # Replacing links that point to the Github folder by link that point to the doc. - new_link = doc_directory +'\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') - content = re.sub(regex_github_link, new_link, content, count=0) - - # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) - makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: - f.write(content) - def process_integration_manifest(self, file_name): """ Take a single manifest json file and upsert to integrations.json data diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index c2c9e1c8e4afe..2c02f67470f42 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -6,47 +6,42 @@ contents: - - content_name: source + - action: source branch: prod globs: - dd/utils/context/source.py - path_to_remove: '' - dest_dir: '' - - content_name: integrations + - action: integrations branch: prod globs: - integration/**/*_metadata.csv - integration/**/manifest.json - integration/**/service_checks.json - integration/**/README.md - path_to_remove: '' - dest_dir: '' - repo_name: integrations-core contents: - - content_name: integrations + - action: integrations branch: master globs: - "*[!}]/metadata.csv" - "*[!}]/manifest.json" - "*[!}]/service_checks.json" - "*[!}]/README.md" - dest_dir: '' - path_to_remove: '' - - content_name: integrations-core-doc + - action: pull-and-push branch: master globs: - docs/dev/*.md - dest_dir: '' - path_to_remove: '' + options: + dest_dir: '/developers/integrations/' + path_to_remove: 'docs/dev/' - repo_name: integrations-extras contents: - - content_name: integrations + - action: integrations branch: master globs: - "**/metadata.csv" From 8045d65faa96eeca6613a210b3483d22fac2a0da Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:38:38 -0500 Subject: [PATCH 08/13] removing useless params --- local/bin/py/update_pre_build.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index e0a5b53769076..07069dc3858b8 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -125,9 +125,6 @@ def __init__(self, opts): self.options.integrations = self.options.integrations + sep if self.options.extras and not self.options.extras.endswith(sep): self.options.extras = self.options.extras + sep - self.list_of_orgs = [] - self.list_of_repos = [] - self.list_of_files = [] self.list_of_contents = [] self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) @@ -226,9 +223,7 @@ def extract_config(self): print('Loading {} configuration file'.format(CONFIGURATION_FILE)) configuration = yaml.load(open(CONFIGURATION_FILE)) for org in configuration: - self.list_of_orgs.append(org['org_name']) for repo in org['repos']: - self.list_of_repos.append(repo['repo_name']) for content in repo['contents']: content_temp = {} content_temp['org_name'] = org['org_name'] @@ -277,9 +272,6 @@ def local_or_upstream(self): else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], 
content['repo_name'], content['branch'], content['globs']) - - # Adding the final globs to a global list of globs - self.list_of_files += content['globs'] def update_globs(self, new_path, globs): new_globs = [] From 27e5a1fbb0d226083f4338c6c30408291c90f697 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Wed, 9 Jan 2019 15:52:28 -0500 Subject: [PATCH 09/13] factorising lines --- local/bin/py/update_pre_build.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 07069dc3858b8..703d02f52c06c 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -245,20 +245,14 @@ def local_or_upstream(self): if not self.options.dogweb: if self.options.token: print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) # sync from integrations-core, download if we don't have it (public repo so no token needed) elif content['repo_name']== 'integrations-core': if not self.options.integrations: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep),content['globs']) + # sync from integrations-extras, download if we don't have it (public repo so no token needed) elif content['repo_name']=='integrations-extras': @@ -266,13 +260,13 @@ def local_or_upstream(self): print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - print("Updating globs for new local version or {} repo".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep),content['globs']) - else: print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print("Updating globs for new local version of repo {}".format(content['repo_name'])) + content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + def update_globs(self, new_path, globs): new_globs = [] for item in globs: From 2ca68cee6dc6225effa3a17bde6cda1fcac28765 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 08:56:02 -0500 Subject: [PATCH 10/13] formating code and adding comments --- local/bin/py/update_pre_build.py | 1054 +++++++++++++++++++++++------- 1 file changed, 820 insertions(+), 234 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 703d02f52c06c..9eb33d06b4171 100755 --- 
a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -19,33 +19,52 @@ from multiprocessing.pool import ThreadPool as Pool from optparse import OptionParser from os import sep, makedirs, getenv, remove -from os.path import exists, basename, curdir, join, abspath, normpath, dirname +from os.path import ( + exists, + basename, + curdir, + join, + abspath, + normpath, + dirname, +) + +CONFIGURATION_FILE = "./local/etc/pull_config.yaml" -CONFIGURATION_FILE = './local/etc/pull_config.yaml' def cache_by_sha(func): """ only downloads fresh file, if we don't have one or we do and the sha has changed """ + @wraps(func) def cached_func(*args, **kwargs): cache = {} list_item = args[1] - dest_dir = kwargs.get('dest_dir') - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) - p_file_out = '{}{}.pickle'.format(dest_dir, path_to_file) + dest_dir = kwargs.get("dest_dir") + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) + p_file_out = "{}{}.pickle".format( + dest_dir, path_to_file + ) makedirs(dirname(file_out), exist_ok=True) if exists(p_file_out) and exists(file_out): - with open(p_file_out, 'rb') as pf: + with open(p_file_out, "rb") as pf: cache = pickle.load(pf) - cache_sha = cache.get('sha', False) - input_sha = list_item.get('sha', False) - if cache_sha and input_sha and cache_sha == input_sha: + cache_sha = cache.get("sha", False) + input_sha = list_item.get("sha", False) + if ( + cache_sha + and input_sha + and cache_sha == input_sha + ): # do nothing as we have the up to date file already return None else: - with open(p_file_out, mode='wb+') as pf: - pickle.dump(list_item, pf, pickle.HIGHEST_PROTOCOL) + with open(p_file_out, mode="wb+") as pf: + pickle.dump( + list_item, pf, pickle.HIGHEST_PROTOCOL + ) return func(*args, **kwargs) + return cached_func @@ -60,39 +79,68 @@ def __exit__(self, *exc): return False def headers(self): - return {'Authorization': 'token {}'.format(self.token)} if self.token else {} + return ( + {"Authorization": "token {}".format(self.token)} + if self.token + else {} + ) def extract(self, data): out = [] - for item in data.get('tree', []): - out.append({'path': item.get('path', ''), 'url': item.get('url', ''), 'type': item.get('type', ''), - 'sha': item.get('sha', '')}) - if item.get('tree', None): - out.append(self.extract(item.get('tree'))) + for item in data.get("tree", []): + out.append( + { + "path": item.get("path", ""), + "url": item.get("url", ""), + "type": item.get("type", ""), + "sha": item.get("sha", ""), + } + ) + if item.get("tree", None): + out.append(self.extract(item.get("tree"))) return out def list(self, org, repo, branch, globs=None): globs = [] if globs is None else globs listing = [] # get the latest sha - url = 'https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}'.format(org, repo, branch) + url = "https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}".format( + org, repo, branch + ) headers = self.headers() - print('Getting latest sha from {}/{}..'.format(repo, branch)) + print( + "Getting latest sha from {}/{}..".format( + repo, branch + ) + ) sha_response = requests.get(url, headers=headers) if sha_response.status_code == requests.codes.ok: - sha = sha_response.json().get('object', {}).get('sha', None) + sha = ( + sha_response.json() + .get("object", {}) + .get("sha", None) + ) if sha: - print('Getting tree from {}/{} @ {}'.format(repo, branch, sha)) + print( + "Getting tree from {}/{} @ {}".format( + repo, branch, sha + ) + ) 
tree_response = requests.get( - 'https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1'.format(org, repo, sha), - headers=headers) + "https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1".format( + org, repo, sha + ), + headers=headers, + ) if tree_response.status_code == 200: - listing = self.extract(tree_response.json()) + listing = self.extract( + tree_response.json() + ) if globs: filtered_listing = [] for item in listing: - path = item.get('path', '') + path = item.get("path", "") for glob_string in globs: if fnmatch.fnmatch(path, glob_string): filtered_listing.append(item) @@ -101,17 +149,27 @@ def list(self, org, repo, branch, globs=None): return listing @cache_by_sha - def raw(self, list_item, request_session, org, repo, branch, dest_dir): + def raw( + self, + list_item, + request_session, + org, + repo, + branch, + dest_dir, + ): headers = self.headers() - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) raw_response = request_session.get( - 'https://raw.githubusercontent.com/{0}/{1}/{2}/{3}'.format(org, repo, branch, path_to_file), - headers=headers + "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( + org, repo, branch, path_to_file + ), + headers=headers, ) if raw_response.status_code == requests.codes.ok: makedirs(dirname(file_out), exist_ok=True) - with open(file_out, mode='wb+') as f: + with open(file_out, mode="wb+") as f: f.write(raw_response.content) @@ -119,54 +177,204 @@ class PreBuild: def __init__(self, opts): super().__init__() self.options = opts - if self.options.dogweb and not self.options.dogweb.endswith(sep): + if ( + self.options.dogweb + and not self.options.dogweb.endswith(sep) + ): self.options.dogweb = self.options.dogweb + sep - if self.options.integrations and not self.options.integrations.endswith(sep): - self.options.integrations = self.options.integrations + sep - if self.options.extras and not self.options.extras.endswith(sep): + if ( + self.options.integrations + and not self.options.integrations.endswith(sep) + ): + self.options.integrations = ( + self.options.integrations + sep + ) + if ( + self.options.extras + and not self.options.extras.endswith(sep) + ): self.options.extras = self.options.extras + sep self.list_of_contents = [] - self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() - self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) - self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) - self.data_integrations_dir = join(self.data_dir, 'integrations') + sep - self.data_service_checks_dir = join(self.data_dir, 'service_checks') + sep - self.content_integrations_dir = join(self.content_dir, 'integrations') + sep - self.extract_dir = '{0}'.format(join(self.tempdir, "extracted") + sep) - self.integration_datafile = '{0}{1}{2}'.format(abspath(normpath(self.options.source)), sep, "integrations.json") - self.regex_h1 = re.compile(r'^#{1}(?!#)(.*)', re.MULTILINE) - self.regex_h1_replace = re.compile(r'^(#{1})(?!#)(.*)', re.MULTILINE) - self.regex_metrics = re.compile(r'(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)', re.DOTALL) - self.regex_service_check = re.compile(r'(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)', re.DOTALL) - self.regex_fm = 
re.compile(r'(?:-{3})(.*?)(?:-{3})(.*)', re.DOTALL) - self.regex_source = re.compile(r'(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}', re.DOTALL) + self.tempdir = ( + "/tmp" + if platform.system() == "Darwin" + else tempfile.gettempdir() + ) + self.data_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "data" + sep, + ) + self.content_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "content" + sep, + ) + self.data_integrations_dir = ( + join(self.data_dir, "integrations") + sep + ) + self.data_service_checks_dir = ( + join(self.data_dir, "service_checks") + sep + ) + self.content_integrations_dir = ( + join(self.content_dir, "integrations") + sep + ) + self.extract_dir = "{0}".format( + join(self.tempdir, "extracted") + sep + ) + self.integration_datafile = "{0}{1}{2}".format( + abspath(normpath(self.options.source)), + sep, + "integrations.json", + ) + self.regex_h1 = re.compile( + r"^#{1}(?!#)(.*)", re.MULTILINE + ) + self.regex_h1_replace = re.compile( + r"^(#{1})(?!#)(.*)", re.MULTILINE + ) + self.regex_metrics = re.compile( + r"(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)", + re.DOTALL, + ) + self.regex_service_check = re.compile( + r"(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)", + re.DOTALL, + ) + self.regex_fm = re.compile( + r"(?:-{3})(.*?)(?:-{3})(.*)", re.DOTALL + ) + self.regex_source = re.compile( + r"(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}", + re.DOTALL, + ) self.datafile_json = [] self.pool_size = 5 - self.integration_mutations = OrderedDict({ - 'hdfs': {'action': 'create', 'target': 'hdfs', 'remove_header': False, 'fm': {'is_public': True, 'kind': 'integration', 'integration_title': 'Hdfs', 'short_description': 'Track cluster disk usage, volume failures, dead DataNodes, and more.'}}, - 'mesos': {'action': 'create', 'target': 'mesos', 'remove_header': False, 'fm': {'aliases': ['/integrations/mesos_master/','/integrations/mesos_slave/'], 'is_public': True, 'kind': 'integration', 'integration_title': 'Mesos', 'short_description': 'Track cluster resource usage, master and slave counts, tasks statuses, and more.'}}, - 'activemq_xml': {'action': 'merge', 'target': 'activemq', 'remove_header': False}, - 'cassandra_nodetool': {'action': 'merge', 'target': 'cassandra', 'remove_header': False}, - 'datadog_checks_base': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'datadog_checks_tests_helper': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'docs': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'gitlab_runner': {'action': 'merge', 'target': 'gitlab', 'remove_header': False}, - 'hdfs_datanode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'hdfs_namenode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'mesos_master': {'action': 'merge', 'target': 'mesos', 'remove_header': True}, - 'mesos_slave': {'action': 'merge', 'target': 'mesos', 'remove_header': False}, - 'kafka_consumer': {'action': 'merge', 'target': 'kafka', 'remove_header': False}, - 'kube_dns': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kube_proxy': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kubernetes_state': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'system_core': {'action': 'discard', 'target': 'system', 'remove_header': False}, - 'system_swap': {'action': 'discard', 'target': 'system', 'remove_header': 
False}, - 'hbase_regionserver': {'action': 'merge', 'target': 'hbase_master', 'remove_header': False}, - }) - self.initial_integration_files = glob.glob('{}*.md'.format(self.content_integrations_dir)) + self.integration_mutations = OrderedDict( + { + "hdfs": { + "action": "create", + "target": "hdfs", + "remove_header": False, + "fm": { + "is_public": True, + "kind": "integration", + "integration_title": "Hdfs", + "short_description": "Track cluster disk usage, volume failures, dead DataNodes, and more.", + }, + }, + "mesos": { + "action": "create", + "target": "mesos", + "remove_header": False, + "fm": { + "aliases": [ + "/integrations/mesos_master/", + "/integrations/mesos_slave/", + ], + "is_public": True, + "kind": "integration", + "integration_title": "Mesos", + "short_description": "Track cluster resource usage, master and slave counts, tasks statuses, and more.", + }, + }, + "activemq_xml": { + "action": "merge", + "target": "activemq", + "remove_header": False, + }, + "cassandra_nodetool": { + "action": "merge", + "target": "cassandra", + "remove_header": False, + }, + "datadog_checks_base": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "datadog_checks_tests_helper": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "docs": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "gitlab_runner": { + "action": "merge", + "target": "gitlab", + "remove_header": False, + }, + "hdfs_datanode": { + "action": "merge", + "target": "hdfs", + "remove_header": False, + }, + "hdfs_namenode": { + "action": "merge", + "target": "hdfs", + "remove_header": False, + }, + "mesos_master": { + "action": "merge", + "target": "mesos", + "remove_header": True, + }, + "mesos_slave": { + "action": "merge", + "target": "mesos", + "remove_header": False, + }, + "kafka_consumer": { + "action": "merge", + "target": "kafka", + "remove_header": False, + }, + "kube_dns": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kube_proxy": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kubernetes_state": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "system_core": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "system_swap": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "hbase_regionserver": { + "action": "merge", + "target": "hbase_master", + "remove_header": False, + }, + } + ) + self.initial_integration_files = glob.glob( + "{}*.md".format(self.content_integrations_dir) + ) makedirs(self.data_integrations_dir, exist_ok=True) - makedirs(self.data_service_checks_dir, exist_ok=True) - makedirs(self.content_integrations_dir, exist_ok=True) + makedirs( + self.data_service_checks_dir, exist_ok=True + ) + makedirs( + self.content_integrations_dir, exist_ok=True + ) @staticmethod def csv_to_yaml(key_name, csv_filename, yml_filename): @@ -179,11 +387,21 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): """ yaml_data = {key_name: []} with open(csv_filename) as csv_file: - reader = csv.DictReader(csv_file, delimiter=',') - yaml_data[key_name] = [dict(line) for line in reader] + reader = csv.DictReader(csv_file, delimiter=",") + yaml_data[key_name] = [ + dict(line) for line in reader + ] if yaml_data[key_name]: - with open(file=yml_filename, mode='w', encoding='utf-8') as f: - f.write(yaml.dump(yaml_data, default_flow_style=False)) + with open( + file=yml_filename, + mode="w", + 
encoding="utf-8", + ) as f: + f.write( + yaml.dump( + yaml_data, default_flow_style=False + ) + ) def download_from_repo(self, org, repo, branch, globs): """ @@ -196,114 +414,228 @@ def download_from_repo(self, org, repo, branch, globs): """ with GitHub(self.options.token) as gh: listing = gh.list(org, repo, branch, globs) - dest = '{0}{1}{2}'.format(self.extract_dir, repo, sep) + dest = "{0}{1}{2}".format( + self.extract_dir, repo, sep + ) with Pool(processes=self.pool_size) as pool: with requests.Session() as s: - r = [x for x in tqdm( - pool.imap_unordered(partial(gh.raw, request_session=s, org=org, repo=repo, branch=branch, dest_dir=dest), listing))] + r = [ + x + for x in tqdm( + pool.imap_unordered( + partial( + gh.raw, + request_session=s, + org=org, + repo=repo, + branch=branch, + dest_dir=dest, + ), + listing, + ) + ) + ] def process(self): """ - 1. If we did not specify local dogweb directory and there is a token download dogweb repo files we need - 2. If we did not specify local integrations-core directory download with or without token as its public repo - 3. Process all files we have dogweb first integrations-core second with the latter taking precedence + This represent the overall workflow of the build of the documentation """ - print('Processing') + print("Processing") self.extract_config() self.local_or_upstream() - + self.process_filenames() - + self.merge_integrations() def extract_config(self): - - print('Loading {} configuration file'.format(CONFIGURATION_FILE)) + """ + This pull the content from the configuration file at CONFIGURATION_FILE location + then parse it to populate the list_of_content variable that contain all contents + that needs to be pulled and processed. + """ + print( + "Loading {} configuration file".format( + CONFIGURATION_FILE + ) + ) configuration = yaml.load(open(CONFIGURATION_FILE)) for org in configuration: - for repo in org['repos']: - for content in repo['contents']: + for repo in org["repos"]: + for content in repo["contents"]: content_temp = {} - content_temp['org_name'] = org['org_name'] - content_temp['repo_name'] = repo['repo_name'] - content_temp['branch'] = content['branch'] - content_temp['action']= content['action'] - content_temp['globs'] = content['globs'] + content_temp["org_name"] = org[ + "org_name" + ] + content_temp["repo_name"] = repo[ + "repo_name" + ] + content_temp["branch"] = content[ + "branch" + ] + content_temp["action"] = content[ + "action" + ] + content_temp["globs"] = content["globs"] - if content['action'] == 'pull-and-push': - content_temp['options'] = content['options'] + if content["action"] == "pull-and-push": + content_temp["options"] = content[ + "options" + ] - self.list_of_contents.append(content_temp) - print('Adding content {} '.format(content_temp)) + self.list_of_contents.append( + content_temp + ) + print( + "Adding content {} ".format( + content_temp + ) + ) def local_or_upstream(self): - + """ + This goes through the list_of_contents and check for each repo specified + If a local version exists otherwise we download it from the upstream repo on Github + """ for content in self.list_of_contents: - if content['repo_name']=='dogweb': + if content["repo_name"] == "dogweb": if not self.options.dogweb: if self.options.token: - print("No local version of {} found, downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading content from 
upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - # sync from integrations-core, download if we don't have it (public repo so no token needed) - elif content['repo_name']== 'integrations-core': + elif ( + content["repo_name"] == "integrations-core" + ): if not self.options.integrations: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) - + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - # sync from integrations-extras, download if we don't have it (public repo so no token needed) - elif content['repo_name']=='integrations-extras': + elif ( + content["repo_name"] + == "integrations-extras" + ): if not self.options.extras: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) else: - print("No local version of {} found, downloading downloading content from upstream version".format(content['repo_name'])) - self.download_from_repo(content['org_name'], content['repo_name'], content['branch'], content['globs']) + print( + "No local version of {} found, downloading downloading content from upstream version".format( + content["repo_name"] + ) + ) + self.download_from_repo( + content["org_name"], + content["repo_name"], + content["branch"], + content["globs"], + ) - print("Updating globs for new local version of repo {}".format(content['repo_name'])) - content['globs'] = self.update_globs('{0}{1}{2}'.format(self.extract_dir, content['repo_name'], sep),content['globs']) + print( + "Updating globs for new local version of repo {}".format( + content["repo_name"] + ) + ) + content["globs"] = self.update_globs( + "{0}{1}{2}".format( + self.extract_dir, + content["repo_name"], + sep, + ), + content["globs"], + ) def update_globs(self, new_path, globs): + """ + Depending if the repo is local or we downloaded it we need to update the globs to match + the final version of the repo to use + :param new_path: new_path to update the globs with + :param globs: list of globs to update + """ new_globs = [] for item in globs: - new_globs.append('{}{}'.format(new_path, item)) + new_globs.append("{}{}".format(new_path, item)) return new_globs def process_filenames(self): - + """ + Goes through the list_of_contents and for each content + triggers the right action to apply. 
+ """ for content in self.list_of_contents: print("Processing content: {}".format(content)) - if content['action'] == 'integrations': - self.process_integrations(content['globs']) - - elif content['action'] == 'source': - - self.process_source_attribute(content['globs']) - - elif content['action'] == 'pull-and-push': - + if content["action"] == "integrations": + self.process_integrations(content["globs"]) + + elif content["action"] == "source": + + self.process_source_attribute( + content["globs"] + ) + + elif content["action"] == "pull-and-push": + self.pull_and_push(content) else: - print("[ERROR] Unsuccessful Processing of {}".format(content)) + print( + "[ERROR] Unsuccessful Processing of {}".format( + content + ) + ) - def process_integrations(self,globs): - - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): - if file_name.endswith('.csv'): + def process_integrations(self, globs): + """ + Go through all files needed for integrations build + and triggers the right function for the right type of file. + :param globs: list of globs for integrations. + """ + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in globs + ) + ): + if file_name.endswith(".csv"): self.process_integration_metric(file_name) - elif file_name.endswith('manifest.json'): + elif file_name.endswith("manifest.json"): self.process_integration_manifest(file_name) - elif file_name.endswith('service_checks.json'): + elif file_name.endswith("service_checks.json"): self.process_service_checks(file_name) - elif file_name.endswith('.md'): + elif file_name.endswith(".md"): self.process_integration_readme(file_name) def pull_and_push(self, content): @@ -314,65 +646,139 @@ def pull_and_push(self, content): :param dest_dir: folder to push the data to in the doc repo """ - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in content['globs'])): - with open(file_name, mode='r+') as f: + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in content["globs"] + ) + ): + with open(file_name, mode="r+") as f: file_content = f.read() # Replacing the master README.md by _index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' + if file_name.endswith("README.md"): + file_name = "_index.md" # Replacing links that point to the Github folder by link that point to the doc. 
- new_link = content['options']['dest_dir'] + '\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md'.format(content['org_name'],content['repo_name'],content['branch'],content['options']['path_to_remove'])) - file_content = re.sub(regex_github_link, new_link, file_content, count=0) + new_link = ( + content["options"]["dest_dir"] + "\\2" + ) + regex_github_link = re.compile( + r"(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md".format( + content["org_name"], + content["repo_name"], + content["branch"], + content["options"][ + "path_to_remove" + ], + ) + ) + file_content = re.sub( + regex_github_link, + new_link, + file_content, + count=0, + ) # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, content['options']['dest_dir'][1:]) + dirp = "{}{}".format( + self.content_dir, + content["options"]["dest_dir"][1:], + ) makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: + with open( + "{}{}".format(dirp, basename(file_name)), + mode="w+", + encoding="utf-8", + ) as f: f.write(file_content) def merge_integrations(self): """ Merges integrations that come under one """ - for name, action_obj in self.integration_mutations.items(): + for ( + name, + action_obj, + ) in self.integration_mutations.items(): if name not in self.initial_integration_files: - action = action_obj.get('action') - target = action_obj.get('target') - input_file = '{}{}.md'.format(self.content_integrations_dir, name) - output_file = '{}{}.md'.format(self.content_integrations_dir, target) - if action == 'merge': - with open(input_file, 'r') as content_file, open(output_file, 'a') as target_file: + action = action_obj.get("action") + target = action_obj.get("target") + input_file = "{}{}.md".format( + self.content_integrations_dir, name + ) + output_file = "{}{}.md".format( + self.content_integrations_dir, target + ) + if action == "merge": + with open( + input_file, "r" + ) as content_file, open( + output_file, "a" + ) as target_file: content = content_file.read() - content = re.sub(self.regex_fm, r'\2', content, count=0) - if action_obj.get('remove_header', False): - content = re.sub(self.regex_h1, '', content, count=0) + content = re.sub( + self.regex_fm, + r"\2", + content, + count=0, + ) + if action_obj.get( + "remove_header", False + ): + content = re.sub( + self.regex_h1, + "", + content, + count=0, + ) else: - content = re.sub(self.regex_h1_replace, r'##\2', content, count=0) + content = re.sub( + self.regex_h1_replace, + r"##\2", + content, + count=0, + ) target_file.write(content) try: remove(input_file) except OSError: - print('the file {} was not found and could not be removed during merge action'.format(input_file)) - elif action == 'truncate': + print( + "the file {} was not found and could not be removed during merge action".format( + input_file + ) + ) + elif action == "truncate": if exists(output_file): - with open(output_file, 'r+') as target_file: + with open( + output_file, "r+" + ) as target_file: content = target_file.read() - content = re.sub(self.regex_fm, r'---\n\1\n---\n', content, count=0) + content = re.sub( + self.regex_fm, + r"---\n\1\n---\n", + content, + count=0, + ) target_file.truncate(0) target_file.seek(0) target_file.write(content) else: - open(output_file, 'w').close() - elif action == 'discard': + open(output_file, "w").close() + elif action == "discard": try: remove(input_file) except OSError: - print('the file {} was not found and could not 
be removed during discard action'.format(input_file)) - elif action == 'create': - with open(output_file, 'w+') as f: - fm = yaml.dump(action_obj.get('fm'), default_flow_style=False).rstrip() - data = '---\n{0}\n---\n'.format(fm) + print( + "the file {} was not found and could not be removed during discard action".format( + input_file + ) + ) + elif action == "create": + with open(output_file, "w+") as f: + fm = yaml.dump( + action_obj.get("fm"), + default_flow_style=False, + ).rstrip() + data = "---\n{0}\n---\n".format(fm) f.write(data) def process_source_attribute(self, globs): @@ -381,24 +787,49 @@ def process_source_attribute(self, globs): and inserts them into the file something.md :param file_name: path to a source.py file """ - for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)): - if file_name.endswith('dd/utils/context/source.py'): - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in globs + ) + ): + if file_name.endswith( + "dd/utils/context/source.py" + ): + out = "|Integration name | API source attribute|\n" + out += "|:---|:---|\n" + with open(file_name, "r") as f: result = f.read() m = re.search(self.regex_source, result) result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') + result = re.sub( + r"[^0-9A-Za-z:, ]", "", result + ) + for line in result.split(","): + pair = line.split(":") if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) + out += "|{0}|{1}|\n".format( + pair[0].strip().title(), + pair[1].strip(), + ) + with open( + "{}{}".format( + self.options.source, + "/content/integrations/faq/list-of-api-source-attribute-value.md", + ), + mode="r+", + encoding="utf-8", + ) as f: + boundary = re.compile( + r"^-{3,}$", re.MULTILINE + ) + _, fm, content = boundary.split( + f.read(), 2 + ) template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) + new_content = template.format( + front_matter=fm.strip(), content=out + ) f.truncate(0) f.seek(0) f.write(new_content) @@ -408,11 +839,17 @@ def process_integration_metric(self, file_name): Take a single metadata csv file and convert it to yaml :param file_name: path to a metadata csv file """ - if file_name.endswith('/metadata.csv'): - key_name = basename(dirname(normpath(file_name))) + if file_name.endswith("/metadata.csv"): + key_name = basename( + dirname(normpath(file_name)) + ) else: - key_name = basename(file_name.replace('_metadata.csv', '')) - new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) + key_name = basename( + file_name.replace("_metadata.csv", "") + ) + new_file_name = "{}{}.yaml".format( + self.data_integrations_dir, key_name + ) self.csv_to_yaml(key_name, file_name, new_file_name) def process_integration_manifest(self, file_name): @@ -422,14 +859,27 @@ def process_integration_manifest(self, file_name): :param file_name: path to a manifest json file """ - names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] + names = [ + 
d.get("name", "").lower() + for d in self.datafile_json + if "name" in d + ] with open(file_name) as f: data = json.load(f) - data_name = data.get('name', '').lower() - if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - data['is_public'] = False + data_name = data.get("name", "").lower() + if data_name in [ + k + for k, v in self.integration_mutations.items() + if v.get("action") == "merge" + ]: + data["is_public"] = False if data_name in names: - item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == data_name + ] if len(item) > 0: item[0].update(data) else: @@ -441,8 +891,13 @@ def process_service_checks(self, file_name): as the integration name it came from e.g /data/service_checks/docker.json :param file_name: path to a service_checks json file """ - new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) - shutil.copy(file_name, self.data_service_checks_dir + new_file_name) + new_file_name = "{}.json".format( + basename(dirname(normpath(file_name))) + ) + shutil.copy( + file_name, + self.data_service_checks_dir + new_file_name, + ) def process_integration_readme(self, file_name): """ @@ -455,33 +910,90 @@ def process_integration_readme(self, file_name): :param file_name: path to a readme md file """ - metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) + metrics = glob.glob( + "{path}{sep}*metadata.csv".format( + path=dirname(file_name), sep=sep + ) + ) metrics = metrics[0] if len(metrics) > 0 else None - metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) - service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) - service_check = service_check[0] if len(service_check) > 0 else None - service_check_exist = service_check and exists(service_check) - manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') - manifest_json = json.load(open(manifest)) if exists(manifest) else {} + metrics_exist = ( + metrics + and exists(metrics) + and linecache.getline(metrics, 2) + ) + service_check = glob.glob( + "{file}.json".format( + file=self.data_service_checks_dir + + basename(dirname(file_name)) + ) + ) + service_check = ( + service_check[0] + if len(service_check) > 0 + else None + ) + service_check_exist = service_check and exists( + service_check + ) + manifest = "{0}{1}{2}".format( + dirname(file_name), sep, "manifest.json" + ) + manifest_json = ( + json.load(open(manifest)) + if exists(manifest) + else {} + ) dependencies = self.add_dependencies(file_name) - new_file_name = '{}.md'.format(basename(dirname(file_name))) - exist_already = exists(self.content_integrations_dir + new_file_name) - with open(file_name, 'r') as f: + new_file_name = "{}.md".format( + basename(dirname(file_name)) + ) + exist_already = exists( + self.content_integrations_dir + new_file_name + ) + with open(file_name, "r") as f: result = f.read() - title = manifest_json.get('name', '').lower() - if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - result = re.sub(self.regex_h1, '', result, 1) + title = manifest_json.get("name", "").lower() + if title not in [ + k + for k, v in self.integration_mutations.items() + if v.get("action") == "merge" + ]: + result = re.sub( + self.regex_h1, "", result, 1 + ) if metrics_exist: - result = re.sub(self.regex_metrics, r'\1{{< 
get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) + result = re.sub( + self.regex_metrics, + r'\1{{< get-metrics-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) if service_check_exist: - result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) - result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') - result = self.add_integration_frontmatter(new_file_name, result, dependencies) + result = re.sub( + self.regex_service_check, + r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) + result = "{0}\n\n{1}".format( + result, "{{< get-dependencies >}}" + ) + result = self.add_integration_frontmatter( + new_file_name, result, dependencies + ) if not exist_already: - with open(self.content_integrations_dir + new_file_name, 'w') as out: + with open( + self.content_integrations_dir + + new_file_name, + "w", + ) as out: out.write(result) - def add_integration_frontmatter(self, file_name, content, dependencies=[]): + def add_integration_frontmatter( + self, file_name, content, dependencies=[] + ): """ Takes an integration README.md and injects front matter yaml based on manifest.json data of the same integration :param file_name: new integration markdown filename e.g airbrake.md @@ -491,41 +1003,115 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): fm = {} template = "---\n{front_matter}\n---\n\n{content}\n" if file_name not in self.initial_integration_files: - item = [d for d in self.datafile_json if d.get('name', '').lower() == basename(file_name).replace('.md', '')] + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == basename(file_name).replace(".md", "") + ] if item and len(item) > 0: - item[0]['kind'] = 'integration' - item[0]['integration_title'] = item[0].get('public_title', '').replace('Datadog-', '').replace( - 'Integration', '').strip() - item[0]['git_integration_title'] = item[0].get('name', '').lower() - if item[0].get('type', None): - item[0]['ddtype'] = item[0].get('type') - del item[0]['type'] - item[0]['dependencies'] = dependencies - fm = yaml.dump(item[0], default_flow_style=False).rstrip() + item[0]["kind"] = "integration" + item[0]["integration_title"] = ( + item[0] + .get("public_title", "") + .replace("Datadog-", "") + .replace("Integration", "") + .strip() + ) + item[0]["git_integration_title"] = ( + item[0].get("name", "").lower() + ) + if item[0].get("type", None): + item[0]["ddtype"] = item[0].get("type") + del item[0]["type"] + item[0]["dependencies"] = dependencies + fm = yaml.dump( + item[0], default_flow_style=False + ).rstrip() else: - fm = {'kind': 'integration'} - return template.format(front_matter=fm, content=content) + fm = {"kind": "integration"} + return template.format( + front_matter=fm, content=content + ) def add_dependencies(self, file_name): dependencies = [] - if file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep)): - dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep), "https://github.com/DataDog/integrations-core/blob/master/")) + if file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-core", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-core", + sep, + ), + "https://github.com/DataDog/integrations-core/blob/master/", + ) + ) - elif file_name.startswith('{0}{1}{2}'.format(self.extract_dir, 
'integrations-extras', sep)): - dependencies.append(file_name.replace('{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep), "https://github.com/DataDog/integrations-extras/blob/master/")) + elif file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-extras", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-extras", + sep, + ), + "https://github.com/DataDog/integrations-extras/blob/master/", + ) + ) return dependencies -if __name__ == '__main__': - parser = OptionParser(usage="usage: %prog [options] link_type") - parser.add_option("-t", "--token", help="github access token", default=None) - parser.add_option("-w", "--dogweb", help="path to dogweb local folder", default=None) - parser.add_option("-i", "--integrations", help="path to integrations-core local folder", default=None) - parser.add_option("-e", "--extras", help="path to integrations-extras local folder", default=None) - parser.add_option("-s", "--source", help="location of src files", default=curdir) + +if __name__ == "__main__": + parser = OptionParser( + usage="usage: %prog [options] link_type" + ) + parser.add_option( + "-t", + "--token", + help="github access token", + default=None, + ) + parser.add_option( + "-w", + "--dogweb", + help="path to dogweb local folder", + default=None, + ) + parser.add_option( + "-i", + "--integrations", + help="path to integrations-core local folder", + default=None, + ) + parser.add_option( + "-e", + "--extras", + help="path to integrations-extras local folder", + default=None, + ) + parser.add_option( + "-s", + "--source", + help="location of src files", + default=curdir, + ) options, args = parser.parse_args() - options.token = getenv('GITHUB_TOKEN', options.token) if not options.token else options.token + options.token = ( + getenv("GITHUB_TOKEN", options.token) + if not options.token + else options.token + ) pre = PreBuild(options) pre.process() From eff597171622d40f3062bd193e40594dcd74d6bc Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:45:00 -0500 Subject: [PATCH 11/13] using same content logic for all processing function --- local/bin/py/update_pre_build.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 9eb33d06b4171..236524c07288f 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -596,13 +596,11 @@ def process_filenames(self): for content in self.list_of_contents: print("Processing content: {}".format(content)) if content["action"] == "integrations": - self.process_integrations(content["globs"]) + self.process_integrations(content) elif content["action"] == "source": - self.process_source_attribute( - content["globs"] - ) + self.process_source_attribute(content) elif content["action"] == "pull-and-push": @@ -614,16 +612,16 @@ def process_filenames(self): ) ) - def process_integrations(self, globs): + def process_integrations(self, content): """ Go through all files needed for integrations build and triggers the right function for the right type of file. - :param globs: list of globs for integrations. 
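The path-to-URL mapping in add_dependencies above reduces to a small pure function. Below is a hedged, table-driven rework of that idea; collapsing the if/elif chain into a dictionary is an editorial choice, and the extract_dir value plus the sample path are invented for the demonstration, not values produced by the script.

```python
# Standalone rework of the add_dependencies idea: a file extracted under
# <extract_dir>/<repo>/... maps back to its blob URL on GitHub master.
# The dictionary-driven loop replaces the original if/elif chain; extract_dir
# and the sample path are invented, and a POSIX os.sep is assumed to match them.
from os import sep

REPO_URLS = {
    "integrations-core": "https://github.com/DataDog/integrations-core/blob/master/",
    "integrations-extras": "https://github.com/DataDog/integrations-extras/blob/master/",
}

def add_dependencies(file_name, extract_dir):
    dependencies = []
    for repo, url in REPO_URLS.items():
        prefix = "{0}{1}{2}".format(extract_dir, repo, sep)
        if file_name.startswith(prefix):
            dependencies.append(file_name.replace(prefix, url))
    return dependencies

print(add_dependencies(
    "/tmp/extracted/integrations-core/some_check/README.md",
    "/tmp/extracted/",
))
# -> ['https://github.com/DataDog/integrations-core/blob/master/some_check/README.md']
```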
+ :param content: integrations content to process """ for file_name in tqdm( chain.from_iterable( glob.iglob(pattern, recursive=True) - for pattern in globs + for pattern in content["globs"] ) ): if file_name.endswith(".csv"): @@ -642,8 +640,7 @@ def pull_and_push(self, content): """ Take the content from a folder following github logic and transform it to be displayed in the doc in dest_dir folder - :param globs: folder to pull - :param dest_dir: folder to push the data to in the doc repo + :param content: content to process """ for file_name in tqdm( @@ -781,7 +778,7 @@ def merge_integrations(self): data = "---\n{0}\n---\n".format(fm) f.write(data) - def process_source_attribute(self, globs): + def process_source_attribute(self, content): """ Take a single source.py file extracts the FROM_DISPLAY_NAME dict values and inserts them into the file something.md @@ -790,7 +787,7 @@ def process_source_attribute(self, globs): for file_name in tqdm( chain.from_iterable( glob.iglob(pattern, recursive=True) - for pattern in globs + for pattern in content["globs"] ) ): if file_name.endswith( From 3d37cb25fa138ac1dc616e3789c0fb0344e0ab20 Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:48:41 -0500 Subject: [PATCH 12/13] comment update --- local/bin/py/update_pre_build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index 236524c07288f..9260fcdf37719 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -438,7 +438,7 @@ def download_from_repo(self, org, repo, branch, globs): def process(self): """ - This represent the overall workflow of the build of the documentation + This represents the overall workflow of the build of the documentation """ print("Processing") @@ -452,8 +452,8 @@ def process(self): def extract_config(self): """ - This pull the content from the configuration file at CONFIGURATION_FILE location - then parse it to populate the list_of_content variable that contain all contents + This pulls the content from the configuration file at CONFIGURATION_FILE location + then parses it to populate the list_of_content variable that contains all contents that needs to be pulled and processed. """ print( @@ -614,7 +614,7 @@ def process_filenames(self): def process_integrations(self, content): """ - Go through all files needed for integrations build + Goes through all files needed for integrations build and triggers the right function for the right type of file. :param content: integrations content to process """ From fa878deaa1a743fab0c5611848a112c010e2ef0a Mon Sep 17 00:00:00 2001 From: Pierre Guceski Date: Thu, 10 Jan 2019 09:49:59 -0500 Subject: [PATCH 13/13] removing unsused param in config file --- local/etc/pull_config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml index 2c02f67470f42..829c485472749 100644 --- a/local/etc/pull_config.yaml +++ b/local/etc/pull_config.yaml @@ -48,5 +48,3 @@ - "**/manifest.json" - "**/service_checks.json" - "**/README.md" - path_to_remove: '' - dest_dir: ''
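The last patch trims path_to_remove and dest_dir from a configuration entry that, per its commit message, does not use them; in the script, only pull-and-push contents ever read an options block. A small consistency check along those lines is sketched below. The YAML snippet is invented, and the check itself is a suggestion inferred from the script's behaviour rather than something the patch adds.

```python
# Optional sanity check consistent with the final patch: entries whose action
# is "pull-and-push" must carry an options block (dest_dir, path_to_remove),
# since pull_and_push reads both; other actions can omit it entirely.
# The snippet is an invented example, not the repository's real pull_config.yaml.
import yaml

SNIPPET = """
- org_name: DataDog
  repos:
    - repo_name: integrations-extras
      contents:
        - branch: master
          action: integrations
          globs:
            - "**/metadata.csv"
            - "**/manifest.json"
            - "**/service_checks.json"
            - "**/README.md"
"""

def check_contents(raw_yaml):
    problems = []
    for org in yaml.safe_load(raw_yaml):
        for repo in org["repos"]:
            for content in repo["contents"]:
                if content["action"] == "pull-and-push" and "options" not in content:
                    problems.append(
                        "{}/{}: pull-and-push entry has no options block".format(
                            org["org_name"], repo["repo_name"]
                        )
                    )
    return problems

if __name__ == "__main__":
    print(check_contents(SNIPPET) or "configuration looks consistent")
```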